In [105]:
import os
import pickle
import random

import keras
import numpy as np
import tensorflow

In [106]:
# Collect the path of every beat image into one list per class, so that beats
# can later be drawn at random from each list.

files = os.listdir('AF_Beats_jpg/')
AF_beats = ['AF_Beats_jpg/' + name for name in files]

files = os.listdir('Normal_Beats_jpg/')
Normal_beats = ['Normal_Beats_jpg/' + name for name in files]

In [None]:
# Now we need to find the total amount of AF beats needed
# in order to get equal amounts of AF and non-AF samples.
# For each AF sample we randomly generate a number in [1, 4] giving the amount of
# normal beats in that sample (the rest being AF beats), and we track how many normal
# beats have gone in. The best way to do this is to create a normal sample first,
# followed by an AF sample, and to repeat this until we run out of normal beats.
# Then we can fill in the gaps of the AF samples with the spare AF beats.

# Running totals of beats consumed so far, and the per-class sample lists
number_normal, number_af = 0, 0
normal_samples, AF_samples = [], []

def random_normal_selection(k, number, sample, beats):
    """Move up to ``k`` randomly chosen beats from ``beats`` into ``sample``.

    Both lists are modified in place: every chosen beat is appended to
    ``sample`` and removed from ``beats``, so no beat is ever used twice.

    Parameters
    ----------
    k : int
        Number of beats requested. If it exceeds the number of beats left,
        it is replaced by a random integer in ``[1, len(beats)]``.
    number : int
        Running total of beats consumed so far.
    sample : list
        Sample being assembled; receives the chosen beats.
    beats : list
        Pool of beats still available; shrinks by the number of beats moved.

    Returns
    -------
    int
        ``number`` plus the count of beats actually moved. ``number`` is
        returned unchanged when ``beats`` is empty or ``k`` is not positive.
    """
    remaining = len(beats)

    # Nothing to draw from, or nothing requested: leave everything untouched.
    # (A negative k must not be allowed to decrease the running total.)
    if remaining == 0 or k <= 0:
        return number

    if k > remaining:
        # Fewer beats are left than requested, so re-draw k from [1, remaining].
        # Drawing a scalar (no size=1) avoids the deprecated conversion of a
        # 1-element array to int; with one beat left this always yields 1.
        k = int(np.random.default_rng().integers(1, remaining + 1))

    for _ in range(k):
        random_index = random.randrange(len(beats))
        # Swap the chosen beat to the end and pop it: O(1) removal instead of
        # shifting the whole tail of a very long list on every deletion.
        # The order of the remaining pool is irrelevant as picks are random.
        beats[random_index], beats[-1] = beats[-1], beats[random_index]
        sample.append(beats.pop())

    return number + k
        
# Total number of normal beats available; the loop below runs until all of
# them have been placed into a sample.
total_normal_beats = len(Normal_beats)
print(total_normal_beats)

# One generator for the whole loop instead of rebuilding it every iteration
rng = np.random.default_rng()

while number_normal != total_normal_beats:

    normal_sample = []
    AF_sample = []

    # First create a normal sample containing 5 random beats, unless too few
    # normal beats are left, in which case only an AF sample is created
    if len(Normal_beats) > 5:
        number_normal = random_normal_selection(5, number_normal, normal_sample, Normal_beats)
        normal_samples.append(normal_sample)

    # How many normal beats go into the AF sample: the upper bound is
    # exclusive, so this is 1..4. Drawing a scalar (no size=1) avoids the
    # deprecated conversion of a 1-element array to int.
    number_normal_in_af_sample = int(rng.integers(1, 5))

    # Put the normal beats into the AF sample, then add the AF beats.
    # The sample can end up shorter than 5 when either pool runs low.
    number_normal = random_normal_selection(number_normal_in_af_sample, number_normal, AF_sample, Normal_beats)
    number_af = random_normal_selection(5 - number_normal_in_af_sample, number_af, AF_sample, AF_beats)

    AF_samples.append(AF_sample)

# Peek at the results; guarded so a tiny or empty data set does not raise
if len(AF_samples) >= 2:
    print(AF_samples[-2])
if normal_samples:
    print(normal_samples[-1])

588861


In [None]:
# Now we have used up all the normal beats. We need to ensure the number of
# samples is the same for non-AF and AF, and that every AF sample kept holds
# 5 beats.

if AF_samples:
    print(AF_samples[-1])

# Find difference between sample sizes
difference = abs(len(AF_samples) - len(normal_samples))
print(len(AF_samples))
print(len(normal_samples))
print(difference)

# If there are more AF samples than normal samples, drop the surplus from the tail
if len(AF_samples) > len(normal_samples):
    del AF_samples[len(normal_samples):]

# The last AF sample kept can be shorter than 5 beats because the normal beats
# ran out while it was being built. Top it up with random AF beats; this is
# needed after trimming as well, not only when the sizes already matched.
if AF_samples:
    number_to_append = 5 - len(AF_samples[-1])
    number_af = random_normal_selection(number_to_append, number_af, AF_samples[-1], AF_beats)
    print(AF_samples[-1])

# If there are fewer AF samples, add 'pure' samples made of 5 AF beats each.
# A new list is created per sample (appending one shared list would make every
# added sample the same growing object), and the AF beat count is kept up to date.
for _ in range(len(normal_samples) - len(AF_samples)):
    AF_sample = []
    number_af = random_normal_selection(5, number_af, AF_sample, AF_beats)
    AF_samples.append(AF_sample)

# Now we have the same number of samples in AF and non-AF data

In [None]:
# Check: number of AF samples, number of normal samples, then the last AF sample
print(len(AF_samples), len(normal_samples), sep='\n')
print(AF_samples[-1])

In [None]:
# Now we stack all the samples into one total input, normal samples first and
# AF samples after them, along with the corresponding labels
# (0 = normal sample, 1 = AF sample)
number_af_samples = len(AF_samples)
number_normal_samples = len(normal_samples)

# Convert into separately named arrays so the original lists are left intact
# and the earlier cells that modify those lists can still be re-run
normal_samples_array = np.asarray(normal_samples)
AF_samples_array = np.asarray(AF_samples)

total_input = np.vstack((normal_samples_array, AF_samples_array))
labels = np.concatenate((np.zeros(number_normal_samples), np.ones(number_af_samples)))

# Every stacked sample must have exactly one label
assert len(total_input) == len(labels)

In [None]:
# Check: sizes match, and the first entry after the normal samples is an AF
# sample with its label

first_af = slice(number_normal_samples, number_normal_samples + 1)

print(len(labels))
print(len(total_input))
print(total_input[first_af])
print(labels[first_af])

In [None]:
# Shuffle the samples and the labels with one shared random order
def unison_shuffled_copies(a, b):
    """Return copies of ``a`` and ``b`` reordered by the same random permutation.

    Both arguments must have the same length and support indexing with an
    integer array (e.g. NumPy arrays). Entry ``i`` of the first result and
    entry ``i`` of the second result come from the same original position.
    """
    size = len(a)
    assert size == len(b)
    order = np.random.permutation(size)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

# Reorder samples and labels with the same permutation so each sample keeps its
# label. Both names are rebound, so re-running this cell shuffles again.
total_input, labels = unison_shuffled_copies(total_input, labels)

In [None]:
# Check: first three shuffled samples with their labels, then both shapes

preview = slice(None, 3)
print(total_input[preview])
print(labels[preview])
for array in (total_input, labels):
    print(array.shape)

In [None]:
# Now export these as pickles: the total array of samples goes to
# CNN_input.pkl and the labels go to CNN_labels.pkl

for filename, payload in (('CNN_input', total_input), ('CNN_labels', labels)):
    with open('{}.pkl'.format(filename), 'wb') as f:
        pickle.dump(payload, f)