In [16]:
#Import from the Keras library
from keras import models
from keras.layers import Dense, Dropout, Flatten, Input 
from keras.layers import Conv2D,  MaxPooling2D
from keras.models import Model
from keras import optimizers 
from keras import utils
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
import keras
from keras_tqdm import TQDMNotebookCallback
#from secret import credentials

#This allows for Keras models to be saved. 
import h5py
#Other import statements 
import h5py
import random
import numpy as np
import pandas as pd
import pathlib
import cv2
import pymysql
import os

In [17]:
#Database credentials are loaded from a local secrets module; that file must never be committed to the repository
from secrets import credentials


def connect():
    """
    Opens a new MySQL connection with pymysql using the values stored in `credentials`.

    The keys read from `credentials` are 'db_host', 'db_port', 'db_name', 'db_username'
    and 'db_password'.

    Output: the pymysql connection object. The caller is responsible for closing it.
    """
    #Every argument is passed by keyword: recent PyMySQL releases no longer accept
    #positional connection arguments, and `passwd`/`db` are deprecated aliases of
    #`password`/`database`.
    conn = pymysql.connect(host=credentials['db_host'],
                           user=credentials['db_username'],
                           password=credentials['db_password'],
                           database=credentials['db_name'],
                           port=credentials['db_port'])
    return conn

In [18]:
def import_data(cache_path, conn=None, scale=100):
    """
    Loads the labelled frames from the database, caching the decoded images on disk.

    Input:
        cache_path - base path of the image cache. The images themselves are stored in
                     the directory named cache_path + 'testing'.
        conn       - an open database connection. When None, a connection is opened with
                     connect() for the duration of the call and closed afterwards.
        scale      - percentage passed to create_image when the cached images are loaded.

    Output: two dictionaries
        recording_ids - maps a recording id to the list of raw ids that belong to it
        xy            - maps a raw id to [isCSGM] or, when both cached images exist,
                        to [isCSGM, rgb_image, depth_image]

    Any database error is printed and re-raised.
    """
    #The connection is created lazily. A `conn=connect()` default would be evaluated once,
    #when the function is defined, and then be shared by every later call.
    owns_connection = conn is None
    if owns_connection:
        conn = connect()

    recording_ids = {}
    xy = {}

    image_query = "SELECT r.id, r.recording_id, r.isCSGM FROM nicu.Video_Raw AS r JOIN nicu.Video_Generated AS g ON r.id=g.raw_id  WHERE (r.recording_id>1) AND (g.RGB_Optical_Flow IS NOT NULL) LIMIT 1500"
    frame_query = "SELECT RGB_Optical_Flow, D_Depth_Flow from Video_Generated WHERE (raw_id=%s)"

    curs = conn.cursor()
    try:
        try:
            curs.execute(image_query)
            for row in curs.fetchall():
                raw_id = row[0]
                rec_id = row[1]
                csgm = row[2]
                #A raw id returned twice would later receive its images twice and then be
                #rejected by create_array, so only the first occurrence is kept.
                if raw_id in xy:
                    continue
                recording_ids.setdefault(rec_id, []).append(raw_id)
                xy[raw_id] = [csgm]
        except Exception as e:
            print("Error retrieving ID's", e)
            raise

        os.makedirs(cache_path, exist_ok=True)

        #NOTE(review): the suffix is appended without a path separator, so the images live
        #next to cache_path rather than inside it - confirm this is intended.
        cache_path = cache_path+'testing'
        #cv2.imwrite does not create missing directories, so make sure the target exists.
        os.makedirs(cache_path, exist_ok=True)

        for rec_id in recording_ids:
            for raw_id in recording_ids[rec_id]:
                rgb_path = cache_path+'/'+str(raw_id)+".oflow.png"
                depth_path = cache_path+'/'+str(raw_id)+".dflow.png"

                #Only hits the database when at least one of the two images is not cached yet.
                if not (os.path.exists(rgb_path) and os.path.exists(depth_path)):
                    try:
                        #The parameters must be a tuple; (value) without a comma is just the value.
                        curs.execute(frame_query, (raw_id,))
                        for row in curs.fetchall():
                            for blob, path in ((row[0], rgb_path), (row[1], depth_path)):
                                if blob is not None and not os.path.exists(path):
                                    decoded = cv2.imdecode(np.asarray(bytearray(blob),dtype=np.uint8),cv2.IMREAD_UNCHANGED)
                                    cv2.imwrite(path, decoded)
                    except Exception as e:
                        print("Error retrieving Optical Flow frame",e)
                        raise

                #Only converts the two images and adds them to the containers if both exist.
                if os.path.exists(rgb_path) and os.path.exists(depth_path):
                    rgb_img = create_image(rgb_path, scale)
                    depth_img = create_image(depth_path, scale)
                    xy[raw_id].extend([rgb_img,depth_img])
    finally:
        #Runs on success and on failure so the cursor is never leaked.
        curs.close()
        if owns_connection:
            conn.close()

    return recording_ids, xy

def create_image(path, scale=100):
    '''
    Reads the image stored at path with cv2 and resizes it to scale/100 of its original size.

    Input:
        path  - location of the image file
        scale - size of the result as a percentage of the original (100 keeps the size)

    Output: the resized cv2 image

    Raises IOError when cv2 cannot read the file.
    '''
    img = cv2.imread(path)
    #cv2.imread reports a missing or unreadable file by returning None instead of raising.
    if img is None:
        raise IOError("Could not read image: %s" % path)

    factor = scale / 100
    #Never ask cv2.resize for a zero-sized image, which it rejects.
    width = max(1, int(img.shape[1] * factor))
    height = max(1, int(img.shape[0] * factor))
    return cv2.resize(img,(width,height), interpolation=cv2.INTER_CUBIC)



In [19]:
def create_array(raw_ids, xy):
    """
    Builds the model inputs and labels for a collection of ids.

    Input:
        raw_ids - the ids to include. The collection itself is left untouched.
        xy      - dictionary keyed by id. A complete entry is a list of three items: the
                  label (1 or 0, whether the images exhibit CSGM movement) at index 0, the
                  rgb image at index 1 and the depth flow image at index 2.

    Ids that are missing from xy, or whose entry does not hold exactly three items, are skipped.

    Output: Three numpy arrays (rgb images, depth images, labels) that share one random order
    """
    #Shuffles a copy for randomness, so the caller's list keeps its order.
    shuffled_ids = list(raw_ids)
    random.shuffle(shuffled_ids)

    rgb_list = []
    depth_list = []
    csgm_list = []

    for raw_id in shuffled_ids:
        entry = xy.get(raw_id)
        if entry is None or len(entry) != 3:
            continue
        csgm_list.append(entry[0])
        rgb_list.append(entry[1])
        depth_list.append(entry[2])

    x_rgb = np.array(rgb_list)
    x_depth = np.array(depth_list)
    y = np.array(csgm_list)
    return x_rgb, x_depth, y
        

In [20]:
def create_cnn(x_train, filter_info={0:[32,3]}, dropout={0:0.25}, pooling={0:2}, activation='relu'):
    """
    Builds the convolutional part of a network, ending in a Flatten layer.

    Input: The only required input is the array of input data.
        x_train     - input data; x_train.shape[1:] is used as the input shape of the first layer
        filter_info - dictionary mapping a block key to [number of filters, filter size].
                      Every entry adds two Conv2D layers with those settings.
        dropout     - dictionary mapping a block key to the rate of a Dropout layer added after that block
        pooling     - dictionary mapping a block key to the pool size of a MaxPooling2D layer added after that block
        activation  - activation used by every Conv2D layer

    The default dictionaries are only read, never modified.

    Output: Outputs a keras Sequential model and a str representation of that model.
    """
    model = models.Sequential()
    str_model = "Overview of Model Architecture: \n"
    conv_description = "2D Convulution Layer with %d filters the size of (%d,%d) and %s activation \n"

    for position, i in enumerate(filter_info):
        num_filters = filter_info.get(i)[0]
        filter_size = filter_info.get(i)[1]
        kernel = (filter_size, filter_size)

        #The first layer of the model carries the input shape, whatever key its block uses.
        #The activation argument is applied to the layers so the model matches its description.
        if position == 0:
            model.add(Conv2D(num_filters, kernel, activation=activation, input_shape=x_train.shape[1:]))
        else:
            model.add(Conv2D(num_filters, kernel, activation=activation))
        str_model += (conv_description %(num_filters, filter_size, filter_size, activation))

        model.add(Conv2D(num_filters, kernel, activation=activation))
        str_model += (conv_description %(num_filters, filter_size, filter_size, activation))

        if i in pooling:
            pool_filter_size = pooling.get(i)
            model.add(MaxPooling2D(pool_size=(pool_filter_size, pool_filter_size)))
            str_model += ('2D Pooling Max Pooling Layer with filter size (%d,%d)\n' %(pool_filter_size,pool_filter_size))

        if i in dropout:
            drop_rate = dropout.get(i)
            model.add(Dropout(drop_rate))
            str_model += ('Droput Layer with with a rate of %f \n' %(drop_rate))

    #These will be added to the end of every model no matter what
    model.add(Flatten())
    str_model += ('Flatten\n')
    print(str_model)

    return model, str_model

                    


In [24]:
def confusion_matrix(exp_values, predicted_values):
    """
    This creates a confusion matrix with the predicted accuracy of the model.

    Input:
        exp_values       - indexable sequence of the expected labels (0 or 1)
        predicted_values - predictions in the format of the output of Keras's model.predict();
                           they are turned into 0/1 labels with convert_predictions

    Output: a pandas dataframe whose rows are the predicted labels (P0, P1) and whose
        columns are the expected labels (E0, E1), along with a number score which is the
        accuracy of the model (the share of samples on the diagonal). The score is NaN
        when exp_values is empty.
    """
    predicted_values = convert_predictions(predicted_values)

    #Creates a DataFrame of zeros
    matrix = pd.DataFrame(np.zeros((2,2)) , ['P0','P1'], ['E0','E1'])

    #Counts every sample in the cell (predicted row, expected column). A correct prediction
    #lands on the diagonal, so no separate branch is needed for it.
    for i in range(len(exp_values)):
        matrix.iloc[int(predicted_values[i]), int(exp_values[i])] += 1

    #Single-step .iloc[row, col] access; chaining .iloc[i][i] depends on positional
    #indexing of a labelled Series, which pandas has deprecated.
    diagonal_sum = matrix.iloc[0, 0] + matrix.iloc[1, 1]

    total = len(exp_values)
    score = diagonal_sum/total if total else float('nan')

    return  matrix, score
    
    
            
def convert_predictions(predictions):
    """
    Converts predictions outputted by a keras model into a list of 1s and 0s.

    Input: one score per sample, either as a flat sequence or as the (n, 1) array
        returned by model.predict()

    Output: a list with one int per sample: 1 when the score, divided by the highest
        score, is at least 0.5 and 0 otherwise. When no score is above 0 every sample
        is labelled 0.

    NOTE(review): because the scores are divided by the highest score before they are
    compared with 0.5, the highest-scoring sample is always labelled 1 - confirm this
    scaling is intended for a sigmoid output.
    """
    scores = np.asarray(predictions, dtype=float).reshape(-1)
    if scores.size == 0:
        return []

    #Finds the highest prediction; it is never taken to be below 0
    max_prediction = max(0.0, float(scores.max()))
    if max_prediction == 0:
        #Nothing to scale by; dividing would be a division by zero.
        return [0] * int(scores.size)

    #Scales predictions and determines if it is 1 or 0
    return [1 if p >= 0.5 else 0 for p in scores / max_prediction]

In [30]:
def runTest(pooling, dropout, filter_info, loss, activation, final_activation, file_name='model.txt', model_name='model', save_model=False, epochs=5, batch_size=32, cache_path='/Users/jonathanlee/Desktop/Python/NICU/NICU_data'):
    """
    Trains and evaluates the two-input (rgb + depth) model once per recording and writes a report.

    Every recording id is used as the test fold once while all other recordings form the
    training data. The samples labelled as CSGM are repeated in the training ids until both
    classes have the same number of ids.

    Input:
        pooling, dropout, filter_info, activation - passed on to create_cnn for both branches
        loss             - loss the model is compiled with
        final_activation - activation of the single output unit
        file_name        - text file the statistics are written to
        model_name       - path the model is saved to when save_model is True
        save_model       - whether to save the model trained in the last fold
        epochs, batch_size - passed on to model.fit
        cache_path       - passed on to import_data. Update it (or the default) so that it
                           points at a location in your own directory.

    Output: None. The per-fold results and the cumulative statistics are written to file_name.
    """
    recording_ids_dict, xy = import_data(cache_path)

    matrices = {}
    scores = {}
    model_scores = {}
    str_model =''
    model = None

    #This essentially uses KFold cross validation using every recording Id as a new fold
    for i in recording_ids_dict:
        print('Testing on ' + str(i))
        #Copies, so that nothing done to these lists changes recording_ids_dict.
        test_ids = list(recording_ids_dict[i])
        train_ids = [raw_id
                     for rec_id, raw_ids in recording_ids_dict.items()
                     if rec_id != i
                     for raw_id in raw_ids]

        #This creates an even amount of examples of CSGM and not CSGM
        is_ids = [j for j in train_ids if not xy.get(j)[0] == 0]
        num_is = len(is_ids)
        num_isnt = len(train_ids) - num_is

        print('Num is %d and num isn\'t %d' %(num_is, num_isnt))
        #Without a single CSGM example there is nothing to repeat.
        if is_ids:
            for j in range(num_isnt-num_is):
                train_ids.append(is_ids[(j % len(is_ids))])

        print('Length ' + str(len(train_ids)))

        x_rgb_train, x_depth_train, y_train = create_array(train_ids, xy)
        x_rgb_test, x_depth_test, y_test = create_array(test_ids, xy)

        #A fold cannot be trained or evaluated without samples on both sides.
        if len(y_train) == 0 or len(y_test) == 0:
            print('Skipping ' + str(i) + ': no usable training or test samples')
            continue

        #Scaling the values to a value between 0 and 1
        x_rgb_train = x_rgb_train.astype('float32') / 255
        x_rgb_test = x_rgb_test.astype('float32') / 255
        x_depth_train = x_depth_train.astype('float32') / 255
        x_depth_test = x_depth_test.astype('float32') / 255

        depth_model,depth_str_model = create_cnn(x_depth_train,
                                     filter_info=filter_info,
                                     dropout=dropout,
                                     pooling=pooling,
                                     activation=activation)

        rgb_model,rgb_str_model = create_cnn(x_rgb_train,
                                     filter_info=filter_info,
                                     dropout=dropout,
                                     pooling=pooling,
                                     activation=activation)

        #Both branches are built from the same settings, so one description covers the report.
        str_model = rgb_str_model

        #The rest of the model employs the functional api of Keras
        rgb_input = Input(shape=(x_rgb_train.shape[1:]))
        encoded_rgb = rgb_model(rgb_input)

        depth_input = Input(shape=(x_depth_train.shape[1:]))
        encoded_depth = depth_model(depth_input)

        merged = keras.layers.concatenate([encoded_rgb, encoded_depth])
        #NOTE(review): these two Dense layers are created without an activation - confirm this is intended.
        a = Dense(256)(merged)
        b = Dense(512)(a)
        output = Dense(1, activation=final_activation)(b)

        model = Model(inputs=[rgb_input, depth_input], outputs=output)

        model.compile(optimizer='sgd',
              loss=loss,
              metrics=['accuracy'])

        #Fit the model
        model.fit([x_rgb_train, x_depth_train],
                  [y_train],
                  epochs=epochs,
                  batch_size=batch_size)

        #Create predictions and evaluate to find loss and accuracy
        model_score = model.evaluate(x =[x_rgb_test, x_depth_test], y=y_test)
        predict = model.predict([x_rgb_test, x_depth_test])
        print(predict)
        print('Model was ' + str(model_score[1]) + '% accurate and exhibited an average loss of ' + str(model_score[0]) + '.')

        matrix,score = confusion_matrix(y_test, predict)

        matrices.update({i : matrix})
        print(str(matrix) + '\n')
        scores.update({i: score})
        print(str(score) + '\n')
        model_scores.update({i:model_score})

    #Cumulative confusion matrix over all folds
    matrix = pd.DataFrame(np.zeros((2,2)) , ['P0','P1'], ['E0','E1'])

    """
    Visualization of the df
       E0 E1
    P0
    P1
    """

    num_predictions = 0

    for fold_matrix in matrices.values():
        for row in range(2):
            for col in range(2):
                matrix.iloc[row, col] += fold_matrix.iloc[row, col]
                num_predictions += fold_matrix.iloc[row, col]

    diagonal_sum = matrix.iloc[0, 0] + matrix.iloc[1, 1]
    final_score = diagonal_sum/num_predictions if num_predictions else float('nan')

    #Holding that CSGM is positive and not CSGM is negative.
    #Only (predicted 1, expected 1) is a true positive; (P0, E0) holds the true negatives.
    true_pos = matrix.loc['P1', 'E1']
    false_pos = matrix.loc['P1', 'E0']
    false_negative = matrix.loc['P0', 'E1']

    precision = true_pos/(true_pos+false_pos) if (true_pos+false_pos) else float('nan')
    recall = true_pos/(true_pos+false_negative) if (true_pos+false_negative) else float('nan')

    with open(file_name, 'w') as f:
        for key in matrices:
            f.write("Baby %s\n" % key)
            f.write("%s\n" % str_model)
            f.write("%s\n" % matrices[key])
            f.write("%s\n" % scores[key])
            f.write("%s\n" % model_scores[key])
        f.write('Cumlative Statistics\n')
        f.write("%s\n" % matrix)
        f.write('Final score is %s\n' % final_score)
        f.write('Precision of this model is %s\n' % precision)
        f.write('Recall of this model is %s\n' % recall)

    #Saves the model of the last fold that was trained, under the requested name.
    if save_model and model is not None:
        model.save(model_name)
    #Add a final matrix 

In [31]:
#File name for the statistics to be saved in. Must include .txt at the end
file_name = 'late_integration.txt'

#Path of the location you would like to save the model to. It is only used when
#save_model=True is passed to runTest. Must have the h5py package installed or the
#model will not save.
model_file_name = 'test'

filter_info={0:[32,3]}
dropout={0:0.25}
pooling={0:2}


runTest(file_name=file_name, 
        model_name=model_file_name,
        filter_info=filter_info, 
        dropout=dropout, 
        pooling=pooling, 
        loss='binary_crossentropy', 
        activation='relu',
        epochs=1, 
        batch_size=16,
        final_activation='sigmoid')



Testing on 2
Num is 23 and num isn't 27
Length 54
Overview of Model Architecture: 
2D Convulution Layer with 32 filters the size of (3,3) and relu activation 
2D Convulution Layer with 32 filters the size of (3,3) and relu activation 
2D Pooling Max Pooling Layer with filter size (2,2)
Droput Layer with with a rate of 0.250000 
Flatten

Overview of Model Architecture: 
2D Convulution Layer with 32 filters the size of (3,3) and relu activation 
2D Convulution Layer with 32 filters the size of (3,3) and relu activation 
2D Pooling Max Pooling Layer with filter size (2,2)
Droput Layer with with a rate of 0.250000 
Flatten

Epoch 1/1
[[0.5051947 ]
 [0.50510025]
 [0.5072637 ]
 [0.5048847 ]
 [0.50715727]
 [0.5051833 ]
 [0.50573593]
 [0.5058228 ]
 [0.50674194]
 [0.505481  ]
 [0.51400554]
 [0.5053894 ]
 [0.5027432 ]
 [0.5040253 ]
 [0.508105  ]
 [0.5042448 ]
 [0.50559807]
 [0.5083286 ]
 [0.50626665]
 [0.50463253]
 [0.5074105 ]
 [0.5070746 ]
 [0.50418276]
 [0.5055795 ]
 [0.50346094]
 [0.5055504 