In [1]:

import tensorflow as tf
import pickle
import os 
import cv2
import numpy as np
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score
print(tf.__version__)



2.3.3


In [2]:
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"

In [3]:
! export TF_FORCE_GPU_ALLOW_GROWTH=true

In [2]:
IMAGE_SIZE = (224,224,3)

In [3]:
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

In [4]:
###
### Use all GPUs.
###
strategy = tf.distribute.MirroredStrategy()
print()
print()
print()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)



Number of devices: 1


2021-11-12 15:28:45.693383: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-12 15:28:45.713676: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f85af18c270 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-11-12 15:28:45.713692: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version


In [5]:
def make_model_2():
    def build_convnet(shape=None):
        momentum = 0.9
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(64, (3,3), input_shape=shape[1:], padding='same', activation='linear'))
        model.add(tf.keras.layers.BatchNormalization(momentum=momentum))
        model.add(tf.keras.layers.Activation('relu'))

        model.add(tf.keras.layers.MaxPool2D())

        model.add(tf.keras.layers.Conv2D(128, (3,3), padding='same', activation='linear'))
        model.add(tf.keras.layers.BatchNormalization(momentum=momentum))
        model.add(tf.keras.layers.Activation('relu'))

        model.add(tf.keras.layers.MaxPool2D())

        model.add(tf.keras.layers.Conv2D(256, (3,3), padding='same', activation='linear'))
        model.add(tf.keras.layers.BatchNormalization(momentum=momentum))
        model.add(tf.keras.layers.Activation('relu'))

        # flatten
        model.add(tf.keras.layers.GlobalMaxPool2D())
        return model
    shape = (90, IMAGE_SIZE[0], IMAGE_SIZE[1], IMAGE_SIZE[2])
    print('Train data shape: ', shape)

    convnet = build_convnet(shape)

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.TimeDistributed(convnet, input_shape=shape))
    model.add(tf.keras.layers.LSTM(64))

    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dropout(.5))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dropout(.5))
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dropout(.5))
    model.add(tf.keras.layers.Dense(16, activation='relu'))

    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    
    return model
        

In [6]:
def make_model(): 
    inp =  tf.keras.layers.Input((None, IMAGE_SIZE[0], IMAGE_SIZE[1], IMAGE_SIZE[2])) # , ragged=True
    
    mobilenet_model = tf.keras.applications.mobilenet_v2.MobileNetV2(
        include_top=False, weights='imagenet', pooling='max', classes=2, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
    )
    
    for k,v in mobilenet_model._get_trainable_state().items():
        k.trainable = False
    
#     mobilenet_model = tf.keras.models.Sequential()
#     mobilenet_model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)))
#     mobilenet_model.add(tf.keras.layers.MaxPooling2D((2, 2)))
#     mobilenet_model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu'))
# #     mobilenet_model.add(tf.keras.layers.MaxPooling2D((2, 2)))
# #     mobilenet_model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
#     mobilenet_model.add(tf.keras.layers.Flatten())
    
    x = tf.keras.layers.TimeDistributed(mobilenet_model)(inp)
    x = tf.keras.layers.LSTM(64, return_sequences=False)(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    out = tf.keras.layers.Dense(1, activation = 'sigmoid')(x)

    model = tf.keras.Model(inp, out)

    model.compile(loss = tf.keras.losses.BinaryCrossentropy(), optimizer = tf.keras.optimizers.Adam(learning_rate=0.01), metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

    return model

In [9]:
# model = make_model()

# model.build(input_shape=IMAGE_SIZE)
# model.summary()

In [10]:
all_accs = []
all_pres = []
all_recs = []
all_f1s = []

In [7]:
###
### CHANGE THIS TO THE CORRECT STARTING PLACE BEFORE RUNNING THE EXPERIMENT AGAIN!!!
### This applies if the notebook connection was interrupted during the middle of
### the training process..
###
STARTING_FOLD_INDEX = 319

In [8]:
from scipy import interp 
import pickle, numpy as np
from sklearn.metrics import roc_curve, roc_auc_score
base_fpr = np.linspace(0, 1, 101)

In [9]:
def get_means(TPRS, aurocs): 
    
    mean_tpr = np.mean(TPRS, axis=0)

    # plot the auroc curves 
    mean_auroc = sum(aurocs) / len(aurocs)

    return mean_tpr, mean_auroc

In [10]:

FOLD_TPRS = [] # all the saved TPRS
FOLD_AUROCS = [] # all the saved AUROCs
META_RESULT_MATRIX = [] # all the saved results

with open("file_names_folds.pkl", 'rb') as f: 
    SEEDS, FOLD_FILES = pickle.load(f)

index_to_start_at = STARTING_FOLD_INDEX

TPRS, FPRS, local_aurocs = [],[], []
for fold in FOLD_FILES[index_to_start_at:]:

    print('FOLD::: ', fold)

    train_files = [a.strip('_') for a in fold['train']]
    test_files = [a.strip('_') for a in fold['test']]

    X_train = []
    X_test = []

    y_train = []
    y_test = []

    for filename in train_files:
        filename_int = int(filename.split('.mp4')[0])

        if filename_int <= 115:
            curr_y = 1
            subdir_name = 'armflapping'
        else:
            curr_y = 0
            subdir_name = 'control'

        curr_x = []
        for frame in os.listdir('behavior_data/' + subdir_name + '/' + filename):

            frame_num = int(frame.split('.')[0])
            if frame_num > 90:
                continue

            image = cv2.imread('behavior_data/' + subdir_name + '/' + filename + '/' + frame)
            try:
                image = image.reshape((image.shape[0], image.shape[1], image.shape[2]))
            except:
                continue

            image = cv2.resize(image, (224, 224))
            curr_x.append(image)

        len_data = len(os.listdir('behavior_data/' + subdir_name + '/' + filename))
        if len_data < 90:
            for abc in range(len_data, 90):
                curr_x.append(np.zeros((224, 224, 3)))

        curr_x = np.array(curr_x)

        X_train.append(curr_x)
        y_train.append(curr_y)

    for filename in test_files:
        filename_int = int(filename.split('.mp4')[0])

        if filename_int <= 115:
            curr_y = 1
            subdir_name = 'armflapping'
        else:
            curr_y = 0
            subdir_name = 'control'

        curr_x = []
        for frame in os.listdir('behavior_data/' + subdir_name + '/' + filename):

            frame_num = int(frame.split('.')[0])
            if frame_num > 90:
                continue

            image = cv2.imread('behavior_data/' + subdir_name + '/' + filename + '/' + frame)
            try:
                image = image.reshape((image.shape[0], image.shape[1], image.shape[2]))
            except:
                continue

            image = cv2.resize(image, (224, 224))
            curr_x.append(image)

        len_data = len(os.listdir('behavior_data/' + subdir_name + '/' + filename))
        if len_data < 90:
            for abc in range(len_data, 90):
                curr_x.append(np.zeros((224, 224, 3)))

        curr_x = np.array(curr_x)

        X_test.append(curr_x)
        y_test.append(curr_y)

    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    model = make_model()

    model.compile(loss = tf.keras.losses.BinaryCrossentropy(), 
                    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001), 
                    metrics = [['accuracy', tf.keras.metrics.Precision(name="precision"), tf.keras.metrics.Recall(name="recall")]])

    history = model.fit(X_train, 
                        y_train, 
                        #validation_data = (X_test, y_test),
                        batch_size = 16,
                        epochs = 60)


    ###
    ### EVALUATE HERE!!!!!
    ###

    predictions = []
    trues = []
    for i in range(X_test.shape[0]):
        X_to_predict = np.array([X_test[i]])
        pred = model(X_to_predict)
        print(pred)
        true = y_test[i]
        if pred < 0.5:
            predictions.append(0)
        else:
            predictions.append(1)
        trues.append(true)

    acc = accuracy_score(trues, predictions)
    pre = precision_score(trues, predictions)
    rec = recall_score(trues, predictions)
    f1 = f1_score(trues, predictions)

    print('\n\n\n\n\n\n\n  ', acc, pre, rec, f1, ' \n\n\n\n\n\n\n')

    all_accs.append(acc)
    all_pres.append(pre)
    all_recs.append(rec)
    all_f1s.append(f1)

    # get the training scores 
    training_accuracy = history.history['accuracy'][-1]
    training_precision = history.history['precision'][-1]
    training_recall = history.history['recall'][-1]
    training_f1 = 2 * training_precision * training_recall / (training_precision + training_recall + tf.keras.backend.epsilon())

    with open('performances/' + str(STARTING_FOLD_INDEX) + '_results.txt', 'w') as f:
        f.write('Training Accuracy: ' + str(training_accuracy) + '\n')
        f.write('Training Precision: ' + str(training_precision) + '\n')
        f.write('Training Recall: ' + str(training_recall) + '\n')
        f.write('Training F1: ' + str(training_f1) + '\n')
        f.write('Validation Accuracy: ' + str(acc) + '\n')
        f.write('Validation Precision: ' + str(pre) + '\n')
        f.write('Validation Recall: ' + str(rec) + '\n')
        f.write('Validation F1: ' + str(f1) + '\n')
        f.write('AUROC: ' + str(roc_auc_score(trues, predictions)) + '\n')
        f.close()
        
    STARTING_FOLD_INDEX += 1

    fpr, tpr, _ = roc_curve(trues, predictions) 
    tpr = interp(base_fpr, fpr, tpr)
    tpr[0] = 0.0 

    with open("tprs/" + str(STARTING_FOLD_INDEX) + "_tprs.pickle", "wb") as f:
        pickle.dump(tpr, f)
    

FOLD:::  {'train': ['_113.mp4', '_75.mp4', '_88.mp4', '_61.mp4', '_76.mp4', '_49.mp4', '_17.mp4', '_101.mp4', '_14.mp4', '_65.mp4', '_100.mp4', '_89.mp4', '_99.mp4', '_29.mp4', '_98.mp4', '_2.mp4', '_102.mp4', '_105.mp4', '_114.mp4', '_110.mp4', '_0.mp4', '_72.mp4', '_107.mp4', '_60.mp4', '_10.mp4', '_63.mp4', '_48.mp4', '_115.mp4', '_11.mp4', '_38.mp4', '_1.mp4', '_3.mp4', '_77.mp4', '_39.mp4', '_104.mp4', '_106.mp4', '_112.mp4', '_5.mp4', '_28.mp4', '_111.mp4', '_127.mp4', '_167.mp4', '_171.mp4', '_173.mp4', '_164.mp4', '_172.mp4', '_130.mp4', '_140.mp4', '_120.mp4', '_163.mp4', '_181.mp4', '_165.mp4', '_160.mp4', '_122.mp4', '_161.mp4', '_126.mp4', '_186.mp4', '_118.mp4', '_142.mp4', '_128.mp4', '_132.mp4', '_174.mp4', '_137.mp4', '_158.mp4', '_144.mp4', '_175.mp4', '_166.mp4', '_169.mp4', '_178.mp4', '_151.mp4', '_116.mp4', '_153.mp4', '_159.mp4', '_187.mp4', '_129.mp4', '_134.mp4', '_152.mp4', '_156.mp4', '_136.mp4', '_124.mp4'], 'test': ['_7.mp4', '_12.mp4', '_71.mp4', '_74.mp4',

In [None]:
# put this in a pickle file to send to anish 
"""
import pickle
with open("mobile_net_results.pkl", 'wb') as f: 
    pickle.dump((META_RESULT_MATRIX, FOLD_AUROCS, FOLD_TPRS), f)
"""

### Describe summary statistics for all 500 runs.

In [18]:
all_accs = []
all_pres = []
all_recs = []
all_f1s = []

for file in os.listdir('performances/'):
    with open('performances/' + file) as f:
        lines = f.readlines()
        
        acc = float(lines[0].split(': ')[1].strip('\n'))
        pre = float(lines[1].split(': ')[1].strip('\n'))
        rec = float(lines[2].split(': ')[1].strip('\n'))
        f1 = float(lines[3].split(': ')[1].strip('\n'))
        
        all_accs.append(acc)
        all_pres.append(pre)
        all_recs.append(rec)
        all_f1s.append(f1)
    

from scipy import stats
print('Acc: ', stats.describe(all_accs))
print('Pre: ', stats.describe(all_pres))
print('Rec: ', stats.describe(all_recs))
print('F1: ', stats.describe(all_f1s))

TypeError: cannot perform reduce with flexible type

In [27]:
'''
THIS IS HOW WE WILL HAVE TO EVALUATE PREDICTIONS!!!!!

https://stackoverflow.com/questions/56711354/tensorflow-v2-cancellederror-opstatefulpartitionedcall
'''

from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score

with strategy.scope():
    predictions = []
    trues = []
    for i in range(X_test.shape[0]):
        X_to_predict = np.array([X_test[i]])
        pred = model(X_to_predict)
        print(pred)
        true = y_test[i]
        if pred < 0.5:
            predictions.append(0)
        else:
            predictions.append(1)
        trues.append(true)
        
    print('Accuracy: ', accuracy_score(trues, predictions))
    print('Precision: ', precision_score(trues, predictions))
    print('Recall: ', recall_score(trues, predictions))
    print('F1 Score: ', f1_score(trues, predictions))
    

tf.Tensor([[0.8247404]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.9714775]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.9940625]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.99450547]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.9829216]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.9581735]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.9916317]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.9885062]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.81362104]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.9727558]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.05049171]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.4205745]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.1080795]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.04499972]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.0348262]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.04339964]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.03726655]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.01246449]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.6749961