#### 

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
from functools import reduce
import cv2 
import math
import pickle
from mpl_toolkits.axes_grid1 import ImageGrid
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from sklearn.metrics import precision_recall_fscore_support

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
from keras.applications import ResNet50
from keras.applications.resnet import preprocess_input
from keras.callbacks import LambdaCallback

from scipy.optimize import minimize

In [4]:
# When the kernel starts inside the `notebooks` folder, move up to its parent
# directory so the relative `data/...` path used below resolves.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir(os.path.dirname(os.getcwd()))
print(os.getcwd())

C:\Users\smcko\Documents\programming\ocdisrecog-analytics


In [5]:
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
# The `with` block closes the file handle, which a bare open() call leaves dangling.
with open('data/preprocessed_array_150x150.pkl', 'rb') as pkl_file:
    header, images = pickle.load(pkl_file)

In [6]:
SHAPE = (150,150,3)

In [7]:
# Label matrix: header columns 7..14 (8 columns) cast to int — one column per
# unit of the 8-wide sigmoid output layers built below.
Y = header[:,7:15].astype(int)

In [8]:
# Hold out 10% of the rows as the test set, stratified on header column 16.
# The row indices are split alongside the data so the matching header rows
# can still be looked up afterwards.
(X_train, X_test,
 y_train, y_test,
 ixes_train, ixes_test) = train_test_split(
    images, Y, np.arange(images.shape[0]),
    test_size=.1,
    stratify=header[:, 16],
    random_state=0,
)

In [9]:
# Second split: 10% of the remaining training rows become the validation ("cv")
# set, stratified on header column 16 restricted to the training rows.
# NOTE(review): this overwrites X_train / y_train, so the cell cannot be re-run
# on its own without re-running the previous split first.
X_train, X_cv, y_train, y_cv = train_test_split(
    X_train, y_train,
    test_size=.1,
    stratify=header[ixes_train, 16],
    random_state=0,
)

In [10]:
# Give both image arrays an explicit (n_samples, *SHAPE) layout. The per-image
# dimensions come from SHAPE rather than repeating the literals, so the input
# size is defined in one place only.
X_train = X_train.reshape(X_train.shape[0], *SHAPE)
X_cv = X_cv.reshape(X_cv.shape[0], *SHAPE)

## Model - Self Designed

## Functions

In [39]:
def f1_callback(X_train, y_train, X_cv, y_cv, model):
    """Build an epoch-end hook that prints thresholded multilabel F1 scores.

    The returned function takes ``(epoch, logs)``. Each call predicts on the
    training and validation sets with ``model``, fits a decision threshold on
    the training predictions via ``find_threshold``, and prints the
    ``multilabel_f1score`` result for both sets using that one threshold.
    """
    def _report_epoch_f1(epoch, logs):
        print(epoch)
        print(logs.keys())

        train_scores = model.predict(X_train, batch_size=50)
        cv_scores = model.predict(X_cv, batch_size=50)

        # The cut-off is tuned on the training predictions only and then
        # reused unchanged for the validation predictions.
        cutoff = find_threshold(y_train, train_scores)

        print("\nTraining F1 Score\n")
        print(multilabel_f1score(y_train, apply_thresh(train_scores, cutoff)))
        print("\nValidation F1 Score\n")
        print(multilabel_f1score(y_cv, apply_thresh(cv_scores, cutoff)))

    return _report_epoch_f1
        
        
        

def f1(ar, ar_pred):
    """Return the F1 score of one binary label column.

    Parameters
    ----------
    ar : array-like
        Ground-truth labels; entries equal to 1 are the positive class.
    ar_pred : array-like
        Predicted labels, same length as ``ar``.

    Returns
    -------
    float
        ``2 * precision * recall / (precision + recall)``, or ``0.0`` when
        there are no true positives.
    """
    ar = np.asarray(ar)
    ar_pred = np.asarray(ar_pred)
    actual_pos = ar == 1
    predicted_pos = ar_pred == 1

    tp = np.count_nonzero(actual_pos & predicted_pos)
    # False positive: predicted 1 although the truth is not 1.
    fp = np.count_nonzero(predicted_pos & ~actual_pos)
    # False negative: truth is 1 but the prediction is not 1.
    fn = np.count_nonzero(actual_pos & ~predicted_pos)

    # Without true positives precision and/or recall is zero or undefined;
    # the score is defined as 0 in all of those cases.
    if tp == 0:
        return 0.0
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)

def multilabel_f1score(y, y_pred):
    """Score each label column with ``f1`` and average the results.

    Returns a ``(mean_score, per_column_scores)`` tuple, where the second
    element is a list with one score per column of ``y``.
    """
    n_labels = y.shape[1]
    per_label = [f1(y[:, j], y_pred[:, j]) for j in range(n_labels)]
    return np.mean(per_label), per_label

def apply_thresh(y_pred, threshold):
    """Binarise scores: 1 where ``y_pred >= threshold``, 0 elsewhere.

    Returns a new array with the dtype of ``y_pred``; the input is left
    untouched. ``threshold`` may be a scalar or any array that broadcasts
    against ``y_pred``. NaN scores compare false and therefore map to 0.
    """
    y_pred = np.asarray(y_pred)
    # Compare once against the untouched scores. Overwriting in two passes
    # (>= threshold -> 1, then < threshold -> 0) re-tests the freshly written
    # 1s and wipes them out again whenever threshold > 1.
    return (y_pred >= threshold).astype(y_pred.dtype)


def threshold_func(x, y, y_pred):
    """Objective for the threshold search: ``1 - mean F1`` at threshold ``x``.

    ``y_pred`` is binarised with ``apply_thresh(y_pred, x)`` and scored against
    ``y`` with ``multilabel_f1score``; only the mean score is used.
    """
    mean_f1 = multilabel_f1score(y, apply_thresh(y_pred, x))[0]
    return 1 - mean_f1

def find_threshold(y, y_pred):
    """Search for the threshold that minimises ``threshold_func`` on ``(y, y_pred)``.

    Runs a Nelder-Mead search starting from 0.25 and returns the optimiser's
    solution array (``.x``), so callers index ``[0]`` to get a scalar.
    """
    search_result = minimize(threshold_func, .25, args=(y, y_pred), method='Nelder-Mead')
    return search_result.x

## Simple model

In [40]:
model_simple = Sequential()

In [41]:
# Five identical conv -> max-pool stages whose filter count doubles each time
# (8, 16, 32, 64, 128); only the very first layer declares the input shape.
for stage_ix, n_filters in enumerate((8, 16, 32, 64, 128)):
    first_layer_kwargs = {'input_shape': SHAPE} if stage_ix == 0 else {}
    model_simple.add(Conv2D(n_filters, kernel_size=5, padding='same', activation="relu",
                            **first_layer_kwargs))
    model_simple.add(MaxPooling2D(pool_size=(4,4), strides=(2,2)))

# Classifier head: flatten the feature maps and emit 8 independent sigmoid outputs.
model_simple.add(Flatten())
model_simple.add(Dense(8, activation='sigmoid'))

In [42]:
model_simple.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 150, 150, 8)       608       
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 74, 74, 8)         0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 74, 74, 16)        3216      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 36, 36, 16)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 36, 36, 32)        12832     
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 17, 17, 32)        0         
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 17, 17, 64)       

In [43]:
model_simple.compile(optimizer='adam', loss='binary_crossentropy')


In [44]:
testmodelcb = LambdaCallback(on_epoch_end=f1_callback(X_train, y_train, X_cv, y_cv, model_simple))

In [46]:
model_simple.fit(X_train, y_train,
                        epochs=10,
                        verbose=True,
                        validation_data=(X_cv, y_cv),
                        batch_size=20, callbacks=[testmodelcb])

Epoch 1/10
dict_keys(['loss', 'val_loss'])

Training F1 Score

(0.38906174502960544, [0.5071301247771836, 0.5016067776803973, 0.3798449612403101, 0.6161290322580645, 0.18952618453865339, 0.10050251256281406, 0.4176285414480587, 0.4001258257313621])

Validation F1 Score

(0.3482330525276106, [0.4879356568364612, 0.5026315789473684, 0.2278481012658228, 0.5901639344262295, 0.04081632653061225, 0.09090909090909091, 0.42553191489361697, 0.4200278164116829])
Epoch 2/10
dict_keys(['loss', 'val_loss'])

Training F1 Score

(0.4219175757479685, [0.5292876845692033, 0.5544091882421647, 0.39279869067103107, 0.5930851063829786, 0.24746450304259637, 0, 0.6321626617375231, 0.4261327713382507])

Validation F1 Score

(0.38362749552094777, [0.5166908563134979, 0.5335689045936395, 0.2571428571428571, 0.5526315789473685, 0.1904761904761905, 0, 0.5964912280701754, 0.42201834862385323])
Epoch 3/10
dict_keys(['loss', 'val_loss'])

Training F1 Score

(0.3937961513946461, [0.4998537584088915, 0.527666399358460

<tensorflow.python.keras.callbacks.History at 0x2332e2462b0>

In [92]:
y_pred_simple = model_simple.predict(X_train)

In [128]:
find_threshold(y_train, y_pred_simple)[0]
#threshold_func(.2, y_train, y_pred_simple)

0.20566406249999997

In [172]:
# Create the model here: no other cell in the notebook defines `model`, so
# without this line the cell fails on a fresh kernel, and re-running it would
# stack a second copy of the layers onto the existing model.
model = Sequential()

# (filters, pooling stride) for each conv -> max-pool stage. The first four
# stages pool with stride 1, the last two with stride 2.
conv_stages = [(8, 1), (16, 1), (32, 1), (64, 1), (128, 2), (256, 2)]
for stage_ix, (n_filters, pool_stride) in enumerate(conv_stages):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': SHAPE} if stage_ix == 0 else {}
    model.add(Conv2D(n_filters, kernel_size=3, padding='same', activation="relu",
                     **first_layer_kwargs))
    model.add(MaxPooling2D(pool_size=(4,4), strides=(pool_stride, pool_stride)))

# Classifier head: flatten the feature maps and emit 8 independent sigmoid outputs.
model.add(Flatten())
model.add(Dense(8, activation='sigmoid'))

In [173]:
model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_62 (Conv2D)           (None, 75, 75, 8)         224       
_________________________________________________________________
max_pooling2d_55 (MaxPooling (None, 72, 72, 8)         0         
_________________________________________________________________
conv2d_63 (Conv2D)           (None, 72, 72, 16)        1168      
_________________________________________________________________
max_pooling2d_56 (MaxPooling (None, 69, 69, 16)        0         
_________________________________________________________________
conv2d_64 (Conv2D)           (None, 69, 69, 32)        4640      
_________________________________________________________________
max_pooling2d_57 (MaxPooling (None, 66, 66, 32)        0         
_________________________________________________________________
conv2d_65 (Conv2D)           (None, 66, 66, 64)      

In [174]:

model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(X_train, y_train,
                        epochs=10,
                        verbose=True,
                        validation_data=(X_cv, y_cv),
                        batch_size=20, callbacks=[])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2af093eca90>

In [175]:
y_pred_cv = model.predict(X_cv)

In [25]:
# F1 score = 2 * (precision * recall) / (precision + recall)
# precision = tp / (tp + fp), recall = tp / (tp + fn)
def f1(ar, ar_pred):
    """Return the F1 score of one binary label column and print its counts.

    This redefines the ``f1`` from the Functions section, adding a debug print
    of ``tp fp fn`` (true positives, false positives, false negatives).

    Parameters
    ----------
    ar : array-like
        Ground-truth labels; entries equal to 1 are the positive class.
    ar_pred : array-like
        Predicted labels, same length as ``ar``.

    Returns
    -------
    float
        ``2 * precision * recall / (precision + recall)``, or ``0.0`` when
        there are no true positives.
    """
    ar = np.asarray(ar)
    ar_pred = np.asarray(ar_pred)
    actual_pos = ar == 1
    predicted_pos = ar_pred == 1

    tp = np.count_nonzero(actual_pos & predicted_pos)
    # False positive: predicted 1 although the truth is not 1.
    fp = np.count_nonzero(predicted_pos & ~actual_pos)
    # False negative: truth is 1 but the prediction is not 1.
    fn = np.count_nonzero(actual_pos & ~predicted_pos)

    # Same column order as before (the middle column has always been the
    # "predicted 1, actually not 1" count); only the variable names are fixed.
    print(tp, fp, fn)

    # Without true positives precision and/or recall is zero or undefined;
    # the score is defined as 0 in all of those cases.
    if tp == 0:
        return 0.0
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)

def multilabel_f1score(y, y_pred):
    """Compute ``f1`` column by column and return ``(mean, per_column_list)``.

    Columns are taken from ``y`` and ``y_pred`` at the same index, for every
    column index of ``y``.
    """
    column_scores = [
        f1(y[:, label_ix], y_pred[:, label_ix])
        for label_ix in range(y.shape[1])
    ]
    return np.mean(column_scores), column_scores
        

In [177]:
y_pred = model.predict(X_train)

In [26]:
def apply_thresh(y_pred, threshold):
    """Binarise scores: 1 where ``y_pred >= threshold``, 0 elsewhere.

    Returns a new array with the dtype of ``y_pred``; the input is left
    untouched. ``threshold`` may be a scalar or any array that broadcasts
    against ``y_pred``. NaN scores compare false and therefore map to 0.
    """
    y_pred = np.asarray(y_pred)
    # A single comparison against the original scores avoids the two-pass
    # overwrite, where the 1s written first were re-tested against the
    # threshold and reset to 0 for any threshold > 1.
    return (y_pred >= threshold).astype(y_pred.dtype)

In [187]:
multilabel_f1score(y_train, apply_thresh(y_pred,.19))

1625 2863 84
1611 2638 108
66 63 247
241 208 89
16 22 234
14 10 147
162 110 87
1146 3000 132


(0.41107762660487934,
 [0.5244473132160723,
  0.5398793565683647,
  0.2986425339366516,
  0.6187419768934532,
  0.1111111111111111,
  0.15135135135135133,
  0.6218809980806143,
  0.4225663716814159])

In [188]:
multilabel_f1score(y_cv, apply_thresh(y_pred_cv, .19))

172 333 12
174 301 17
6 8 32
23 21 11
0 3 33
1 4 18
14 12 14
124 320 30


(0.3573596530682257,
 [0.4992743105950654,
  0.5225225225225225,
  0.23076923076923078,
  0.5897435897435898,
  0,
  0.08333333333333333,
  0.5185185185185186,
  0.4147157190635451])

In [141]:
np.mean([0.        , 0.22488314, 0.32800321, 0.01568627, 0.03556188,
       0.5681946 , 0.62132543, 0.0204434 ])

0.22676224125

# Pre-trained model

In [66]:
# preprocess_input may write its result over a floating-point input array.
# Passing copies keeps X_train / X_cv intact for the other models in this
# notebook and makes the cell safe to re-run.
X_train_resnet = preprocess_input(X_train.copy())
X_cv_resnet = preprocess_input(X_cv.copy())

In [67]:
model2 = Sequential()

In [68]:
model2.add(ResNet50(include_top=False, weights='imagenet', input_shape=(150,150,3)))

In [69]:
model2.add(Flatten())

In [70]:
model2.add(Dense(8, activation='sigmoid'))

In [71]:
model2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Functional)        (None, 5, 5, 2048)        23587712  
_________________________________________________________________
flatten_2 (Flatten)          (None, 51200)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 409608    
Total params: 23,997,320
Trainable params: 23,944,200
Non-trainable params: 53,120
_________________________________________________________________


In [72]:
model2.compile(optimizer='adam', loss='binary_crossentropy')

In [73]:
model2.fit(X_train_resnet, y_train,
                        epochs=10,
                        verbose=True,
                        validation_data=(X_cv_resnet, y_cv),
                        batch_size=20, callbacks=[])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1ed90e8a4c0>

In [74]:
y_resnet_pred = model2.predict(X_train_resnet)

In [75]:
y_resnet_pred_cv = model2.predict(X_cv_resnet)

In [83]:
multilabel_f1score(y_train, apply_thresh(y_resnet_pred,.125))

1504 1244 205
1570 1567 149
120 24 193
308 811 22
103 50 147
75 87 86
160 3 89
882 771 396


(0.5782381236548413,
 [0.6748934260713485,
  0.6466227347611203,
  0.5251641137855579,
  0.42512077294685985,
  0.511166253101737,
  0.4643962848297214,
  0.7766990291262135,
  0.601842374616172])

In [84]:
multilabel_f1score(y_cv, apply_thresh(y_resnet_pred_cv,.125))

131 209 53
145 225 46
3 6 35
26 90 8
5 12 28
3 10 16
14 3 14
47 127 107


(0.34844598444455543,
 [0.5,
  0.5169340463458111,
  0.12765957446808512,
  0.3466666666666667,
  0.19999999999999998,
  0.18749999999999997,
  0.6222222222222222,
  0.2865853658536585])

In [None]:
(0.3573596530682257,
 [0.4992743105950654,
  0.5225225225225225,
  0.23076923076923078,
  0.5897435897435898,
  0,
  0.08333333333333333,
  0.5185185185185186,
  0.4147157190635451])