In [1]:
import torch
import torchvision
import torchvision.models as models
from PIL import Image, ImageFile
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import gc; gc.enable()
import pandas as pd
from torch.utils.data import Dataset
import tensorflow as tf
from tensorflow import keras
from tensorflow.layers import Dense
from torchvision import transforms

In [3]:
# filter warnings
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
# keras imports
# from keras.applications.vgg16 import VGG16, preprocess_input
# from keras.applications.vgg19 import VGG19, preprocess_input
# from keras.applications.xception import Xception, preprocess_input
# from keras.applications.resnet50 import ResNet50, preprocess_input
# from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
# from keras.applications.mobilenet import MobileNet, preprocess_input
# from keras.applications.inception_v3 import InceptionV3, preprocess_input
# from keras.preprocessing import image
# from keras.models import Model
from keras.models import model_from_json
from keras.layers import Input
# other imports
from sklearn.preprocessing import LabelEncoder
import numpy as np
import glob
import cv2
import h5py
import os
import json
import datetime
import time
# load the user configs
with open('conf/conf.json') as f:    
    config = json.load(f)
# config variables
model_name = config["model"]
weights = config["weights"]
include_top = config["include_top"]
train_path = config["train_path"]
features_path = config["features_path"]
labels_path = config["labels_path"]
test_size = config["test_size"]
results = config["results"]
model_path = config["model_path"]
# start time
print ("[STATUS] start time - {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
start = time.time()
# create the pretrained models
# check for pretrained weight usage or not
# check for top layers to be included or not
if model_name == "vgg16":
    base_model = VGG16(weights=weights)
    model = Model(input=base_model.input, output=base_model.get_layer('fc1').output)
    image_size = (224, 224)
elif model_name == "vgg19":
    base_model = VGG19(weights=weights)
    model = Model(input=base_model.input, output=base_model.get_layer('fc1').output)
    image_size = (224, 224)
elif model_name == "resnet50":
    base_model = keras.applications.ResNet50(include_top=False, weights=weights)
    model = base_model
#     model = Model(input=base_model.input, output=base_model.layers[-1].output)
    image_size = (224, 224)
elif model_name == "inceptionv3":
    base_model = InceptionV3(include_top=include_top, weights=weights, input_tensor=Input(shape=(299,299,3)))
    model = Model(input=base_model.input, output=base_model.get_layer('custom').output)
    image_size = (299, 299)
elif model_name == "inceptionresnetv2":
    base_model = InceptionResNetV2(include_top=include_top, weights=weights, input_tensor=Input(shape=(299,299,3)))
#     model = Model(input=base_model.input, output=base_model.layers[-1].output)
    image_size = (299, 299)
elif model_name == "mobilenet":
    base_model = MobileNet(include_top=include_top, weights=weights, input_tensor=Input(shape=(224,224,3)), input_shape=(224,224,3))
    model = Model(input=base_model.input, output=base_model.get_layer('custom').output)
    image_size = (224, 224)
elif model_name == "xception":
    base_model = Xception(weights=weights)
    model = Model(input=base_model.input, output=base_model.get_layer('avg_pool').output)
    image_size = (299, 299)
else:
    base_model = None
print ("[INFO] successfully loaded base model and model...")


[STATUS] start time - 2019-08-22 13:06
Instructions for updating:
Colocations handled automatically by placer.




[INFO] successfully loaded base model and model...


In [4]:
def getRadius(img):
#     print(type(img.shape))
    circles = cv2.HoughCircles(img,3,1,max(img.shape)/2,param1=50,param2=30,minRadius= int(max(img.shape)/6),maxRadius=max(img.shape))
    return circles[0][0][2]

In [8]:
original_dataset_dir = '/Users/flatironschool/Documents/Kaggle/Kaggle-DR-detection/'
train_dir = original_dataset_dir+'dataset/train_images/'
#read in table with image ids and retinopathy severity rating for associated image
labels = pd.read_csv(original_dataset_dir + 'train.csv')

In [13]:
#one-hot encode retinopathy severity
pd_diagnoses = pd.get_dummies(labels['diagnosis'])

In [14]:
diagnoses = keras.utils.to_categorical(labels['diagnosis'])

In [15]:
labels_minus_diagnosis = labels.drop(columns = ['diagnosis'])
dummy_labels = pd.concat([labels_minus_diagnosis,pd_diagnoses], axis = 1)

In [17]:
dummy_labels=dummy_labels.set_index('id_code')

In [18]:
t = dummy_labels.transpose()
t_dict = t.to_dict()

In [9]:
sm_path = train_dir + '/sm'
imgs = {}

In [10]:
smudge = []

In [11]:
for img in os.listdir(train_dir):
    if img.endswith('.png'):
        img_array = cv2.imread(os.path.join(train_dir,img))
        #crop each image to 80% of fundus diameter
        try:
            r = getRadius(img_array)
            crop_r = round(r*0.8) 
        except:
            crop_r = (img_array.shape[1]/2)*0.8
            
        center_x = int(img_array.shape[1]/2)
        center_y = int(img_array.shape[0]/2)
    
        left_border = int(center_x - crop_r)
        right_border = int(center_x + crop_r)
        upper_border = int(center_y - crop_r)
        lower_border = int(center_y + crop_r)
    
        if left_border < 0:
            difference = 0- left_border
            left_border = 0
            right_border -=difference
            upper_border += difference
            lower_border -= difference
        if upper_border < 0:
            difference = 0- upper_border
            upper_border = 0
            lower_border -= difference
            left_border += difference
            right_border -= difference

        cropped = img_array[upper_border:lower_border, left_border:right_border]
        #resize image to resnet's expected input size
        a = cv2.resize(cropped, (224,224), interpolation = cv2.INTER_CUBIC)
#         lab = cv2.cvtColor(a, cv2.COLOR_BGR2LAB)
#         lab_planes = cv2.split(lab)
#         clahe = cv2.createCLAHE(clipLimit= 1,tileGridSize=(round(a.shape[0]/6),round(a.shape[1]/6)))
#         lab_planes[0] = clahe.apply(lab_planes[0])
#         lab = cv2.merge(lab_planes)
#         a = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
       
#         a =cv2.medianBlur(a,5)
#         a=cv2.addWeighted(a, 4, cv2.GaussianBlur(a,(0,0), 30), -4, 128)
        a = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)
        imgs[img[:-4]] = a
        gc.collect()
    
#         os.chdir(sm_path)
#         cv2.imwrite("sm"+"_"+img ,a)
#         os.chdir(path)   
#     print(ctr)

In [19]:
#generate arrays so that severity ratings have same order as their associated images
ordered_imgs = []
label_vectors = []
for key in imgs.keys():
    ordered_imgs.append(imgs[key])
    lbls = []
    for k in t_dict[key]:
        lbls.append(t_dict[key][k])
    label_vectors.append(lbls)


In [53]:
#instantiate pre-trained ResNet model
res_fifty = keras.applications.ResNet50(include_top=False, weights='imagenet')
# for layer in res_fifty.layers:
#     layer.trainable = False



In [63]:
pd.set_option('max_colwidth', -1)

layers = [(layer, layer.name, layer.trainable) for layer in res_fifty.layers]
df_layers = pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])
df_layers

Unnamed: 0,Layer Type,Layer Name,Layer Trainable
0,<tensorflow.python.keras.engine.input_layer.InputLayer object at 0x10c5f1a90>,input_3,False
1,<tensorflow.python.keras.layers.convolutional.ZeroPadding2D object at 0x15701dac8>,conv1_pad,False
2,<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x15701df98>,conv1,False
3,<tensorflow.python.keras.layers.normalization.BatchNormalizationV1 object at 0x15701def0>,bn_conv1,False
4,<tensorflow.python.keras.layers.core.Activation object at 0x15701deb8>,activation_98,False
5,<tensorflow.python.keras.layers.convolutional.ZeroPadding2D object at 0x157060eb8>,pool1_pad,False
6,<tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x157060a58>,max_pooling2d_2,False
7,<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x156fb5fd0>,res2a_branch2a,False
8,<tensorflow.python.keras.layers.normalization.BatchNormalizationV1 object at 0x1568656a0>,bn2a_branch2a,False
9,<tensorflow.python.keras.layers.core.Activation object at 0x156865b38>,activation_99,False


In [62]:
p = 175*0.8
cnt = 0
set_trainable = False
for layer in res_fifty.layers:
    if cnt > p:
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False
    cnt+=1

In [74]:
ordered_imgs[0].shape

(224, 224, 3)

In [72]:
datagen = keras.preprocessing.image.ImageDataGenerator(rotation_range = 180,horizontal_flip = True, vertical_flip = True,data_format = 'channels_last')
batch_size = 20
# datagen.fit(ordered_imgs)
 
train_generator = datagen.flow(
    np.asarray(ordered_imgs),
    label_vectors,
    batch_size=batch_size,
    shuffle = False)

train_steps_per_epoch = len(ordered_imgs) // batch_size
model.compile(keras.optimizers.Adam(lr = 0.0001), loss = 'categorical_crossentropy')
history = model.fit_generator(train_generator,
                              steps_per_epoch=train_steps_per_epoch,
                              epochs=15, verbose=1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/15


InvalidArgumentError: Incompatible shapes: [20,5] vs. [20,7,7,2048]
	 [[{{node training/Adam/gradients/loss/activation_48_loss/mul_grad/BroadcastGradientArgs}}]]

In [73]:
%debug

> [0;32m/Users/flatironschool/.local/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py[0m(533)[0;36m__exit__[0;34m()[0m
[0;32m    530 [0;31m    [0;31m# as there is a reference to status from this from the traceback due to[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    531 [0;31m    [0;31m# raise.[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    532 [0;31m    [0;32mfinally[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m--> 533 [0;31m      [0;32mdel[0m [0mself[0m[0;34m.[0m[0mstatus[0m[0;34m[0m[0m
[0m[0;32m    534 [0;31m    [0;32mreturn[0m [0;32mFalse[0m  [0;31m# False values do not suppress exceptions[0m[0;34m[0m[0m
[0m


ipdb>  up


> [0;32m/Users/flatironschool/.local/lib/python3.6/site-packages/tensorflow/python/client/session.py[0m(1445)[0;36m__call__[0;34m()[0m
[0;32m   1443 [0;31m      [0;32mfinally[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m   1444 [0;31m        [0;32mif[0m [0mrun_metadata_ptr[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m-> 1445 [0;31m          [0mtf_session[0m[0;34m.[0m[0mTF_DeleteBuffer[0m[0;34m([0m[0mrun_metadata_ptr[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m   1446 [0;31m      [0;32mreturn[0m [0mret[0m[0;34m[0m[0m
[0m[0;32m   1447 [0;31m[0;34m[0m[0m
[0m


ipdb>  up


> [0;32m/Users/flatironschool/.local/lib/python3.6/site-packages/tensorflow/python/keras/backend.py[0m(3076)[0;36m__call__[0;34m()[0m
[0;32m   3074 [0;31m[0;34m[0m[0m
[0m[0;32m   3075 [0;31m    fetched = self._callable_fn(*array_vals,
[0m[0;32m-> 3076 [0;31m                                run_metadata=self.run_metadata)
[0m[0;32m   3077 [0;31m    [0mself[0m[0;34m.[0m[0m_call_fetch_callbacks[0m[0;34m([0m[0mfetched[0m[0;34m[[0m[0;34m-[0m[0mlen[0m[0;34m([0m[0mself[0m[0;34m.[0m[0m_fetches[0m[0;34m)[0m[0;34m:[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m   3078 [0;31m    return nest.pack_sequence_as(self._outputs_structure,
[0m


ipdb>  up


> [0;32m/Users/flatironschool/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py[0m(1191)[0;36mtrain_on_batch[0;34m()[0m
[0;32m   1189 [0;31m      [0;32melse[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m   1190 [0;31m        [0mself[0m[0;34m.[0m[0m_make_fit_function[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m-> 1191 [0;31m        [0moutputs[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0m_fit_function[0m[0;34m([0m[0mins[0m[0;34m)[0m  [0;31m# pylint: disable=not-callable[0m[0;34m[0m[0m
[0m[0;32m   1192 [0;31m[0;34m[0m[0m
[0m[0;32m   1193 [0;31m    [0;32mif[0m [0mreset_metrics[0m[0;34m:[0m[0;34m[0m[0m
[0m


ipdb>  up


> [0;32m/Users/flatironschool/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_generator.py[0m(191)[0;36mmodel_iteration[0;34m()[0m
[0;32m    189 [0;31m      [0mprogbar[0m[0;34m.[0m[0mon_batch_begin[0m[0;34m([0m[0mstep[0m[0;34m,[0m [0mbatch_logs[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    190 [0;31m[0;34m[0m[0m
[0m[0;32m--> 191 [0;31m      [0mbatch_outs[0m [0;34m=[0m [0mbatch_function[0m[0;34m([0m[0;34m*[0m[0mbatch_data[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    192 [0;31m      [0;32mif[0m [0;32mnot[0m [0misinstance[0m[0;34m([0m[0mbatch_outs[0m[0;34m,[0m [0mlist[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m    193 [0;31m        [0mbatch_outs[0m [0;34m=[0m [0;34m[[0m[0mbatch_outs[0m[0;34m][0m[0;34m[0m[0m
[0m


ipdb>  up


> [0;32m/Users/flatironschool/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py[0m(1426)[0;36mfit_generator[0;34m()[0m
[0;32m   1424 [0;31m        [0muse_multiprocessing[0m[0;34m=[0m[0muse_multiprocessing[0m[0;34m,[0m[0;34m[0m[0m
[0m[0;32m   1425 [0;31m        [0mshuffle[0m[0;34m=[0m[0mshuffle[0m[0;34m,[0m[0;34m[0m[0m
[0m[0;32m-> 1426 [0;31m        initial_epoch=initial_epoch)
[0m[0;32m   1427 [0;31m[0;34m[0m[0m
[0m[0;32m   1428 [0;31m  def evaluate_generator(self,
[0m


ipdb>  up


> [0;32m<ipython-input-72-b58656000405>[0m(15)[0;36m<module>[0;34m()[0m
[0;32m     11 [0;31m[0mtrain_steps_per_epoch[0m [0;34m=[0m [0mlen[0m[0;34m([0m[0mordered_imgs[0m[0;34m)[0m [0;34m//[0m [0mbatch_size[0m[0;34m[0m[0m
[0m[0;32m     12 [0;31m[0mmodel[0m[0;34m.[0m[0mcompile[0m[0;34m([0m[0mkeras[0m[0;34m.[0m[0moptimizers[0m[0;34m.[0m[0mAdam[0m[0;34m([0m[0mlr[0m [0;34m=[0m [0;36m0.0001[0m[0;34m)[0m[0;34m,[0m [0mloss[0m [0;34m=[0m [0;34m'categorical_crossentropy'[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31mhistory = model.fit_generator(train_generator,
[0m[0;32m     14 [0;31m                              [0msteps_per_epoch[0m[0;34m=[0m[0mtrain_steps_per_epoch[0m[0;34m,[0m[0;34m[0m[0m
[0m[0;32m---> 15 [0;31m                              epochs=15, verbose=1)
[0m


ipdb>  up


*** Oldest frame


ipdb>  down


> [0;32m/Users/flatironschool/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py[0m(1426)[0;36mfit_generator[0;34m()[0m
[0;32m   1424 [0;31m        [0muse_multiprocessing[0m[0;34m=[0m[0muse_multiprocessing[0m[0;34m,[0m[0;34m[0m[0m
[0m[0;32m   1425 [0;31m        [0mshuffle[0m[0;34m=[0m[0mshuffle[0m[0;34m,[0m[0;34m[0m[0m
[0m[0;32m-> 1426 [0;31m        initial_epoch=initial_epoch)
[0m[0;32m   1427 [0;31m[0;34m[0m[0m
[0m[0;32m   1428 [0;31m  def evaluate_generator(self,
[0m


ipdb>  down


> [0;32m/Users/flatironschool/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_generator.py[0m(191)[0;36mmodel_iteration[0;34m()[0m
[0;32m    189 [0;31m      [0mprogbar[0m[0;34m.[0m[0mon_batch_begin[0m[0;34m([0m[0mstep[0m[0;34m,[0m [0mbatch_logs[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    190 [0;31m[0;34m[0m[0m
[0m[0;32m--> 191 [0;31m      [0mbatch_outs[0m [0;34m=[0m [0mbatch_function[0m[0;34m([0m[0;34m*[0m[0mbatch_data[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    192 [0;31m      [0;32mif[0m [0;32mnot[0m [0misinstance[0m[0;34m([0m[0mbatch_outs[0m[0;34m,[0m [0mlist[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m    193 [0;31m        [0mbatch_outs[0m [0;34m=[0m [0;34m[[0m[0mbatch_outs[0m[0;34m][0m[0;34m[0m[0m
[0m


ipdb>  type(batch_data)


<class 'tuple'>


ipdb>  len(batch_data)


2


ipdb>  batch_data[0]


array([[[[ 59.521267 ,  49.7819   ,  41.303165 ],
         [ 61.23142  ,  49.743805 ,  41.11571  ],
         [ 64.99556  ,  51.01332  ,  43.00888  ],
         ...,
         [185.89722  , 101.19628  ,  73.046745 ],
         [169.60521  ,  79.70688  ,  59.404266 ],
         [165.84576  ,  76.69152  ,  56.84576  ]],

        [[ 61.10549  ,  52.070328 ,  44.964836 ],
         [ 59.805256 ,  50.207882 ,  42.013134 ],
         [ 60.37946  ,  49.45982  ,  40.689728 ],
         ...,
         [172.72908  ,  83.82469  ,  61.960155 ],
         [164.9938   ,  74.987595 ,  55.993797 ],
         [167.82487  ,  83.53415  ,  60.32049  ]],

        [[ 63.42908  ,  53.61939  ,  44.190308 ],
         [ 61.53147  ,  52.354317 ,  44.82284  ],
         [ 60.08924  ,  50.63386  ,  42.723103 ],
         ...,
         [164.14183  ,  73.28367  ,  55.141834 ],
         [168.78902  ,  82.57803  ,  59.789017 ],
         [160.86697  ,  86.69683  ,  62.2181   ]],

        ...,

        [[ 38.54462  ,  37.54462  ,  3

ipdb>  batch_data[1]


array([[1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0]])


ipdb>  batch_data[0].shape


(20, 224, 224, 3)


ipdb>  exit


In [22]:
train_features = np.zeros(shape=(len(ordered_imgs), 7, 7, 2048))
train_labels = np.zeros(shape=(len(ordered_imgs),5))

In [23]:
i = 0
for inputs_batch, labels_batch in train_generator:
    features_batch = res_fifty.predict(inputs_batch)
    train_features[i * batch_size : (i + 1) * batch_size] = features_batch
    train_labels[i * batch_size : (i + 1) * batch_size] = labels_batch
    i += 1
    if i * batch_size >= len(ordered_imgs):
        break
         
train_features = np.reshape(train_features, (len(ordered_imgs), 7 * 7 * 2048))

In [47]:
indices = np.random.choice(train_features.shape[0], len(ordered_imgs)//5, replace=False)
features = train_features[indices, :]
tlabels = train_labels[indices,:]
i,j = np.where(train_labels==1)
t_labels = j[indices]

In [48]:
from sklearn.multiclass import OutputCodeClassifier as occ
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

features_train, features_test, labels_train, labels_test = train_test_split(features,tlabels,test_size = 0.1)
clf = occ(SVC(random_state=69), random_state= 420)
clf.fit(features, t_labels)


OutputCodeClassifier(code_size=1.5,
           estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=69,
  shrinking=True, tol=0.001, verbose=False),
           n_jobs=None, random_state=420)

In [51]:
test_indices = np.random.choice(train_features.shape[0], len(ordered_imgs)//10, replace=False)
test_features = train_features[test_indices, :]
test_labels = j[test_indices]

In [42]:
test_ouput = clf.predict(test_features)

In [52]:
clf.score(test_features,test_labels)

0.6229508196721312

occ produces 0.63 accuracy w/ 3662//5 train images, frozen weights