# Imports

In [None]:
import densenet
import math
import numpy as np
import os
import h5py
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow import keras
import sklearn.metrics as metrics
import pickle

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Seed NumPy's global RNG.
# NOTE(review): only NumPy is seeded here; TensorFlow/Keras randomness
# (weight init, shuffling) is presumably still unseeded -- confirm if
# reproducible training runs are required.
np.random.seed(1)
# Folder that is scanned for the ".jpeg" training images.
directory = "/Users/sushil/Downloads/github_images"


# Load dataset

## Get labels

In [None]:
# Map "<image id>.jpeg" -> integer label, read from the training-labels CSV.
# The first CSV column is the image id, the second the label.
trainLabelsDict = {}

# `with` guarantees the file handle is closed, even if a row fails to parse.
with open('/Users/sushil/Downloads/trainLabels.csv') as labels_file:
    next(labels_file, None)  # skip the header row (no-op on an empty file)
    for row in labels_file:
        row = row.strip()
        if not row:
            continue  # tolerate blank lines, e.g. an extra newline at EOF
        fields = row.split(',')
        # Parse the stripped field directly: slicing off the last character
        # to drop the newline would eat a digit on a final line that has no
        # trailing newline, or when the row carries extra columns.
        trainLabelsDict[fields[0] + ".jpeg"] = int(fields[1])
print(trainLabelsDict['10_left.jpeg'])

## Resize images

In [None]:
# Load every labeled ".jpeg" in `directory`, resize it, scale pixel values to
# [0, 1] and collect the image tensors in X and their labels in Y.
#
# PIL's resize() takes (width, height), while the NumPy view of an image is
# (height, width, channels). Each example must come out as (316, 475, 3) to
# match the network input, so the image is resized to width 475 / height 316.
# NOTE(review): this assumes the intended geometry is 316 rows x 475 columns
# (landscape); resizing to (316, 475) and reshaping the flat pixel list to
# (316, 475, 3) would scramble the pixel rows.
targetHeight, targetWidth = 316, 475

X = []
Y = []
i = 0
# sorted() makes the example order -- and therefore the later index-based
# train/test split -- reproducible; os.listdir() order is arbitrary.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".jpeg"):
        continue
    if filename not in trainLabelsDict:
        # An image without a label cannot be used for supervised training.
        print("skipping unlabeled image: ", filename)
        continue
    if i % 50 == 0:
        print("i: ", i)  # progress indicator
    image_path = os.path.join(directory, filename)
    # The context manager releases the underlying file handle promptly.
    with Image.open(image_path) as im:
        # convert("RGB") guarantees exactly 3 channels (grayscale/CMYK/RGBA
        # inputs would otherwise yield a different channel count).
        # Image.LANCZOS is the filter formerly aliased as Image.ANTIALIAS,
        # which was removed in Pillow 10.
        resized_image = im.convert("RGB").resize(
            (targetWidth, targetHeight), Image.LANCZOS
        )
    # Direct array conversion gives (height, width, 3) without building a
    # Python list of pixel tuples; dividing by 255 scales to [0, 1].
    X_i = np.asarray(resized_image, dtype=np.float64) / 255.0
    Y_i = trainLabelsDict[filename]

    X.append(X_i)
    Y.append(Y_i)
    i += 1

In [None]:
# Stack the per-image arrays into a single array and turn the label list into
# a column vector (one row per example).
X = np.array(X)
Y = np.array(Y).reshape((1, len(Y))).T
for stacked in (X, Y):
    print(stacked.shape)
# Fraction of the examples held out as the test set.
testSplit = .1
nExamples = X.shape[0]

In [None]:
# Index-based split: the first (1 - testSplit) share of the examples is used
# for training, the remainder for testing.
splitIndex = int(nExamples*(1-testSplit))
X_train, X_test = X[:splitIndex, :, :], X[splitIndex:, :, :]
Y_train, Y_test = Y[:splitIndex, ], Y[splitIndex:, ]
for subset in (X_train, X_test, Y_train, Y_test):
    print(subset.shape)

# DenseNet

## Get Weights

In [None]:
# Input geometry in 'tf' (channels-last) dim-ordering: (rows, cols, channels).
image_dim = (316, 475, 3)
# Build the DenseNet-121 variant provided by the local `densenet` module.
# NOTE(review): presumably this also loads ImageNet weights by default --
# confirm against the densenet module.
model = densenet.DenseNetImageNet121(input_shape=image_dim)

## Feed our images to the pre-trained model

In [7]:
# updatable plot
# a minimal example (sort of)
from matplotlib import pyplot as plt
from IPython.display import clear_output
class PlotLosses(keras.callbacks.Callback):
    """Keras callback that redraws the loss curves after every epoch.

    Training and validation loss are recorded per epoch and re-plotted in
    place (the previous notebook output is cleared first), giving a live
    view of training progress.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history and open a fresh figure.

        `logs` is accepted for the callback signature and not used.
        """
        self.i = 0             # running epoch counter (x coordinate)
        self.x = []            # epoch indices plotted so far
        self.losses = []       # training loss per epoch
        self.val_losses = []   # validation loss per epoch (None if absent)

        self.fig = plt.figure()

        self.logs = []         # raw per-epoch log dicts, kept for inspection

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's losses and redraw both curves.

        `epoch` is not used; the callback keeps its own counter. `logs` is
        the metrics dict for the epoch; a missing dict is treated as empty.
        """
        # A None default avoids sharing one mutable dict across calls.
        if logs is None:
            logs = {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.i += 1

        # wait=True delays clearing until new output is ready, which avoids
        # flicker between redraws.
        clear_output(wait=True)
        plt.plot(self.x, self.losses, label="loss")
        plt.plot(self.x, self.val_losses, label="val_loss")
        plt.legend()
        plt.show()


plot_losses = PlotLosses()

In [None]:
# NOTE(review): these come from standalone `keras`, while other cells use
# `tensorflow.keras`; confirm that the `densenet` module builds its model with
# the same Keras flavour, since the two cannot be mixed in one model.
from keras.models import Model, Sequential
from keras.layers import Activation, Dense
#sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
num_class = 5

# Drop the original classifier head by building a new functional model that
# ends at the penultimate layer. Popping from `model.layers` and reassigning
# `model.outputs` does not rebuild the computation graph, so the wrapped model
# would still emit the original head's output and the new Dense layer would
# be stacked on top of it.
oldModel = Model(inputs=model.input, outputs=model.layers[-2].output)

# Freeze everything except the last 10 layers of the truncated backbone so
# only the top of the network is fine-tuned.
for layer in oldModel.layers[:-10]:
    layer.trainable = False

# New network: truncated backbone followed by a fresh softmax classifier.
model = Sequential()
model.add(oldModel)
model.add(Dense(num_class, activation='softmax'))

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Per-class loss weights passed to fit().
# NOTE(review): the values look like inverse-frequency weights for an
# imbalanced label distribution -- confirm how they were derived.
class_weight={0: 0.133841, 1: 1.15771, 2: 0.822009, 3: 1.60461, 4: 1.6956}
model.summary()

In [None]:
# Train for 20 epochs, holding out 11% of the training data for validation and
# weighting the loss per class.
# NOTE(review): `plot_losses` is defined earlier but is not passed as a
# callback here -- confirm whether that is intentional.
model.fit(
    X_train,
    Y_train,
    epochs=20,
    validation_split=.11,
    class_weight=class_weight,
)

# Predict

In [11]:
# Run the trained model on the held-out test images. The final layer is a
# softmax over the classes, so each row holds one score per class.
predictions = model.predict(X_test)

In [None]:
# Sanity check on the dimensions of the prediction array.
print(predictions.shape)
def findMaxProbClass(x, axis=None):
    """Return, for each row of `x`, the index of its largest value.

    Args:
        x: array-like whose first axis enumerates the examples. Any further
            axes are flattened per example before taking the argmax.
        axis: accepted for backward compatibility and ignored; the argmax is
            always taken per entry along the first axis.

    Returns:
        1-D integer array of length ``x.shape[0]`` with the winning index of
        each row (the first one on ties). Empty input yields an empty integer
        array.
    """
    x = np.asarray(x)
    n_rows = x.shape[0]
    if n_rows == 0:
        # reshape(0, -1) cannot infer the second dimension, so handle the
        # empty case explicitly and keep an integer dtype for indices.
        return np.empty(0, dtype=np.intp)
    # One vectorized argmax over the flattened rows replaces the Python loop.
    return x.reshape(n_rows, -1).argmax(axis=1)
# Collapse the per-class scores to one predicted class id per test example,
# then print predictions and ground-truth labels for a visual comparison.
maxPredictions = findMaxProbClass(predictions)
for labelVector in (maxPredictions, Y_test):
    print(labelVector.T)

# Evaluation Metrics

In [None]:
def _safe_ratio(numerator, denominator):
    """Element-wise numerator / denominator, with NaN where the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    result = np.full(np.broadcast(numerator, denominator).shape, np.nan)
    # `where` skips the zero denominators, leaving the NaN fill in place
    # instead of emitting divide-by-zero RuntimeWarnings.
    np.divide(numerator, denominator, out=result, where=denominator != 0)
    return result[()]  # unwrap 0-d results to a plain NumPy scalar


# Y_test is stored as an (n, 1) column; flatten so sklearn gets 1-D vectors.
y_true = np.ravel(Y_test)
y_pred = np.ravel(maxPredictions)

confusion_matrix = metrics.confusion_matrix(y_true=y_true, y_pred=y_pred)
print ('CONFUSION MATRIX:')
print(confusion_matrix)

print ('\nCLASSIFICATION REPORT:')
print(metrics.classification_report(y_true=y_true, y_pred=y_pred))

print ('\nSENSITIVITY AND SPECIFICITY:')
# One-vs-rest counts for all classes at once. Rows of the confusion matrix are
# true classes and columns are predicted classes, so for class k:
#   TP = diagonal entry, FN = rest of row k, FP = rest of column k,
#   TN = everything else.
tpPerClass = np.diag(confusion_matrix)
fnPerClass = confusion_matrix.sum(axis=1) - tpPerClass
fpPerClass = confusion_matrix.sum(axis=0) - tpPerClass
tnPerClass = confusion_matrix.sum() - tpPerClass - fpPerClass - fnPerClass

# Sensitivity and recall are the same quantity: TP / (TP + FN).
recallPerClass = _safe_ratio(tpPerClass, tpPerClass + fnPerClass)
precisionPerClass = _safe_ratio(tpPerClass, tpPerClass + fpPerClass)
specificityPerClass = _safe_ratio(tnPerClass, tnPerClass + fpPerClass)

print('\nPer class:')
for precision, recall, specificity in zip(
        precisionPerClass, recallPerClass, specificityPerClass):
    sensitivity = recall
    print('p: ' + str(precision) + '\tr: ' + str(recall))
    print('sensitivity: ' + str(sensitivity) + '\tspecifity: ' + str(specificity))

# Micro average: pool the one-vs-rest counts over all classes, then take the
# ratio once.
overallTP = tpPerClass.sum()
overallFN = fnPerClass.sum()
overallFP = fpPerClass.sum()
overallTN = tnPerClass.sum()
print('\nOverall (micro avg.) :')
microAvgSensitivity = _safe_ratio(overallTP, overallTP + overallFN)
microAvgSpecificity = _safe_ratio(overallTN, overallTN + overallFP)
print('sensitivity: ' + str(microAvgSensitivity) + '\tspecifity: ' + str(microAvgSpecificity))

print ('\nACCURACY PER CLASS: ')
# Row-normalize the confusion matrix; its diagonal is then the fraction of
# each true class that was predicted correctly (i.e. per-class recall).
cm = _safe_ratio(confusion_matrix, confusion_matrix.sum(axis=1)[:, np.newaxis])
print(cm.diagonal())

print ('\nACCURACY: ')
print(metrics.accuracy_score(y_true=y_true, y_pred=y_pred))

### Post-Model Pickle

In [14]:
# Persist the trained model to disk.
# NOTE(review): pickling Keras models is not reliable across Keras versions;
# consider model.save(...) if this file must be reloaded elsewhere. Only
# unpickle files from trusted sources.
filename = 'model.pickle'
# The context manager flushes and closes the file even if dump() raises.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

### Baseline

In [None]:
# Baseline for comparison: a single 3x3 convolution with 64 filters, flattened
# straight into a 5-way softmax classifier.
# NOTE(review): this rebinds `model` (replacing the fine-tuned network) and
# trains on the full X/Y rather than X_train/Y_train -- confirm this is
# intended.
model = keras.Sequential([
    keras.layers.Conv2D(64, kernel_size=3, activation='relu', input_shape=(316,475,3)),
    #keras.layers.Conv2D(32, kernel_size=3, activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(5, activation=tf.nn.softmax),
])

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
# 50 epochs, with the last 20% of the data held out for validation.
model.fit(X, Y, epochs=50, validation_split=0.2)