# 1. Packages and Data

## 1.1 Packages

In [None]:
# Data packages
import pandas as pd
import numpy as np

# Image manipulation
from PIL import Image

# Other system packages
import os

# Keras functions 
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Activation, Flatten, MaxPool2D, BatchNormalization
from keras.optimizers import Adam

# sklearn functions
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

## 1.2 Data

In [None]:
# Let's use the AT1A data

# The dataset lives in a "data" folder under the current working directory
dataset_dir = "/".join([os.getcwd(), "data"])

# The label table sits next to the images
labels_file = "/".join([dataset_dir, "train_selected.csv"])
data_labels = pd.read_csv(labels_file)

data_labels.shape

In [None]:
# Build the path of every image from its id: <dataset_dir>/<id>.png
file_list = ["{}/{}.png".format(dataset_dir, image_id) for image_id in data_labels["id"].tolist()]

In [None]:
# Binary target: 1 for rows labelled 'automobile', 0 for every other label
is_automobile = data_labels['label'] == 'automobile'
data_labels["class"] = np.where(is_automobile, 1, 0)
data_labels["class"].value_counts()

In [None]:
def standarise_data(dataset):
    """Return ``dataset`` divided by 255.

    For 8-bit pixel data (values 0-255) this maps every value into [0, 1].
    The input is not modified; the division produces a new object.
    """
    return dataset / 255.

In [None]:
def load_data():
    """Load the images and labels, split them 70/30 and scale the pixel values.

    Reads every path in the module-level ``file_list`` with PIL and stacks the
    results into ``X``; the targets ``y`` come from ``data_labels["class"]``.
    The split uses ``test_size=0.3`` with a fixed ``random_state`` so it is
    reproducible. ``X_train`` and ``X_test`` are then divided by 255 through
    ``standarise_data``; ``X`` itself is left unscaled.

    The images keep their original array shape: nothing is flattened or
    transposed here and the labels are not reshaped, as Keras does not need
    that. If a flat input is required, a Keras ``Flatten`` layer can do it
    inside the model:
    https://www.tensorflow.org/api_docs/python/tf/keras/layers/Flatten

    Returns:
        None. The results are published as the globals ``X``, ``y``,
        ``X_train``, ``X_test``, ``y_train`` and ``y_test``.
    """
    global X_train, X_test, y_train, y_test, X, y

    images = []
    for fname in file_list:
        # Release each file as soon as its pixels have been copied into an array.
        with Image.open(fname) as img:
            images.append(np.array(img))
    X = np.array(images)
    y = np.array(data_labels["class"])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Standardize data to have feature values between 0 and 1.
    X_train = standarise_data(X_train)
    X_test = standarise_data(X_test)

    # The arrays are not flattened, so the shapes are labelled plainly.
    print ("X_train: " + str(X_train.shape))
    print ("X_test: " + str(X_test.shape))

    print ("y_train: " + str(y_train.shape))
    print ("y_test: " + str(y_test.shape))

    return

In [None]:
# Populate the globals X, y, X_train, X_test, y_train and y_test and print the split shapes.
load_data()

In [None]:
# convert class vectors to binary class matrices

# Only encode while the labels are still 1-D class vectors. This keeps the cell
# safe to re-run: a second pass would otherwise encode the already encoded
# labels again and overwrite y_test_skl with the encoded copy.
if y_test.ndim == 1:
    # Note we will save y_test for our sklearn metrics
    y_test_skl = y_test.copy()

    y_train = keras.utils.to_categorical(y_train, 2)
    y_test = keras.utils.to_categorical(y_test, 2)

print ("y_train: " + str(y_train.shape))
print ("y_test: " + str(y_test.shape))

# 2. Lab

## 2.1 Simple CNN

Let's start out with a simple CNN architecture. We will have:

* Conv > Conv (Relu) 36 filters each time.
* Max Pooling (size 2)
* Fully connected layer (size 64)
* Fully connected layer (size 2)
    * Sigmoid activation to get us to binary output

In [None]:
# Simple CNN: two 3x3 convolutions (36 filters each, ReLU), one 2x2 max-pool,
# then a 64-unit dense layer and a 2-unit sigmoid output.
model = Sequential([
    Conv2D(filters=36, kernel_size=3, padding="same", input_shape=[32, 32, 3]),
    Activation("relu"),
    # You could pass the activation to Conv2D instead of using a separate layer
    Conv2D(filters=36, kernel_size=3, padding="same"),
    Activation("relu"),
    MaxPool2D(pool_size=2),

    # Stretching out for our FC layer
    Flatten(),
    Dense(64),
    Activation("relu"),

    # Binary classifier
    Dense(2),
    Activation("sigmoid"),
])

In [None]:
# Print Keras' summary of the layers in `model`.
model.summary()

In [None]:
# Adam with default settings, binary cross-entropy loss, accuracy as the reported metric
model.compile(
    optimizer=Adam(),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 5 epochs, using the test split as validation data
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=5,
)

In [None]:
# Evaluate on the test split: score[0] is the loss, score[1] the accuracy metric
score = model.evaluate(X_test, y_test, verbose=0)

for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

In [None]:
# We can also get raw probabilities

# Let's set a nice print option
np.set_printoptions(suppress=True)

# `predict` returns the sigmoid outputs directly. It replaces
# `Sequential.predict_proba`, which is deprecated and no longer present in
# newer Keras releases, whereas `predict` is available in every version.
predictions = model.predict(X_test, verbose=0)
predictions[0:5]

# Here we can see it is probabilities of two classes. Negative first, then positive.

In [None]:
# Column 0 is the negative class and column 1 the positive class, so [:,1] keeps one probability per test image.
predictions[:,1].shape # This is how we can slice out the probability of positive class

In [None]:
# Positive-class probability of the first five test images
predictions[:5, 1]

In [None]:
# We can also get class predictions

# The predicted class is the column with the highest output. This is what
# `Sequential.predict_classes` computed for a multi-column output; that helper
# is deprecated and no longer present in newer Keras releases.
predictions_classes = np.argmax(model.predict(X_test, verbose=0), axis=-1)
predictions_classes[0:5]

In [None]:
# Build some sklearn scores

# Probability of the positive class (column 1), used for the ROC AUC
positive_probs = predictions[:,1]

# Confusion matrix of true vs predicted classes
conf_matrix = confusion_matrix(list(y_test_skl), list(predictions_classes))
print("Confustion Matrix \n", conf_matrix)

# Classification report
report = classification_report(y_test_skl, predictions_classes)
print(report)

# Accuracy score
accuracy = accuracy_score(y_test_skl, predictions_classes)
print("Accuracy: ", accuracy)

# ROC_AUC score
roc_auc = roc_auc_score(y_test_skl, positive_probs)
print("ROC_AUC: ", roc_auc)

## 2.2 Deeper + batch norm

Let's go a bit deeper and add some batch normalisation to assist us with this depth

Let's make some modifications to our network:

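* Double the depth (four convolutional layers in two Conv > Conv > Max Pooling blocks, instead of two layers in one block)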
* Add Batch Norm after the convolutions, before activation
* Give it some more epochs

In [None]:
# Deeper CNN: two Conv > Conv > MaxPool blocks, with batch normalisation
# between each convolution and its ReLU activation.
# `Sequential` is the name imported from keras.models at the top of the
# notebook; `models` itself is never imported.
model = Sequential()

# Block 1
model.add(Conv2D(filters=36, kernel_size=3, padding="same", input_shape=[32, 32, 3]))
model.add(BatchNormalization())
model.add(Activation("relu"))
model.add(Conv2D(filters=36, kernel_size=3, padding="same"))
model.add(BatchNormalization())
model.add(Activation("relu"))
model.add(MaxPool2D(pool_size=2))

# Block 2
model.add(Conv2D(filters=36, kernel_size=3, padding="same"))
model.add(BatchNormalization())
model.add(Activation("relu"))
model.add(Conv2D(filters=36, kernel_size=3, padding="same"))
model.add(BatchNormalization())
model.add(Activation("relu"))
model.add(MaxPool2D(pool_size=2))


model.add(Flatten()) # Stretching out for our FC layer
model.add(Dense(64))
model.add(Activation("relu"))

# Sigmoid classifier
model.add(Dense(2))
model.add(Activation("sigmoid"))

In [None]:
# Same training setup as the simple CNN: binary cross-entropy, default Adam, accuracy metric
model.compile(
    loss="binary_crossentropy",
    metrics=["accuracy"],
    optimizer=Adam(),
)

In [None]:
# 15 epochs in mini-batches of 64, using the test split as validation data
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=15,
    validation_data=(X_test, y_test),
)

In [None]:
# We can get our score: evaluate returns the loss followed by the accuracy metric
score = model.evaluate(X_test, y_test, verbose=0)

test_loss = score[0]
test_accuracy = score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Get our probabilities, classes and scores

# One forward pass gives the per-class outputs; the predicted class is the
# column with the highest value. `predict` + argmax replaces
# `predict_proba` / `predict_classes`, which are deprecated and no longer
# present in newer Keras releases.
predictions = model.predict(X_test, verbose=0)
predictions_classes = np.argmax(predictions, axis=-1)

#Get confusion matrix 
print("Confustion Matrix \n", confusion_matrix(list(y_test_skl), list(predictions_classes)))

#Get classification report
print(classification_report(y_test_skl, predictions_classes))

# Accuracy score
print("Accuracy: ", accuracy_score(y_test_skl, predictions_classes))

# ROC_AUC score (uses the positive-class column of the outputs)
print("ROC_AUC: ", roc_auc_score(y_test_skl, predictions[:,1]))

## 2.3 Play around!

Now it is your turn, play around with adding more layers, try different optimisers or some better settings on the existing ones.

## 2.4 Transfer learning

Now let's see how a prebuilt model performs compared to our playing around above. You can find many built in networks on the keras documentation page:

* http://keras.io/applications/

Let's use InceptionV3 trained on ImageNet and see how well it does.

From the documentation, some important inputs:

`include_top`: whether to include the 3 fully-connected layers at the top of the network.

`weights:` one of None (random initialization) or 'imagenet' (pre-training on ImageNet).

`classes:` optional number of classes to classify images into, only to be specified if include_top is  True, and if no weights argument is specified.


It is important to note that these prebuilt architectures expect a certain input size, so our 32x32 images will need to be resized (to 299x299 here).

### 2.4.1 Set up

In [None]:
# Firstly let us take a smaller sample of images
N_TRAIN_SAMPLE = 750
N_TEST_SAMPLE = 250

X_train = X_train[0:N_TRAIN_SAMPLE]
y_train = y_train[0:N_TRAIN_SAMPLE]

X_test = X_test[0:N_TEST_SAMPLE]
y_test = y_test[0:N_TEST_SAMPLE]

# Keep the sklearn copy of the test labels aligned with the sampled X_test,
# otherwise any sklearn metric computed from here on would get inputs of
# different lengths.
y_test_skl = y_test_skl[0:N_TEST_SAMPLE]

In [None]:
# Firstly let us create an instance of the prebuilt model

from keras.applications import InceptionV3

# ImageNet weights, no top (fully-connected) layers, 299x299 inputs with 3 channels
InceptionV3_prebuilt = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=[299, 299, 3],
)

In [None]:
import pickle

# Saving this out for use
# NOTE(review): whether a Keras model can be pickled depends on the installed
# Keras version -- confirm this round-trips, or consider model.save() /
# load_model() for both this cell and the cell that reads the file back.
with open('inception_model.pickle', 'wb') as file_out:
    # The with-block closes the file on exit, so no explicit close() is needed.
    pickle.dump(InceptionV3_prebuilt, file_out)

We can skip to here to read in the model

In [None]:
# Imported here as well so this cell works when the saving cell is skipped.
import pickle

# Load data (deserialize)
# NOTE: pickle.load can execute arbitrary code -- only open pickle files you created yourself.
with open('inception_model.pickle', 'rb') as file_in:
    InceptionV3_prebuilt = pickle.load(file_in)

When reshaping the images you can use a variety of packages:

* https://scikit-image.org/
* opencv (https://stackoverflow.com/questions/48121916/numpy-resize-rescale-image)
* You can also use keras built in image processing which also augments on the fly
    * https://keras.io/preprocessing/image/

In [None]:
# Resize every training image to the 299x299 size used for the prebuilt model
import skimage.transform

X_train_reshape = []

for count, image in enumerate(X_train, start=1):
    # Progress message every 500 images
    if count % 500 == 0:
        print("Done {} images".format(count))

    X_train_reshape.append(
        skimage.transform.resize(image, (299, 299), mode='constant')
    )


In [None]:
# Resize every test image to the 299x299 size used for the prebuilt model
import skimage.transform

X_test_reshape = []

for count, image in enumerate(X_test, start=1):
    # Progress message every 500 images
    if count % 500 == 0:
        print("Done {} images".format(count))

    resized = skimage.transform.resize(image, (299, 299), mode='constant')
    X_test_reshape.append(resized)


In [None]:
# Turn into numpy array
# Each list of resized images becomes a single array. The names are reused, so
# after this cell X_train_reshape / X_test_reshape are arrays rather than lists.
# The two conversions are done one after the other so that the first list can
# be released before the second array is built.
X_train_reshape = np.array(X_train_reshape)

X_test_reshape = np.array(X_test_reshape)

In [None]:
# Prove we have the right shapes now: original images, resized images, labels
for train_array in (X_train, X_train_reshape, y_train):
    print(train_array.shape)

for test_array in (X_test, X_test_reshape, y_test):
    print(test_array.shape)


In [None]:
from sys import getsizeof

# Size of each resized image array, reported in units of 1,000,000 bytes
for resized_array in (X_train_reshape, X_test_reshape):
    print(getsizeof(resized_array)/1000000)

In [None]:
import pickle

# Saving this out for use
# Each with-block closes its file on exit, so no explicit close() is needed.
with open('X_train_inception.pickle', 'wb') as file_out:
    pickle.dump(X_train_reshape, file_out)

with open('X_test_inception.pickle', 'wb') as file_out:
    pickle.dump(X_test_reshape, file_out)

We can skip to here to read in the resized images

In [None]:
# Imported here as well so this cell works when the saving cell is skipped.
import pickle

# Load data (deserialize)
# NOTE: pickle.load can execute arbitrary code -- only open pickle files you created yourself.
with open('X_train_inception.pickle', 'rb') as file_in:
    X_train_reshape = pickle.load(file_in)

# The test images come from their own file, not from the training pickle.
with open('X_test_inception.pickle', 'rb') as file_in:
    X_test_reshape = pickle.load(file_in)

### 2.4.2 No retraining

In [None]:
# Mark every layer of the prebuilt network as not trainable

for pretrained_layer in InceptionV3_prebuilt.layers:
    pretrained_layer.trainable = False

In [None]:
# Now let us add our dense final layers on top of the prebuilt network's output:
# flatten -> 64-unit dense layer with ReLU -> 2-unit sigmoid output
flattened = Flatten()(InceptionV3_prebuilt.output)
hidden = Dense(64)(flattened)
x = Activation("relu")(hidden)

preds = Dense(2, activation="sigmoid")(x)

In [None]:
from keras.models import Model

# Join the prebuilt network's input to our new output layer. The documented
# keyword is `inputs` (matching `outputs`); the singular `input` is a legacy
# spelling that newer Keras versions do not accept.
final_model = Model(inputs=InceptionV3_prebuilt.input, outputs=preds)

In [None]:
from keras import optimizers

# SGD with momentum and a small learning rate
# NOTE(review): newer Keras releases name this argument `learning_rate` -- confirm against the installed version.
sgd = optimizers.SGD(lr=0.0001, momentum=0.9)

final_model.compile(
    optimizer=sgd,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the resized images for 5 epochs in mini-batches of 64,
# using the resized test images as validation data
history = final_model.fit(
    X_train_reshape,
    y_train,
    batch_size=64,
    epochs=5,
    validation_data=(X_test_reshape, y_test),
)

### 2.4.3 Retraining

Now it is over to you, try unfreezing some layers and retraining. Or adding on more layers to the end. See what you can come up with!