# Python Implementation of Experiments for Thesis

### Import Libraries

In [1]:
#-------------------------Setting the randomness-------#
import numpy as np
np.random.seed(1)

from tensorflow import set_random_seed
set_random_seed(1)
#------------------------------------------------------#
import cv2

import keras
from keras.models import Sequential
from keras import optimizers
from keras.layers.normalization import BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.core import Dense, Dropout, Flatten, Activation,Reshape





Using TensorFlow backend.


In [2]:
num1=np.random.rand(2)
print(num1)

[0.417022   0.72032449]


In [3]:
np.random.seed(1)
num2=np.random.rand(2)
print(num2)

[0.417022   0.72032449]


## Load in training and testing dataset

In [4]:
from numpy import load
train_data = load('../../Processed_Dataset/training_data.npy')
test_data = load('../../Processed_Dataset/testing_data.npy')

In [5]:
print(train_data[0:5,0:1])
# print(train_data[3875])
print(len(train_data[3875:]))


[[1.]
 [1.]
 [1.]
 [1.]
 [1.]]
1341


In [6]:
print(test_data[389])
print(test_data[390])

[1.         0.03137255 0.04705882 ... 0.         0.         0.        ]
[0.         0.12156863 0.12156863 ... 0.1254902  0.12941177 0.13333334]


 ## Method to choose balancing of training data

In [7]:
def balanced(option):
    if(option=="yes"):
        training_data = np.concatenate((train_data[0:1341],train_data[3875:]),axis=0)
        testing_data = np.concatenate((test_data[0:234],test_data[390:]))
        #training_data = train_data
        return training_data,testing_data
    else:
        training_data = train_data
        testing_data = np.concatenate((test_data[0:234],test_data[390:]))
        return training_data,testing_data


In [8]:
training_data,test_data=balanced("no")
print(training_data.shape)
print(test_data.shape)

(5216, 62501)
(468, 62501)


## We need to shuffle the data to allow the Model to learn in a varied manner


In [9]:
## Shuffling the data to make it more varied with yes or no to shuffling
def ShuffelData(data,option):
    if(option=="yes"):
        data = shuffle(data,random_state=0)
        return data
    else:
        return data
training_data = ShuffelData(training_data,"no")

In [10]:
X_train = training_data[0:,1:]
Y_train = training_data[0:,0:1]
X_test = test_data[0:,1:]
Y_test = test_data[0:,0:1]

In [11]:
y_train_ohe = keras.utils.to_categorical(Y_train, 2)
y_test_ohe = keras.utils.to_categorical(Y_test, 2)

In [12]:
X_train.shape[0]

5216

In [13]:
X_train = X_train.reshape(X_train.shape[0],250,250)
print(X_train.shape)
X_test = X_test.reshape(X_test.shape[0],250,250)
print(X_test.shape)

(5216, 250, 250)
(468, 250, 250)


In [13]:
#new_train = np.concatenate((X_train,)*3, axis=-1)

In [14]:
rgbArray = np.dstack((X_train,X_train,X_train))


In [15]:
rgbArray.shape

(5216, 250, 750)

In [19]:
rgbArray[...,0] = X_

ValueError: could not broadcast input array from shape (250,250,1) into shape (5216,250,250)

In [15]:
X_train = cv2.merge((X_train,X_train,X_train))
X_test = cv2.merge((X_test,X_test,X_test))

error: OpenCV(4.2.0) ..\modules\core\src\merge.dispatch.cpp:134: error: (-215:Assertion failed) 0 < cn && cn <= CV_CN_MAX in function 'cv::merge'


In [None]:
X_train.shape

## Create 2D CNN 

## 1. Define Sequential Model
## 2. Add 2D CNN with 32 filters,3x3 filters
## 3. Apply Relu
## 4. Apply Batch Norm
## Repeat x2
## 5. Apply MaxPooling(2,2)
## 6. Send through Fully Connected Layer


In [None]:
def create_model():
    model_CNN = Sequential()
    
    model_CNN.add(Conv2D(32,(3,3),input_shape=(250,250,1)))
    model_CNN.add(Activation('relu')) # Remember, Batch Norm is meant to go before activation. However, for purposes of recreating experriment 1 keep Batch Norm after activation
    
    model_CNN.add(Conv2D(32, (3, 3)))
    model_CNN.add(Activation('relu'))

    model_CNN.add(MaxPooling2D(pool_size=(2,2)))
    
    model_CNN.add(Flatten())# Flattens the output from the previos layer

    # Fully connected layer
    model_CNN.add(Dense(128))
    model_CNN.add(Activation('relu'))
    #model_CNN.add(Dropout(0.2))
    model_CNN.add(Dense(2))
    model_CNN.add(Activation('softmax'))
    
    return model_CNN

In [None]:
# print(create_model().summary())

# Setup for the experiment below

## 1. Model is ran with the above architeture 
## 2.Trained on Imbalanced Set
## 3. Tested on Balanced Set

# This is a test to see if running 10 experiments for the same setup of model produces the same results 

In [None]:
# accuracy = []
# for i in range(0,10):
#     model = create_model() 
#     opt = optimizers.Adam(lr=0.01)
#     model.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
#     model.fit(X_train, y_train_ohe,batch_size=64,epochs=25,validation_data=(X_test, y_test_ohe))# model
#     score = model.evaluate(X_test, y_test_ohe)
#     accuracy.append(round(score[1]*100))


In [None]:
model = create_model()
opt = optimizers.Adam(lr=0.01)
model.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
history = model.fit(X_train, y_train_ohe,batch_size=64,epochs=1,validation_data=(X_test, y_test_ohe))# model

In [None]:
print(history.history.keys())

# Plotting accuracy
## Remember to UNCOMMENT the .savefig 

In [None]:
import matplotlib.pyplot as plt

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train','test'],loc='upper left')
#plt.savefig('train_vs_test_accuracy_shuffled.png')
plt.show()


In [None]:
## What does the above graph tell us about the model 
## -> What we see is that when it comes to training the model, it has high accuracy on the training set.
# However when looking at the testing accuracy we can see the accuracy after every epoch is quite volatile. 
# What this tell us is that the model has clearly not been able to learn from training data. When looking at the graph it also shows overfitting 
## But this comes secondary to the concept of the model actually underfitting even though the the accuracy on the training data is high. 

In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
#plt.savefig('train_vs_test_loss_shuffled.png')
plt.show()

In [None]:
score = model.evaluate(X_test, y_test_ohe)
print('Loss ' , score[0])
print('Test accuracy: ', score[1]*100)

In [None]:
y_pred = model.predict_classes(X_test)


## Lets Visualise the output of Confusion Matrix

In [None]:
import matplotlib.pyplot as plt
import pandas
import numpy as np
from sklearn.metrics import confusion_matrix

matrix = ["TN","FP"],["FN","TP"]
print(matrix[0])
print(matrix[1])

##Lets Visualise the output
conf=confusion_matrix(Y_test, y_pred)
print("Confusion Matrix")
print(conf)
# or we can use a heatmap from the seaborn library
#import seaborn as sn
#df_cm = pandas.DataFrame(conf, range(2), range(2))
#sn.set(font_scale=1.4)#for label size
#sn.heatmap(df_cm, cmap="YlGnBu", annot=True, annot_kws={"size": 20},fmt="d")# font size

from sklearn.metrics import classification_report
print(classification_report(Y_test,y_pred))