<a href="https://colab.research.google.com/github/mcldwitt/workshops/blob/main/Deep_learning_workshop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <font color='darkorange'> Welcome to the deep learning workshop!</font>

To run a cell, click on the play button next to it.

<font color='red'>Do not forget to run the first cell!</font> This will import all the necessary functions, modules and data.  
This might take a while

In [None]:
# This might take a while
!pip install lime==0.2.0.1
%load_ext autoreload
%autoreload 2
import os,sys
try:
    import lime
except ImportError:
    # Only a missing package triggers the fallback; any other error is re-raised.
    # The fallback adds the directory two levels up to the module search path
    # (presumably a local lime checkout - TODO confirm) and retries the import.
    sys.path.append(os.path.join('..', '..'))
    import lime
from lime import lime_image

import zipfile
import os
from skimage.io import imread, imsave, imshow
from skimage import data, color, io, filters, morphology,transform, exposure, feature, util
from scipy import ndimage
import matplotlib.pyplot as plt
import pandas as pd
from random import shuffle
import numpy as np
from skimage.color import rgb2gray
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import classification_report
import seaborn as sns
from skimage.segmentation import mark_boundaries

!wget --no-check-certificate \
    "https://github.com/mcldwitt/workshops/archive/refs/heads/main.zip" 


# Unpack the downloaded archive into the current working directory.
# The context manager closes the archive even when extraction raises.
with zipfile.ZipFile('main.zip', 'r') as zip_ref:
    zip_ref.extractall('.')

def histplot(history, name):
    """Plot the learning curves of a training run and save them as ``<name>.png``.

    The left panel shows training and validation loss, the right panel shows
    training and validation accuracy, both per epoch.

    Parameters
    ----------
    history : object
        Object with a ``history`` mapping that holds the per-epoch lists
        'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    name : str
        Title of the loss panel; also the file name stem of the saved PNG.

    Raises
    ------
    KeyError
        If one of the four expected series is missing from ``history.history``.
    """
    fig, axs = plt.subplots(1,2, figsize=(12,6))

    # Left panel: loss curves. The y-range is fixed to (0, 1.5).
    axs[0].plot(history.history['loss'],'red',linewidth=3.0, label="loss")
    axs[0].plot(history.history['val_loss'],'blue',linewidth=3.0, label="val_loss")
    axs[0].legend()
    axs[0].set_xlabel('epochs')
    axs[0].set_ylabel('Training error')
    axs[0].set_ylim((0,1.5))
    axs[0].grid()
    axs[0].set_title(name)

    # Right panel: accuracy curves.
    axs[1].plot(history.history['accuracy'],'red',linewidth=3.0, label="accuracy")
    axs[1].plot(history.history['val_accuracy'],'blue',linewidth=3.0, label="val_accuracy")
    axs[1].legend()
    axs[1].set_xlabel('epochs')
    axs[1].set_ylabel('Accuracy')
    axs[1].grid()

    # Written relative to the current working directory.
    fig.savefig(name+'.png')

# <font color='deeppink'> Let's start with importing the data.</font>  
We will be working with images containing either spiders or elephants.

In [None]:
path = ['./workshops-main/elephant', './workshops-main/spiders']
valid_images = [".jpg",".gif",".png"]
images = []
y = []
grayscale = False  # NOTE(review): not read anywhere in the visible notebook


for p in path:
    # The class label is the name of the folder the image lives in.
    # Taking the basename keeps this correct for any folder depth.
    label = os.path.basename(os.path.normpath(p))

    # os.listdir returns entries in arbitrary order; sort for a reproducible load order.
    for f in sorted(os.listdir(p)):
        ext = os.path.splitext(f)[1]
        if ext.lower() not in valid_images:
            continue

        im = imread(os.path.join(p, f))

        # Bring every image to three colour channels before resizing. Otherwise
        # resize would interpolate along the channel axis to reach 3 channels.
        if im.ndim == 2:
            im = color.gray2rgb(im)
        elif im.ndim == 3 and im.shape[-1] == 4:
            im = im[..., :3]  # drop the alpha channel

        im = transform.resize(im, (200, 200, 3), mode='constant', anti_aliasing=True)
        images.append(np.array(im))
        y.append(label)

images = np.array(images)
y = np.array(y)



# <font color='deeppink'> Now we are going to visualise some of the samples.</font>  


In [None]:
import random

# Draw up to 10 distinct image indices. The range starts at 0 so that the very
# first image can be selected too, and the count is capped by the dataset size.
n_show = min(10, len(y))
samples = random.sample(range(len(y)), n_show)
print(samples)

# squeeze=False always returns a 2-D array of axes, also for a single column.
fig, axs = plt.subplots(1, n_show, figsize=(30, 10), squeeze=False)
axs = axs.ravel()

# A figure-level title; a title set on the first axis would be replaced by the
# per-image label below.
fig.suptitle("dataset")
for img, a in zip(samples, axs):
    a.imshow(images[img])
    a.set_axis_off()
    a.title.set_text(y[img])



# <font color='deeppink'> It's important we have approximately the same number of images with spiders and elephants in our dataset. Let's visualise this.</font>  

In [None]:
# insert code here

# <font color='deeppink'> Shuffle the images and their labels</font>

In [None]:
# Build a random permutation of the sample indices (shuffle works in place).
ind_list = list(range(len(y)))
shuffle(ind_list)

# Apply the same permutation to images and labels so every image keeps its label.
X, y = images[ind_list], y[ind_list]

# <font color='deeppink'> Let's convert the target into numbers, and check whether we need to standardise our data </font>

In [None]:
from sklearn.preprocessing import LabelEncoder

# checking standardisation: show the smallest and largest pixel value
value_range = (X.min(), X.max())
print("Range of input values : ", value_range)

# convert the target: show the string labels, then replace them by integer codes
print(y)
le = LabelEncoder()
y = le.fit_transform(y)

# Display the encoded labels as the cell output.
y

# <font color='deeppink'> Splitting the data into train and test_set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as test set; the fixed random_state makes the
# split itself repeatable for a given X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# <font color='deeppink'> Creating the AI model

In [None]:
# Shape of one sample: every axis of X_train except the leading sample axis.
input_shape = X_train.shape[1:]

# Model
model = Sequential([
    # You can add additional convolutional layers and maxpooling layers
    Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(16, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    # you can change the number of neurons or add an additional dense layer
    Dense(5, activation='relu'),

    # A single sigmoid unit: one output value per image for the two-class problem.
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

model.summary()


# <font color='deeppink'> Training the AI model

In [None]:
batch_size = 64  # you can change the number, but it should be a multiple of 8
epochs = 50      # you can change the number, ranging from 1 to 300

# Stop training once the validation loss has not improved for 10 epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
# Write best_model1.h5 whenever the validation accuracy reaches a new maximum.
mc = ModelCheckpoint('best_model1.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

# 10% of the training data is held back for validation during fitting.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    validation_split=0.1,
    epochs=epochs,
    verbose=1,
    callbacks=[es, mc],
)

# <font color='deeppink'> How did the model perform? </font>
We want the accuracy as close to 1 as possible and the loss as close as possible to 0.

In [None]:
# The images are resized to 200x200 when they are loaded, so the plot title
# (and the name of the saved PNG) states that size.
histplot(history, "First test model (200x200)")

# <font color='deeppink'> Load the best model and check performance on unseen data</font>

In [None]:
# Restore the weights stored in the checkpoint file written during training.
model.load_weights('best_model1.h5')

# Threshold the network output at 0.5 to get hard 0/1 predictions.
# The array keeps the 2-D layout returned by predict; later cells index it as y_pred[i, 0].
y_pred = (model.predict(X_test) > 0.5).astype("int32")

accuracy_pct = accuracy_score(y_test, y_pred) * 100
print("Accuracy score : %.1f" % accuracy_pct)
print()
print(classification_report(y_test, y_pred))

print("Confusion matrix : ")
cf = confusion_matrix(y_test, y_pred)
print(cf)


# <font color='deeppink'> Misclassified samples: only continue to this step if you have fewer than 10 misclassified samples</font>

The most interesting samples to check are the misclassified samples. Which samples were misclassified?

In [None]:
# Collect the test-set positions where the predicted class differs from the true class.
# y_pred holds one column per sample, hence the [i, 0] lookup.
misclassified_samples = [
    i for i in range(len(y_test))
    if y_test[i] != y_pred[i, 0]
]
print(misclassified_samples)

# <font color='deeppink'> Change the samples back to strings

In [None]:
# Map the integer codes back to the original string labels.
# The predictions are stored as one column per sample; flatten them first so the
# encoder receives the 1-D array it expects.
# NOTE(review): y_pred and y_test are overwritten in place, so re-run the
# evaluation cell before running this cell a second time.
y_pred = le.inverse_transform(y_pred.ravel())
y_test = le.inverse_transform(y_test)
y_test

# <font color='deeppink'> Visualise the misclassified samples

In [None]:
n_wrong = len(misclassified_samples)

if n_wrong == 0:
    # plt.subplots cannot create a grid with zero columns.
    print("No misclassified samples to show.")
else:
    # squeeze=False always returns a 2-D array of axes, so a single
    # misclassified sample is handled the same way as several.
    fig, axs = plt.subplots(1, n_wrong, figsize=(30, 20), squeeze=False)
    axs = axs.ravel()

    for indx, a in zip(misclassified_samples, axs):
        a.imshow(X_test[indx])
        a.set_axis_off()
        # The title is the (wrong) label the model predicted for this image.
        a.title.set_text(y_pred[indx])

# <font color='deeppink'> Let's try to have a look at what is going wrong with the misclassified samples </font>

In [None]:
def predict_proba(batch):
    """Return one probability column per class for a batch of images.

    The network has a single sigmoid output p (the score for the class encoded
    as 1). LIME needs a column per class to be able to explain the class that
    was actually predicted, so the output is expanded to [1 - p, p].
    """
    positive = model.predict(batch, verbose=0).reshape(-1, 1)
    return np.hstack([1 - positive, positive])


n_wrong = len(misclassified_samples)

if n_wrong == 0:
    # plt.subplots cannot create a grid with zero columns.
    print("No misclassified samples to explain.")
else:
    # One explainer is enough for all images.
    explainer = lime_image.LimeImageExplainer(verbose=False)

    # squeeze=False always returns a 2-D array of axes, also for a single sample.
    fig, axs = plt.subplots(1, n_wrong, figsize=(30, 20), squeeze=False)
    axs = axs.ravel()

    for indx, a in zip(misclassified_samples, axs):
        # you can change num_samples from 100 to 2000, keep in mind, the higher the number, the more time needed for computations
        explanation = explainer.explain_instance(X_test[indx].astype('double'), predict_proba, top_labels=2, hide_color=0, num_samples=500)
        # top_labels[0] is the class with the highest probability, i.e. the predicted class.
        temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=1000, hide_rest=True)
        # Assumes X_test still holds the [0, 1] floats produced by transform.resize;
        # rescaling with `/ 2 + 0.5` would squeeze them into [0.5, 1].
        a.imshow(mark_boundaries(temp, mask))
        a.set_axis_off()
        a.title.set_text(y_pred[indx])

# <font color='deeppink'> Let's visualise it again, but this time we will see the entire image








In [None]:
def predict_proba(batch):
    """Return one probability column per class for a batch of images.

    The network has a single sigmoid output p (the score for the class encoded
    as 1). LIME needs a column per class to be able to explain the class that
    was actually predicted, so the output is expanded to [1 - p, p].
    """
    positive = model.predict(batch, verbose=0).reshape(-1, 1)
    return np.hstack([1 - positive, positive])


n_wrong = len(misclassified_samples)

if n_wrong == 0:
    # plt.subplots cannot create a grid with zero columns.
    print("No misclassified samples to explain.")
else:
    # One explainer is enough for all images.
    explainer = lime_image.LimeImageExplainer(verbose=False)

    # squeeze=False always returns a 2-D array of axes, also for a single sample.
    fig, axs = plt.subplots(1, n_wrong, figsize=(30, 20), squeeze=False)
    axs = axs.ravel()

    for indx, a in zip(misclassified_samples, axs):
        explanation = explainer.explain_instance(X_test[indx].astype('double'), predict_proba, top_labels=2, hide_color=0, num_samples=500)
        # hide_rest=False keeps the whole image visible and only outlines the
        # regions that support the predicted class (top_labels[0]).
        temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=1000, hide_rest=False)
        # Assumes X_test still holds the [0, 1] floats produced by transform.resize;
        # rescaling with `/ 2 + 0.5` would squeeze them into [0.5, 1].
        a.imshow(mark_boundaries(temp, mask))
        a.set_axis_off()
        a.title.set_text(y_pred[indx])

# <font color='deeppink'> Let's do the same, for some samples that were not misclassified. What is the AI model looking at?

In [None]:
# Every test index (index 0 included) that was classified correctly.
wrong = set(misclassified_samples)
candidates = [i for i in range(len(y_test)) if i not in wrong]

# Draw as many correct samples as there are misclassified ones, capped by the
# number available. Sampling without replacement always terminates, even when
# there are fewer correct samples than misclassified ones.
n_good = min(len(misclassified_samples), len(candidates))
good_samples = random.sample(candidates, n_good)
print(good_samples)

In [None]:
n_good = len(good_samples)

if n_good == 0:
    # plt.subplots cannot create a grid with zero columns.
    print("No correctly classified samples to show.")
else:
    # squeeze=False always returns a 2-D array of axes, so a single sample is
    # handled the same way as several.
    fig, axs = plt.subplots(1, n_good, figsize=(30, 20), squeeze=False)
    axs = axs.ravel()

    for indx, a in zip(good_samples, axs):
        a.imshow(X_test[indx])
        a.set_axis_off()
        # The title is the label the model predicted for this image.
        a.title.set_text(y_pred[indx])

In [None]:
def predict_proba(batch):
    """Return one probability column per class for a batch of images.

    The network has a single sigmoid output p (the score for the class encoded
    as 1). LIME needs a column per class to be able to explain the class that
    was actually predicted, so the output is expanded to [1 - p, p].
    """
    positive = model.predict(batch, verbose=0).reshape(-1, 1)
    return np.hstack([1 - positive, positive])


n_good = len(good_samples)

if n_good == 0:
    # plt.subplots cannot create a grid with zero columns.
    print("No correctly classified samples to explain.")
else:
    # One explainer is enough for all images.
    explainer = lime_image.LimeImageExplainer(verbose=False)

    # squeeze=False always returns a 2-D array of axes, also for a single sample.
    fig, axs = plt.subplots(1, n_good, figsize=(30, 20), squeeze=False)
    axs = axs.ravel()

    for indx, a in zip(good_samples, axs):
        explanation = explainer.explain_instance(X_test[indx].astype('double'), predict_proba, top_labels=2, hide_color=0, num_samples=500)
        # top_labels[0] is the class with the highest probability, i.e. the predicted class.
        temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=1000, hide_rest=True)
        # Assumes X_test still holds the [0, 1] floats produced by transform.resize;
        # rescaling with `/ 2 + 0.5` would squeeze them into [0.5, 1].
        a.imshow(mark_boundaries(temp, mask))
        a.set_axis_off()
        a.title.set_text(y_pred[indx])