This is a demonstration of a CNN in Keras solving a blob classification problem.

You need some additional deep learning libraries to run this example. Use a separate virtual environment for this purpose:

```
conda env create -f deeplearn_env.yml
conda activate deeplearn_env
python -m ipykernel install --user --name=deeplearn_env
conda deactivate
```
Then restart JupyterLab and choose the `deeplearn_env` kernel.


In [None]:
#This function provides some data as a Pandas dataframe
from utils.practice_data import generateBlobsData
from utils.practice_data import showBlobs

# Dataset configuration.
number_of_classes = 11   # 2-6: normal blobs; 7-11: more difficult
number_of_samples = 1200
image_size = 64          # in pixels

# Folder with the source images. Alternative whose images are not
# "segmented" precisely:
#   imageDir = './assets/difficult_blobs/'
imageDir = './assets/simple_blobs/'

# Generate the labelled blob samples used by the rest of the notebook.
problem = generateBlobsData(
    imageDir,
    number_of_classes,
    number_of_samples,
    image_size,
    noiseSize=20,
)

In [None]:
%matplotlib inline
showBlobs(problem.head(8))

In [None]:
#Based on the Keras MNIST dataset classification tutorial TODO link
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
# Stack the per-sample entries of each column into one array:
# image data in x, class labels in y.
x = np.stack(problem['raw_data'])
y = np.stack(problem['class'])

# Hold out 20% of the samples as the test set, then split 20% of the
# remainder off as the validation set (fixed seed for repeatability).
x_trainval, x_test, y_trainval, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
x_train, x_val, y_train, y_val = train_test_split(
    x_trainval, y_trainval, test_size=0.2, random_state=0
)

# input image dimensions (axes 1 and 2 of the stacked array)
img_rows, img_cols = x_train.shape[1:3]

In [None]:

print('Training data shape:', x_train.shape)
# Keras backends differ in where they expect the channel axis, so choose the
# single-channel per-image shape that matches the active backend.
if keras.backend.image_data_format() == 'channels_first':
    display("Converting channel order....")
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Give every split the shape (n_samples, *input_shape).
x_train = x_train.reshape((len(x_train),) + input_shape)
x_val = x_val.reshape((len(x_val),) + input_shape)
x_test = x_test.reshape((len(x_test),) + input_shape)
print('Training data shape - ready for processing:', x_train.shape)

In [None]:
import matplotlib.pyplot as plt
#TODO what would happen if we won't normalise the input?
print("Input data os of type {} with min and max values {} and {}".format(
    x_train.dtype, x_train.min(), x_train.max()))

# Convert every split to float32 and divide by 255
# (assumes 8-bit pixel values - the print above shows the actual range).
x_train = x_train.astype('float32') / 255
x_val = x_val.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print("Now it is normalised {} with min and max values {} and {}".format(
    x_train.dtype, x_train.min(), x_train.max()))
#TODO - If you repeat execution of this cell you will see that it is a bad programming style!
#What would you change here?

In [None]:
# Training hyper-parameters
epochs = 5        # how many times the whole dataset is passed through the network
batch_size = 16   # how many examples go through the network at once

# Convert the class vectors of every split to binary class matrices
# (one-hot encoding with number_of_classes columns).
y_train_onehot, y_val_onehot, y_test_onehot = (
    keras.utils.to_categorical(labels, number_of_classes)
    for labels in (y_train, y_val, y_test)
)

In [None]:
#One-hot encoding
display(y_val_onehot[0])
display(y_val[0])

In [None]:
# CNN: two 3x3 convolutions -> 2x2 max-pooling -> dropout -> flatten ->
# dense hidden layer -> dropout -> softmax output.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # one softmax output per class
    Dense(number_of_classes, activation='softmax'),
])

In [None]:
# Configure training: categorical cross-entropy against the one-hot targets,
# Adadelta with its default settings, and accuracy reported next to the loss.
# NOTE(review): Adadelta's default learning rate appears to differ between
# Keras releases (1.0 in older standalone Keras, 0.001 in tf.keras) - confirm
# against the installed version if training converges slowly.
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Train on the training split, passing the validation split so its metrics
# are tracked during fitting.
fitting_history = model.fit(
    x_train,
    y_train_onehot,
    validation_data=(x_val, y_val_onehot),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# Evaluate the trained model on the validation split: score[0] is the loss,
# score[1] the accuracy metric requested in compile().
score = model.evaluate(x_val, y_val_onehot, verbose=0)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

In [None]:
print(fitting_history.history.keys())
%matplotlib widget
plt.plot(fitting_history.history['loss'])
plt.show()

In [None]:
#Let's see how our CNN performs on the unseen test set
from sklearn.metrics import confusion_matrix

# Sequential.predict_classes() was removed from Keras (TensorFlow 2.6).
# Taking the argmax over the softmax outputs yields the same class indices
# and works on both old and new versions.
y_predict_test = model.predict(x_test, verbose=0).argmax(axis=-1)

# Rows of the confusion matrix are the true classes, columns the predictions.
conimg = confusion_matrix(y_test, y_predict_test)

# Draw on a fresh figure so the matrix is not painted onto whichever pyplot
# figure happens to be current.
cm_fig, cm_ax = plt.subplots()
cm_image = cm_ax.imshow(conimg, cmap=plt.cm.Blues)
cm_ax.set(title='Confusion matrix (test set)',
          xlabel='predicted class', ylabel='true class')
cm_fig.colorbar(cm_image, ax=cm_ax)
plt.show()