# Image Classification & Anomaly Detection

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

## Data Description

**Data**: The MNIST database (Modified National Institute of Standards and Technology database) is a large database of handwritten digits that is commonly used for training various image processing systems. Some examples from the MNIST are below: 

In [None]:
## Loading data
# load_data() is unpacked into a (images, labels) pair for the training split
# and another (images, labels) pair for the test split.

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [None]:
## Shapes of the training images and their labels

for caption, array in (('X_train shape :', x_train), ('y_train shape :', y_train)):
    print(caption, array.shape)

In [None]:
## Preview the first 3 training digits side by side.

fig, ax = plt.subplots(1, 3, figsize=(16, 4))
for axis, digit in zip(ax, x_train[:3]):
    # Reshape to 28x28 so imshow always receives a 2D image.
    axis.imshow(digit.reshape(28, 28), cmap='Greys');

## Preprocessing Before Model

In [None]:
# Hold out the tail of the training data (index 48000 onward) as the validation set.
# The right-hand sides are evaluated before assignment, so both slices come from
# the full, un-truncated arrays.

x_train, x_valid = x_train[:48000], x_train[48000:]
y_train, y_valid = y_train[:48000], y_train[48000:]

In [None]:
# Report how many samples ended up in each split.
for split, label in ((x_train, 'train samples'),
                     (x_valid, 'validation samples'),
                     (x_test, 'test samples')):
    print(split.shape[0], label)

In [None]:
# Add a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1), the input shape
# declared on the first Conv2D layer. The images are NOT flattened here.

x_train, x_valid, x_test = (
    split.reshape(split.shape[0], 28, 28, 1) for split in (x_train, x_valid, x_test)
)

In [None]:
# Cast every split to float32 so the pixel values can be rescaled in floating point.
x_train, x_valid, x_test = (
    split.astype('float32') for split in (x_train, x_valid, x_test)
)

In [None]:
# Normalizing pixel values (0-255) > (0-1)
# All three splits must share the same scale: the validation split is fed to
# model.fit(validation_data=...), so leaving it at 0-255 would make the
# validation loss/accuracy meaningless for a model trained on 0-1 inputs.
# NOTE: in-place division is not idempotent; re-run the preprocessing cells
# from the data-loading cell if this cell needs to be executed again.

x_train /= 255
x_valid /= 255
x_test /= 255

In [None]:
# One-hot encode the integer labels of every split into 10-class vectors with Keras.

from tensorflow.keras.utils import to_categorical

y_train, y_valid, y_test = (
    to_categorical(labels, 10) for labels in (y_train, y_valid, y_test)
)

## Building Classification Model

In [None]:
# Building a linear stack of layers with the sequential model.
# Everything is imported from tensorflow.keras so that the model, layers and
# optimizer all come from the same Keras implementation; mixing the standalone
# `keras` package with `tensorflow.keras` can pair incompatible versions.

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy

model = Sequential()

# first convolutional block; the input shape is declared only here, on the first layer
model.add(Conv2D(32, kernel_size=(3,3), strides=(1,1), activation='relu', input_shape=(28,28,1)))
model.add(MaxPool2D(pool_size=(2,2)))

# second convolutional block; its input shape is inferred from the previous layer
model.add(Conv2D(64, kernel_size=(3,3), strides=(1,1), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))

# flatten output of conv
model.add(Flatten())

# hidden layer
model.add(Dense(32, activation='relu'))

# output layer: one softmax probability per digit class
model.add(Dense(10, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Draw the model architecture as a diagram.
# NOTE(review): plot_model presumably needs pydot and graphviz installed — confirm in the target environment.
from tensorflow.keras.utils import plot_model
plot_model(model)

In [None]:
# Configure training: Adam optimizer (default settings), categorical
# cross-entropy loss for the one-hot labels, and accuracy as the tracked metric.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# training the model for 3 epochs (batch size 128), validating on the held-out split after each epoch
history = model.fit(x_train, y_train, batch_size=128, epochs=3, validation_data=(x_valid, y_valid))
history  # bare last expression: shows the returned object as the cell output

<pre>

According to the literature, CNNs achieve the best accuracy on this task, but they are also the most resource-intensive of the common methods. In particular, the feature-extraction (convolutional) layers of a CNN can capture the sharp, important strokes of the handwritten digits. The structure of a CNN is very similar to that of a regular neural network: just as in regular networks, we use a loss function and an optimizer. In addition, CNNs have convolutional, pooling, and flatten layers. For these reasons I considered a CNN the most appropriate method.

</pre>

## Measurement Model Performance

In [None]:
# Evaluate on the test set, then print the final-epoch train/validation metrics
# next to the test metrics. test_scores is indexed as [loss, accuracy].
test_scores = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = test_scores[0], test_scores[1]

# Last recorded value of every metric tracked during training.
last_epoch = {metric: values[-1] for metric, values in history.history.items()}

print("Train Accuracy =", last_epoch['accuracy'])
print("Validation Accuracy =", last_epoch['val_accuracy'])
print('Test accuracy:', test_accuracy)
print("--------------------------------------")
print("Train Loss =", last_epoch['loss'])
print("Validation Loss =", last_epoch['val_loss'])
print('Test loss:', test_loss)

<pre>

When we look at the performance metrics, it is possible to say that the model learns very well and makes predictions with very high accuracy in the test data.

</pre>

In [None]:
# Plot per-epoch accuracy (top) and loss (bottom) for the training and validation data.
# A single plt.subplots call creates the one figure that is drawn on; the previous
# version called plt.figure() twice, leaving `fig` pointing at a stray empty figure.
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(12, 6))

acc_ax.plot(history.history['accuracy'])
acc_ax.plot(history.history['val_accuracy'])
acc_ax.set_title('model accuracy')
acc_ax.set_ylabel('accuracy')
acc_ax.set_xlabel('epoch')
# The second curve is the validation split passed to fit(), not the test set.
acc_ax.legend(['train', 'validation'], loc='lower right')

loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'validation'], loc='upper right')

fig.tight_layout();

## Visualizing Data in 2D Latent Dimensions with t-SNE

In [None]:
# Reload the raw MNIST arrays: the classification section reshaped, rescaled
# and one-hot encoded these names, and t-SNE needs the original images and integer labels.
(x_train,y_train), (x_test,y_test) = tf.keras.datasets.mnist.load_data()

In [None]:
# Flatten each image into a single row of 784 pixel values.
x = np.reshape(x_train, (60000, 784))

#### TruncatedSVD

<font color='orange'>**We will use t-SNE to project the data into 2 dimensions, but first we will use TruncatedSVD to reduce the dimensionality to 50. In essence, t-SNE requires pairwise comparison of data points, so it can be incredibly computationally taxing on large, high-dimensional datasets unless the dimensionality undergoes an initial reduction.**</font>

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every pixel column before the decomposition.
scaler = StandardScaler()
x = scaler.fit_transform(x)

In [None]:
from sklearn.decomposition import TruncatedSVD

# Reduce the standardized pixels to 50 components before t-SNE.
# random_state is fixed because the default solver is randomized; without it
# the components (and therefore the t-SNE plot) change from run to run.
# The seed matches the one used for TSNE in this notebook.
tsvd = TruncatedSVD(n_components=50, random_state=42).fit_transform(x)

#### t-SNE

In [None]:
from sklearn.manifold import TSNE

# Embed the 50-component data into 2 dimensions, using all available cores
# (n_jobs=-1) and a fixed seed.
tsne_model = TSNE(n_components=2, n_jobs=-1, random_state=42)
tsne_res = tsne_model.fit_transform(tsvd)

#### t-SNE Visualization

In [None]:
import seaborn as sns

# Scatter the two t-SNE components, coloured by the integer digit label.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally (TypeError), and `data=` is unnecessary when the coordinate
# vectors are supplied directly.
plt.figure(figsize=(14, 14))
plt.title("Visualization of t-SNE results on MNIST train data", fontsize=24, weight='bold')
sns.scatterplot(x=tsne_res[:, 0], y=tsne_res[:, 1], hue=y_train, palette="bright", legend="full")
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel("Component 1", fontsize=16)
plt.ylabel("Component 2", fontsize=16)
plt.legend(fontsize=16);

Some interesting findings:

<pre>
We can see that images of 7 are closer to images of 9 than to images of 1. Also, looking at the middle region, it is notable that 3, 5 and 8 are confused with each other more often, because these digits resemble each other more than the other groups do. Since the algorithm separates the outer clusters well, 9 and 6 end up very far from each other. The fact that 4 is very close to 9 may indicate that there are anomalies in these images.
</pre>

## Anomaly Detection with PCA

In [None]:
# Reload the raw data and flatten each image into one DataFrame row
# (one column per pixel: side length squared).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = pd.DataFrame(np.reshape(x_train, (60000, x_train.shape[1] ** 2)))

In [None]:
# Show the first rows of the flattened pixel table.
x_train.head()

In [None]:
from sklearn.decomposition import PCA

# Project the images onto 5 principal components, then map the projections
# back to pixel space to obtain the reconstructions.
pca = PCA(n_components=5)
pca_data = pca.fit_transform(x_train)
inverse_pca_data = pca.inverse_transform(pca_data)

for caption, table in (("x_train shape", x_train),
                       ("pca_data shape", pca_data),
                       ("inverse_pca_data shape", inverse_pca_data)):
    print(caption, table.shape)

In [None]:
# Reconstruction Error
# Reconstruction error per image: squared pixel differences summed across each row.
# Despite the variable name this is a sum, not a mean, of squared errors.
MSE = (x_train - inverse_pca_data).pow(2).sum(axis=1)

In [None]:
# Visualization 20 digits with the highest MSE ( decreasing from left to right )

# Row labels of the 20 images with the largest reconstruction error, largest first.
MSE_max_scores = MSE.nlargest(20).index

plt.figure(figsize=(18, 10))

# Fill a 4x5 grid row by row, so the error decreases from left to right.
for position, row_label in enumerate(MSE_max_scores, start=1):
    plt.subplot(4, 5, position)
    pixels = x_train.iloc[row_label].values.reshape(28, 28)
    plt.imshow(pixels, interpolation='nearest', cmap='Greys')
plt.show()

In [None]:
# For every digit class, fit a separate 5-component PCA and show the 3 images
# of that class with the largest reconstruction error.

plt.figure(figsize=(20, 15))
n_rows, n_cols = 10, 3  # one grid row per digit, three worst images per row

for digit in range(10):
    # Rows of this digit only, re-indexed from 0 so index labels equal positions.
    digit_images = x_train[y_train == digit].reset_index(drop=True)

    pca = PCA(n_components=5)
    reconstructed = pca.inverse_transform(pca.fit_transform(digit_images))

    # Squared pixel differences summed per image.
    errors = ((digit_images - reconstructed) ** 2).sum(axis=1)

    for slot, row_label in enumerate(errors.nlargest(3).index):
        plt.subplot(n_rows, n_cols, digit * n_cols + slot + 1)
        plt.imshow(digit_images.iloc[row_label].values.reshape(28, 28), interpolation='nearest', cmap='Greys')
plt.show()

<pre>
Roughly speaking, the technique measures the distance between each original data point and its projection onto the subspace found by PCA (the reconstruction error, stored as MSE).

The larger this distance, the more abnormal the original image is. This is because a well-formed image keeps its recognizable structure even after being reduced to the subspace, so it is reconstructed with little error.
</pre>

<font color='blue'>*Created with* ❤ *by Mustafa Batuhan Ermiş.*</font>