<h1 style=" text-align:center; color:Blue; font-size:40px;"> <u><b>Cat Breeds prediction with InceptionV3</b></u> </h1>

## 1. Importing Libraries


In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from pathlib import Path
import matplotlib.pyplot as plt
from scipy import ndimage, misc

from sklearn.model_selection import train_test_split 
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, SGD, RMSprop
from keras.applications.inception_v3 import InceptionV3, preprocess_input


## 2. Extrating Files

In [None]:
'''
    Must contain folder of cat images in "images" folder 
'''

filelist  = []

for dirname, _, filenames in os.walk('images'):
    for filename in filenames:
        filelist.append (os.path.join(dirname, filename))

In [None]:
len(filelist)

In [None]:
filelist

### Taking only those 10 labels given in the assignment.

In [None]:
labels_needed = ['Bombay', 'Calico', 'Burmese', 'Himalayan', 
                 'Munchkin', 'Ragdoll', 'Siberian', 'British Shorthair', 'Russian Blue', 
                 'Dilute Calico']

In [None]:
Filepaths   = []
labels = []

for image_file in filelist:
    label = image_file.split(os.path.sep)[-2]
    if label in labels_needed:

        Filepaths.append(image_file)
        labels.append(label)

In [None]:
set(labels)

In [None]:
len(Filepaths), len(labels)

### Creating a dataframe with file paths and the labels for them

In [None]:
df = pd.DataFrame( list( zip (Filepaths, labels) ), columns = ['Filepath', 'Labels'] )
df

In [None]:
df = (df.sample(frac = 1).reset_index()).drop(columns = 'index')
df

### Displaying first 9 pictures

In [None]:
f,a = plt.subplots(nrows=3, ncols=3,figsize=(13, 7),
                        subplot_kw={'xticks': [], 'yticks': []})

for i, ax in enumerate(a.flat):
    ax.imshow(plt.imread(df.Filepath[i]))
    ax.set_title(df.Labels[i])
    
plt.tight_layout()
plt.show()

In [None]:
ax=pd.value_counts(df['Labels'],ascending=True).plot(kind='barh',
                                                       fontsize="40",
                                                       title="Distribution Of classes",
                                                       figsize=(15,8))
ax.set(xlabel="Images per class", ylabel="Classes")
ax.xaxis.label.set_size(40)
ax.yaxis.label.set_size(40)
ax.title.set_size(60)
plt.show()

### Checking for class imbalance

In [None]:
df.Labels.value_counts()

## 3. Splitting Data

In [70]:
train_ratio = .75
validation_ratio = 0.10
test_ratio = 0.25

train, test = train_test_split(df, test_size = test_ratio )
val, test = train_test_split(test, test_size=test_ratio/(test_ratio + validation_ratio))

## 4. Pre- processing

In [71]:
img_datagen = ImageDataGenerator(rescale=1./255, 
                                   rotation_range=30, 
                                   width_shift_range=0.2,
                                   height_shift_range=0.2, 
                                   horizontal_flip = 'true')
   

x_train =  img_datagen.flow_from_dataframe(dataframe = train,  x_col='Filepath', y_col='Labels',  target_size=(299, 299), shuffle=False, batch_size=10, seed=10)
x_val = img_datagen.flow_from_dataframe(dataframe = val,  x_col='Filepath', y_col='Labels',  target_size=(299, 299), shuffle=False, batch_size=10, seed=10)
x_test = img_datagen.flow_from_dataframe(dataframe = test,  x_col='Filepath', y_col='Labels',  target_size=(299, 299), shuffle=False, batch_size=10, seed=10)

Found 11740 validated image filenames belonging to 10 classes.
Found 1118 validated image filenames belonging to 10 classes.
Found 2796 validated image filenames belonging to 10 classes.


## 5. Modelling the architecture

In [75]:
from keras.models import Sequential
from keras.layers.pooling import GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Dropout

i_model = InceptionV3(weights= 'imagenet', include_top=False, input_shape=(299, 299, 3))

model = Sequential()
model.add(i_model)
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.25))
model.add(Dense(128))
model.add(Dropout(0.25))
model.add(Dense(10, activation = 'softmax'))
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_resnet_v2 (Functio (None, 8, 8, 1536)        54336736  
_________________________________________________________________
global_average_pooling2d_3 ( (None, 1536)              0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 1536)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               196736    
_________________________________________________________________
dropout_5 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 10)                1290      
Total params: 54,534,762
Trainable params: 54,474,218
Non-trainable params: 60,544
_____________________________________

## 6. Compiling the model

In [76]:
model.compile(optimizer = SGD(),
             loss="categorical_crossentropy",
             metrics=["accuracy"])

## 7. Training the model

In [77]:
history = model.fit(x_train, validation_data = x_val,steps_per_epoch = 175,validation_steps = 50,
                epochs = 10, verbose = 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## 8. Ploting the accuracy and loss

In [None]:
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

## 9. Testing the model

In [None]:
predictions = model.predict(x_test)
predictions = np.argmax(predictions, axis=1)

In [None]:
labels = x_train.class_indices
labels

In [None]:
test["Labels"].replace({"Bombay": 0,'British Shorthair': 1,
 'Burmese': 2,
 'Calico': 3,
 'Dilute Calico': 4,
 'Himalayan': 5,
 'Munchkin': 6,
 'Ragdoll': 7,
 'Russian Blue': 8,
 'Siberian': 9}, inplace = True)


## 10. Evaluating the test data

## Test Accuracy

In [78]:
test_accuracy = model.evaluate(x_test)[1] * 100
print('Test accuracy is : ',test_accuracy, '%' )

Test accuracy is :  73.31902980804443 %


## Confusion Matrix

In [None]:
confusion_matrix(test.Labels , predictions)

## F1 Score

In [None]:
from sklearn.metrics import accuracy_score, f1_score
print('F1 score is',f1_score(test.Labels, predictions, average = 'weighted') *100, "%")


## ROC - AUC Score

In [79]:
predicted_probab =model.predict_proba(x_test)
predicted_probab



array([[2.3692778e-04, 1.3087247e-03, 5.5232231e-04, ..., 1.1765634e-02,
        5.7315640e-04, 9.6600963e-04],
       [3.1378046e-03, 1.9602696e-03, 3.2127231e-02, ..., 7.0342869e-01,
        1.7122520e-03, 7.6020123e-03],
       [9.6856445e-01, 4.2303880e-03, 2.0633357e-02, ..., 8.3210971e-04,
        3.9650099e-03, 1.9663556e-04],
       ...,
       [9.0001076e-01, 2.6778052e-02, 4.5984734e-02, ..., 2.7011151e-03,
        1.4892575e-02, 1.2706846e-03],
       [9.4108889e-03, 1.4043640e-01, 3.1928830e-03, ..., 5.7717771e-03,
        3.2894532e-03, 9.9313934e-04],
       [1.5032940e-04, 4.9978035e-04, 2.3133036e-04, ..., 7.0100343e-03,
        1.5852388e-03, 4.9489894e-04]], dtype=float32)

In [80]:
print("ROC- AUC score is", roc_auc_score( test.Labels, predicted_probab, multi_class='ovr')*100)

ROC- AUC score is 93.02359676562861
