In [49]:
from pathlib import Path

from PIL import Image
import pandas as pd
import numpy as np

import sklearn.model_selection as skms

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.layers import Dense

import plotly.express as px
import plotly.graph_objects as go

In [50]:
# Load the data
image_dir = Path('./images/')

# Sort the matches so the row order (and therefore the seeded train/test split
# further down) does not depend on the order the filesystem returns files in.
image_files = sorted(image_dir.glob('*.jpg'))

filepaths = pd.Series([str(p) for p in image_files], name = 'Filepath').astype('string')

# The label is the file name with its last '_'-separated piece (the image number
# plus extension) removed, e.g. 'Maine_Coon_12.jpg' -> 'Maine Coon'.
# Uses the Path objects directly, so no `os` import is required.
labels = pd.Series([' '.join(p.name.split('_')[:-1]) for p in image_files], name = 'Label').astype('string')

imagepaths_df = pd.concat([filepaths, labels], axis = 1)
imagepaths_df.info()
display(imagepaths_df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7390 entries, 0 to 7389
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Filepath  7390 non-null   string
 1   Label     7390 non-null   string
dtypes: string(2)
memory usage: 115.6 KB


Unnamed: 0,Filepath,Label
0,images\Abyssinian_1.jpg,Abyssinian
1,images\Abyssinian_10.jpg,Abyssinian
2,images\Abyssinian_100.jpg,Abyssinian
3,images\Abyssinian_101.jpg,Abyssinian
4,images\Abyssinian_102.jpg,Abyssinian
...,...,...
7385,images\yorkshire_terrier_95.jpg,yorkshire terrier
7386,images\yorkshire_terrier_96.jpg,yorkshire terrier
7387,images\yorkshire_terrier_97.jpg,yorkshire terrier
7388,images\yorkshire_terrier_98.jpg,yorkshire terrier


In [51]:
# Keep only the cats: rows whose label is entirely lowercase are discarded
# (in the listing above those are the dog breeds), then the index is renumbered.
is_lowercase_label = imagepaths_df['Label'].str.islower()
imagepaths_df = imagepaths_df.loc[~is_lowercase_label].reset_index(drop = True)

imagepaths_df.info()
display(imagepaths_df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2400 entries, 0 to 2399
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Filepath  2400 non-null   string
 1   Label     2400 non-null   string
dtypes: string(2)
memory usage: 37.6 KB


Unnamed: 0,Filepath,Label
0,images\Abyssinian_1.jpg,Abyssinian
1,images\Abyssinian_10.jpg,Abyssinian
2,images\Abyssinian_100.jpg,Abyssinian
3,images\Abyssinian_101.jpg,Abyssinian
4,images\Abyssinian_102.jpg,Abyssinian
...,...,...
2395,images\Sphynx_94.jpg,Sphynx
2396,images\Sphynx_95.jpg,Sphynx
2397,images\Sphynx_96.jpg,Sphynx
2398,images\Sphynx_98.jpg,Sphynx


In [52]:
# Only the last expression of a cell is rendered automatically, so the summary
# has to be displayed explicitly or it is silently thrown away.
display(imagepaths_df.describe())

# Spot-check one breed whose name contains a space
imagepaths_df[imagepaths_df['Label'] == 'Maine Coon']

Unnamed: 0,Filepath,Label
1200,images\Maine_Coon_1.jpg,Maine Coon
1201,images\Maine_Coon_10.jpg,Maine Coon
1202,images\Maine_Coon_100.jpg,Maine Coon
1203,images\Maine_Coon_101.jpg,Maine Coon
1204,images\Maine_Coon_102.jpg,Maine Coon
...,...,...
1395,images\Maine_Coon_95.jpg,Maine Coon
1396,images\Maine_Coon_96.jpg,Maine Coon
1397,images\Maine_Coon_97.jpg,Maine Coon
1398,images\Maine_Coon_98.jpg,Maine Coon


In [53]:
# fig = px.imshow(Image.open(imagepaths_df['Filepath'].iloc[0]), title = f'{imagepaths_df.Label.iloc[0]}')
# fig.show()

In [59]:
# Shuffled hold-out split with a fixed seed so it is repeatable.
# test_size = 0.25 is what train_test_split uses when no size is given.
train, test = skms.train_test_split(
    imagepaths_df,
    test_size = 0.25,
    shuffle = True,
    random_state = 25,
)
display(train, test)

Unnamed: 0,Filepath,Label
1930,images\Russian_Blue_262.jpg,Russian Blue
1435,images\Persian_137.jpg,Persian
2208,images\Sphynx_110.jpg,Sphynx
126,images\Abyssinian_24.jpg,Abyssinian
456,images\Birman_15.jpg,Birman
...,...,...
1175,images\Egyptian_Mau_77.jpg,Egyptian Mau
255,images\Bengal_149.jpg,Bengal
2191,images\Siamese_91.jpg,Siamese
318,images\Bengal_25.jpg,Bengal


Unnamed: 0,Filepath,Label
1498,images\Persian_206.jpg,Persian
949,images\British_Shorthair_47.jpg,British Shorthair
2374,images\Sphynx_72.jpg,Sphynx
1802,images\Russian_Blue_100.jpg,Russian Blue
1000,images\Egyptian_Mau_1.jpg,Egyptian Mau
...,...,...
416,images\Birman_113.jpg,Birman
1090,images\Egyptian_Mau_187.jpg,Egyptian Mau
3,images\Abyssinian_101.jpg,Abyssinian
1869,images\Russian_Blue_170.jpg,Russian Blue


In [55]:
# Both generators apply the MobileNetV2 preprocessing function to every image.
# The training generator additionally reserves 20% of its rows for validation.
test_gen = ImageDataGenerator(preprocessing_function = preprocess_input)
train_gen = ImageDataGenerator(
    preprocessing_function = preprocess_input,
    validation_split = .2,
)

In [56]:
# Settings common to all three iterators, kept in one place so they cannot drift apart.
flow_kwargs = dict(
    x_col = 'Filepath',
    y_col = 'Label',
    class_mode = 'categorical',
    target_size = (224, 224),
    batch_size = 32,
    seed = 25,
)

train_images = train_gen.flow_from_dataframe(
    dataframe = train,
    subset = 'training',
    **flow_kwargs
)

# Reuse the label -> index mapping of the training iterator for the other two,
# so the model's output columns mean the same thing everywhere even if a class
# happens to be absent from one of the splits.
class_names = list(train_images.class_indices)

val_images = train_gen.flow_from_dataframe(
    dataframe = train,
    subset = 'validation',
    classes = class_names,
    **flow_kwargs
)

# shuffle = False keeps the batches in dataframe order so predictions can be
# matched back to the rows of `test`.
test_images = test_gen.flow_from_dataframe(
    dataframe = test,
    shuffle = False,
    classes = class_names,
    **flow_kwargs
)

Found 1440 validated image filenames belonging to 12 classes.
Found 360 validated image filenames belonging to 12 classes.
Found 600 validated image filenames belonging to 12 classes.


In [62]:
def get_compiled_model(num_classes = 12):
    """Build and compile a classifier on top of a frozen MobileNetV2 base.

    The MobileNetV2 base is loaded with ImageNet weights, without its own
    classification head, and is marked non-trainable. Two 128-unit ReLU dense
    layers and a softmax output layer are stacked on its pooled output.

    Args:
        num_classes: Number of units in the softmax output layer, i.e. the
            number of categories to predict. Defaults to 12.

    Returns:
        A `tf.keras.Model` compiled with the Adam optimizer, categorical
        cross-entropy loss and an accuracy metric.
    """
    mobilenet = MobileNetV2(
        input_shape = (224, 224, 3), # Input shape
        include_top = False, # Leave out MobileNet's own final dense classification layers (we add our own below)
        weights = 'imagenet', # Start from the weights pre-trained on ImageNet
        pooling = 'avg' # Average-pool the output of the last block
    )

    # Freeze the pre-trained base so only the new dense layers are trained
    mobilenet.trainable = False

    inputs = mobilenet.input
    x = Dense(128, activation = 'relu')(mobilenet.output)
    x = Dense(128, activation = 'relu')(x)
    outputs = Dense(num_classes, activation = 'softmax')(x)

    model = tf.keras.Model(inputs = inputs, outputs = outputs)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model

In [64]:
# Build a fresh model and train it for 10 epochs, scoring the validation
# subset after each epoch.
model = get_compiled_model()
history = model.fit(
    train_images,
    epochs = 10,
    validation_data = val_images,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [66]:
# Run the trained model over the test iterator
predict_results = model.predict(
    test_images
)

In [79]:
# Order the class names by their numeric index explicitly instead of relying on
# the insertion order of the class_indices dict.
class_indices = test_images.class_indices
result_labels = sorted(class_indices, key = class_indices.get)

# For every test image take the highest-scoring output column and map it to its label
results = [result_labels[m] for m in np.argmax(predict_results, axis = 1)]

['Persian', 'Ragdoll', 'Sphynx', 'Russian Blue', 'Egyptian Mau', 'Egyptian Mau', 'British Shorthair', 'Maine Coon', 'Maine Coon', 'Abyssinian', 'British Shorthair', 'Egyptian Mau', 'Bengal', 'Egyptian Mau', 'Russian Blue', 'Sphynx', 'Russian Blue', 'Ragdoll', 'Ragdoll', 'Sphynx', 'Ragdoll', 'Egyptian Mau', 'Bengal', 'Siamese', 'Bengal', 'Sphynx', 'Russian Blue', 'Russian Blue', 'Persian', 'Maine Coon', 'British Shorthair', 'Bombay', 'Siamese', 'Siamese', 'Birman', 'Birman', 'Siamese', 'British Shorthair', 'Egyptian Mau', 'Egyptian Mau', 'Maine Coon', 'Egyptian Mau', 'Birman', 'Bengal', 'Siamese', 'Abyssinian', 'Bengal', 'Maine Coon', 'Persian', 'Maine Coon', 'Russian Blue', 'Ragdoll', 'Sphynx', 'British Shorthair', 'Bengal', 'Siamese', 'Maine Coon', 'Bombay', 'Sphynx', 'Bombay', 'Sphynx', 'Abyssinian', 'Persian', 'Sphynx', 'Abyssinian', 'Bombay', 'British Shorthair', 'Sphynx', 'British Shorthair', 'Birman', 'Sphynx', 'Abyssinian', 'Bengal', 'Sphynx', 'Abyssinian', 'Abyssinian', 'Siames

In [80]:
# Show the first few test images with their true and predicted labels.
head = 20
sample = test.head(head)

# zip stops at the shorter of the two sequences, so this also works when fewer
# than `head` test rows or predictions are available.
true_rows = sample[['Filepath', 'Label']].itertuples(index = False, name = None)
for (filepath, label), predicted in zip(true_rows, results):
    # The context manager closes the image file once plotly has read the pixels
    with Image.open(filepath) as img:
        fig = px.imshow(img, title = f'Is: {label} | Predict: {predicted}')
    fig.show()