## Import the required libraries

In [None]:
!pip install caer canaro

In [None]:
import os
import caer
import canaro
import pandas as pd
import numpy as np
import cv2 as cv
import gc
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Every image is resized to this (width, height) before being fed to the model
IMAGE_SIZE = (80, 80)
# Number of colour channels per image (1 = single-channel)
channels = 1
# Root directory holding one sub-directory of images per character
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

In [None]:
# Map each character (sub-directory name under char_path) to the number of
# entries in its directory.
char_dict = {}
for char in os.listdir(char_path):
    char_dir = os.path.join(char_path, char)
    # Skip stray non-directory entries: calling os.listdir on a plain file
    # would raise NotADirectoryError and abort the whole cell.
    if not os.path.isdir(char_dir):
        continue
    char_dict[char] = len(os.listdir(char_dir))

# Sort in descending order (characters with the most entries first)
char_dict = caer.sort_dict(char_dict, descending=True)

# Display the sorted result
char_dict

In [None]:
# Keep the names of the first 10 entries of the sorted character counts,
# i.e. the 10 categories with the most images. Each entry is indexable and
# its first element is the character name.
characters = [entry[0] for entry in char_dict[:10]]
characters

## Create the training data

In [None]:
# Load the images of the selected characters from disk, preprocessed to
# IMAGE_SIZE with the configured number of channels, in shuffled order.
train = caer.preprocess_from_dir(
    char_path,
    characters,
    channels=channels,
    IMG_SIZE=IMAGE_SIZE,
    isShuffle=True,
    verbose=0,
)

In [None]:
len(train)  # Number of training samples that were loaded

### Visualizing the data (OpenCV doesn't display well in Jupyter notebooks)

In [None]:
import matplotlib.pyplot as plt

# Render train[0][0] (presumably the image of the first training sample --
# confirm against caer's output layout) using a grayscale colormap.
plt.figure(figsize=(15,10))
plt.imshow(train[0][0], cmap = 'gray')
plt.show()

### Separate the training set into the features and labels.

In [None]:
# Split the combined training data into two parallel collections:
# the features (images) and their labels.
featureSet, labels = caer.sep_train(train, IMG_SIZE=IMAGE_SIZE)

#### Normalize the featureSet and one-hot encode the labels

In [None]:
from tensorflow.keras.utils import to_categorical

# Normalize the image features with caer's helper
featureSet = caer.normalize(featureSet)
# One-hot encode the labels, one column per selected character
labels = to_categorical(labels, num_classes=len(characters))


### Create our training and validation data

In [None]:
import sklearn.model_selection as skm

# Hold out 20% of the samples for validation; the rest is used for training
X_train, X_val, y_train, y_val = skm.train_test_split(
    featureSet,
    labels,
    test_size=0.2,
)

In [None]:
# X_train, X_val, y_train, y_val = caer.train_val_split(np.array(item) for item in split_data)

In [None]:
# Drop the names of intermediate data that is no longer needed, then run a
# garbage collection pass.
del train, featureSet, labels
gc.collect()

### Image data generator

In [None]:
# Training hyper-parameters
BATCH_SIZE = 32  # samples per batch drawn from the generator
EPOCHS = 10      # number of epochs passed to model.fit

In [None]:
# Create canaro's image data generator and have it serve the training
# arrays in batches of BATCH_SIZE.
datagen = canaro.generators.imageDataGenerator()
train_gen = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)

### Creating the Model

In [None]:
# Build the classifier with one output per selected character.
# The labels are one-hot encoded across len(characters) mutually exclusive
# classes, so categorical cross-entropy is the matching loss;
# binary cross-entropy would score every output as an independent yes/no
# problem.
model = canaro.models.createSimpsonsModel(
    IMG_SIZE=IMAGE_SIZE,
    channels=channels,
    output_dim=len(characters),
    loss='categorical_crossentropy',
    decay=1e-7,
    learning_rate=0.001,
    momentum=0.9,
    nesterov=True,
)

In [None]:
# Print the model's summary
model.summary()

### Training the model

In [None]:
from tensorflow.keras.callbacks import LearningRateScheduler

# Adjust the learning rate during training using canaro's schedule function
callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]

# Train from the batch generator and evaluate on the held-out validation
# arrays; step counts are the number of whole batches in each set.
training = model.fit(
    train_gen,
    epochs=EPOCHS,
    steps_per_epoch=len(X_train) // BATCH_SIZE,
    validation_data=(X_val, y_val),
    validation_steps=len(y_val) // BATCH_SIZE,
    callbacks=callbacks_list,
)

In [None]:
# Show the class names; their list positions are used later to decode predictions
characters

## Testing

In [None]:
test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'

# cv.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with a clear message instead of a confusing error further down.
img = cv.imread(test_path)
if img is None:
    raise FileNotFoundError(f'Could not read test image: {test_path}')

# OpenCV loads images in BGR channel order while matplotlib expects RGB, so
# convert for display only. `img` itself stays BGR because prepare()
# converts it with cv.COLOR_BGR2GRAY.
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
plt.show()

In [None]:
def prepare(image):
    """Turn a BGR image into an input the trained model can predict on.

    Steps: convert to grayscale, resize to IMAGE_SIZE, reshape with
    caer.reshape to a single-channel input, then apply caer.normalize.

    Args:
        image: Image in OpenCV's BGR channel order (e.g. from cv.imread).

    Returns:
        The reshaped, normalized image, ready to pass to model.predict.
    """
    image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    image = cv.resize(image, IMAGE_SIZE)
    # Channel count is 1 because the image was just converted to grayscale.
    image = caer.reshape(image, IMAGE_SIZE, 1)
    # The training features were passed through caer.normalize before
    # fitting, so the inference input must get the same treatment; otherwise
    # the model sees values on a different scale than it was trained on.
    return caer.normalize(image)

In [None]:
# Preprocess the test image and run it through the trained model
predictions = model.predict(prepare(img))

In [None]:
# Take the index of the highest score in the first prediction row and map it
# back to the corresponding character name.
print(characters[np.argmax(predictions[0])])