In [1]:
!pip install caer canaro

In [2]:
import os
import cv2 as cv
import numpy as np
import gc  #for garbage collection
import canaro
import caer
#when building deep vision models, every input image must have the same size (configured in the next cell)

In [5]:
#deep vision models expect every input image to have the same size,
#so all images in the data set are resized to IMG_SIZE before being fed to the network
#(80x80 is reported to work well for the simpsons images)
IMG_SIZE = (80, 80)

#number of colour channels to keep: 1 means grayscale, since colour is not needed here
channel = 1

#base directory that holds one sub-folder of images per character
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

In [6]:
#goal: find the characters that have the most images
#step 1: map every entry of char_path (one folder per character, e.g. agnes_skinner)
#        to the number of files stored inside that folder
char_dict = {
    name: len(os.listdir(os.path.join(char_path, name)))
    for name in os.listdir(char_path)
}

#step 2: sort by image count, largest first
#(the next cell reads the character name as element [0] of each sorted entry)
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict

In [7]:
#keep the names of the first 10 entries of the sorted char_dict, i.e. the 10 characters with the most images
#zip(range(10), ...) stops after at most 10 entries and also copes with fewer than 10 being available;
#entry[0] is the character name of each sorted entry
character_list = [entry[0] for _, entry in zip(range(10), char_dict)]

character_list

In [8]:
#build the training data from the image folders
#caer goes through the folders under char_path for the characters in character_list
#and preprocesses every image to `channel` channels and IMG_SIZE, shuffling the result
train = caer.preprocess_from_dir(
    char_path,
    character_list,
    channels=channel,
    IMG_SIZE=IMG_SIZE,
    isShuffle=True,
)

In [9]:
#number of entries (image/label samples) produced by the preprocessing step
len(train)

In [10]:
#visual sanity check: show the image array of the first training sample
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(30, 30))
#train[0][0] is the image part of the first sample; it is single-channel, so use a gray colormap
ax.imshow(train[0][0], cmap='gray')
plt.show()

In [11]:
#split the training set into features and labels
#train is a list of samples (13811 in the recorded run); each sample holds two parts:
#the image array and its label
featureSet, labels = caer.sep_train(
    train,
    IMG_SIZE=IMG_SIZE,
)

In [12]:
from tensorflow.keras.utils import to_categorical

#normalize the features (intended range 0 to 1) so the network can learn faster
#note: this overwrites featureSet, so re-running this cell alone would normalize a second time
featureSet = caer.normalize(featureSet)

#labels are not normalized, but they must be one-hot encoded:
#integer class ids become binary class vectors with one slot per character
num_classes = len(character_list)
labels = to_categorical(labels, num_classes)

In [13]:
#split the data into a training part and a validation part
#the model learns from the training part and is evaluated on the validation part
#val_ratio=0.2 -> 20% goes to validation, 80% to training
x_train, x_val, y_train, y_val = caer.train_val_split(
    featureSet,
    labels,
    val_ratio=0.2,
)

In [14]:
#free memory: these intermediates are no longer referenced after the train/validation split
#note: earlier cells that use train, featureSet or labels cannot be re-run after this without rebuilding them
del train, featureSet, labels
gc.collect()

In [15]:
#number of samples per training batch
BATCH_SIZE = 32
#number of passes over the training data
EPOCHS = 10

In [16]:
#image data generator: meant to derive new images from the existing ones
#to introduce some randomness into training
#canaro wraps a simple image generator from the keras library
data_generator = canaro.generators.imageDataGenerator()

#yield (x_train, y_train) in batches of BATCH_SIZE
train_gen = data_generator.flow(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
)

In [18]:
#create the model
#the labels are one-hot vectors over len(character_list) mutually exclusive classes,
#so the multi-class loss categorical_crossentropy is used instead of binary_crossentropy
#(binary_crossentropy scores each output independently and can report misleadingly high accuracy)
#NOTE(review): assumes the canaro model ends in a softmax layer - confirm with model.summary()
model = canaro.models.createSimpsonsModel(
    IMG_SIZE=IMG_SIZE,
    channels=channel,
    output_dim=len(character_list),
    loss='categorical_crossentropy',
    decay=1e-6,
    learning_rate=0.001,
    momentum=0.9,
    nesterov=True,
)


In [19]:
#print the summary of the model built in the previous cell
model.summary()

In [20]:
#callback list for training
#the learning rate is rescheduled at specific intervals so that the network can train better
from tensorflow.keras.callbacks import LearningRateScheduler

#canaro supplies the schedule function; the keras callback applies it during fit()
lr_scheduler = LearningRateScheduler(canaro.lr_schedule)
callbacks_list = [lr_scheduler]

In [21]:
#train the model
#train_gen yields batches of BATCH_SIZE, so one epoch is len(x_train)//BATCH_SIZE steps
#validation_data is passed as in-memory arrays, so no validation_steps is given:
#keras documents that argument as relevant only for dataset inputs, and the previous
#floor division could leave the last partial batch of validation samples unevaluated
training = model.fit(train_gen,
                    steps_per_epoch=len(x_train)//BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(x_val,y_val),
                    callbacks=callbacks_list)

In [22]:
#show the class names again; a later cell indexes this list with the argmax of the prediction
character_list

In [109]:
#load one test image with opencv to try the model on it
test_path = r"../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/comic_book_guy_13.jpg"
img = cv.imread(test_path)

#cv.imread does not raise on a missing/unreadable file, it returns None - fail early with a clear message
if img is None:
    raise FileNotFoundError(f"could not read test image: {test_path}")

#opencv loads colour images in BGR order while matplotlib expects RGB, so convert a copy for display only
#(img itself stays BGR because the next cell converts it with COLOR_BGR2GRAY)
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
plt.show()

In [110]:
#convert the loaded BGR test image to a single gray channel
#note: this overwrites img, so re-running this cell alone would hand an already-gray image to cvtColor
img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

In [59]:
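#leftover draft, not executed: prediction = model.predict(prepare(img))
#(no prepare() helper is defined in the cells shown here; the neighbouring cells do the preprocessing step by step)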

In [111]:
#bring the gray test image into the form the model was trained on
#resize to the training image size
img = cv.resize(img, IMG_SIZE)
#reshape with IMG_SIZE and 1 channel for the model input
img = caer.reshape(img,IMG_SIZE,1)
#the training features were passed through caer.normalize, so the test image must be scaled the same way;
#otherwise the model sees raw pixel values it was never trained on
#note: this cell overwrites img, re-run the loading and grayscale cells before running it again
img = caer.normalize(img)

In [112]:
#run the trained model on the prepared test image and display the raw output
#(the next cell takes the argmax of prediction[0] to pick the class)
prediction = model.predict(img)
prediction

In [113]:
#translate the prediction into a class name:
#take the position of the largest value in the first prediction row and look it up in character_list
predicted_index = np.argmax(prediction[0])
print(character_list[predicted_index])