In [1]:
import os
import cv2 as cv
import numpy as np
import gc
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models

In [2]:
# Target size handed to cv.resize for every image (OpenCV order: width, height).
IMG_SIZE = (80, 80)
# Dataset root; each entry inside it is listed as one character's image folder.
char_path = '../input/the-simpsons-characters-dataset/simpsons_dataset'

In [3]:
# Map each character folder name to the number of files it contains.
characters_size = {}
for char in os.listdir(char_path):
    path = os.path.join(char_path, char)
    # Stray files in the dataset root (archives, hidden files, ...) are not
    # character folders; calling os.listdir on them would raise, so skip them.
    if not os.path.isdir(path):
        continue
    characters_size[char] = len(os.listdir(path))

In [4]:
# Re-order the mapping from the most-photographed character to the least.
# sorted() is stable with reverse=True, so ties keep their original order.
by_count_desc = sorted(characters_size.items(), key=lambda item: item[1], reverse=True)
characters_size = dict(by_count_desc)

In [5]:
# Keep the (at most) ten characters with the most images; dict iteration
# follows insertion order, i.e. the descending-count order set up earlier.
characters = list(characters_size)[:10]
count = len(characters)
print(characters)

In [6]:
# Load every image of the selected characters as a resized RGB array,
# paired with the character's position in `characters` as its class label.
features = []
labels = []
for class_index, char in enumerate(characters):
    path = os.path.join(char_path, char)
    # Labels are 1-element uint8 arrays so later `label[0]` indexing keeps working.
    label = np.array([class_index], dtype=np.uint8)

    for img_name in os.listdir(path):
        img_path = os.path.join(path, img_name)

        img_array = cv.imread(img_path)
        if img_array is None:
            # cv.imread signals an unreadable / non-image file by returning None
            # rather than raising; cvtColor would crash on it, so skip the file.
            continue

        # OpenCV decodes to BGR; convert so matplotlib and the model see RGB.
        rgb = cv.cvtColor(img_array, cv.COLOR_BGR2RGB)
        img = cv.resize(rgb, IMG_SIZE, interpolation=cv.INTER_AREA)

        features.append(img)
        labels.append(label)

In [7]:
# Visual sanity check: display one of the loaded images.
fig, ax = plt.subplots()
ax.imshow(features[93])
plt.show()

In [8]:
# Turn the Python lists of per-image arrays into single stacked ndarrays.
features, labels = np.array(features), np.array(labels)

In [9]:
# Normalize pixel values from [0, 255] to [0, 1].
# Cast to float32 first: dividing the uint8 array by a Python float would
# produce float64 and double the memory footprint of the whole dataset.
features = features.astype(np.float32) / 255.0

In [10]:
# Shuffle images and labels with one shared, seeded permutation and split
# them into train / test sets. Indexing with the permutation keeps every
# image aligned with its label without building an intermediate object array.
SPLIT_SEED = 42      # fixed so the split is reproducible across runs
TRAIN_SIZE = 12000   # number of samples assigned to the training set

shuffle_rng = np.random.default_rng(SPLIT_SEED)
order = shuffle_rng.permutation(len(features))

train_idx = order[:TRAIN_SIZE]
test_idx = order[TRAIN_SIZE:]

train_images = features[train_idx]
train_labels = labels[train_idx]
test_images = features[test_idx]
test_labels = labels[test_idx]

# Every sample must land in exactly one of the two sets.
assert len(train_images) + len(test_images) == len(features)

In [11]:
# Show the training array shapes, one per line.
print(train_images.shape, train_labels.shape, sep='\n')

In [13]:
# CNN classifier: three conv/conv/pool/dropout stages followed by a dense head.
# The input shape and the number of output units are derived from the notebook
# configuration so that changing IMG_SIZE or the character list cannot silently
# desynchronise the model from the data.
# cv.resize takes (width, height) while image arrays are (height, width, channels).
input_shape = (IMG_SIZE[1], IMG_SIZE[0], 3)
num_classes = len(characters)

model = keras.Sequential([
    # Stage 1
    layers.Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Dropout(0.2),
    # Stage 2
    layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Dropout(0.2),
    # Stage 3
    layers.Conv2D(256, (3, 3), padding='same', activation='relu'),
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Dropout(0.2),
    # Classification head; the last layer emits raw logits (no softmax).
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(1024, activation='relu'),
    layers.Dense(num_classes),
])

In [14]:
# Print the layer-by-layer overview of the model defined above.
model.summary()

In [15]:
# The final Dense layer has no activation, so the loss must treat the model
# outputs as logits; labels are integer class ids (sparse, not one-hot).
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [16]:
# Train for 20 epochs, holding out 20% of the training data for validation.
history = model.fit(
    x=train_images,
    y=train_labels,
    epochs=20,
    validation_split=0.2,
)

In [17]:
# Measure loss and accuracy on the held-out test split, then report accuracy.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels, verbose=2)
print(test_acc)

In [20]:
# Predict the class of a single test image and compare it with the true label.
x = 210  # index of the test sample to inspect

img = test_images[x]
plt.figure()
plt.imshow(img)
plt.show()

# The model expects a batch, so add a leading batch axis of size 1.
img = np.expand_dims(img, axis=0)
label = test_labels[x]

# Pass the batch array itself: wrapping it in a list makes Keras interpret it
# as a list of separate model inputs rather than as one batched input.
pred = model.predict(img)
# `pred` holds one row of logits; the largest logit marks the predicted class.
i = np.argmax(pred[0])

print("pred = ", characters[i])
print("real = ", characters[label[0]])

In [21]:
# Persist the trained model under the path 'model' (relative to the working directory).
# NOTE(review): the on-disk format for an extension-less path depends on the
# installed Keras version (some versions may require `.keras` / `.h5`) — confirm.
model.save('model')