In [None]:
import os
from pathlib import Path

import numpy as np
from numpy.random import randint
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from skimage.io import imread
from skimage.measure import block_reduce
from scipy import ndimage

from keras import Model
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical
from keras.layers import Input, Flatten, Dense
from keras.layers import Multiply
from keras.layers import Conv2D
from keras.layers import MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout
from keras.layers import BatchNormalization as BatchNorm

In [None]:
# Mount Google Drive inside the Colab runtime.
from google.colab import drive
drive.mount("/content/drive")

# Work from the Drive root so that the relative paths used further down
# (dataset folders, cached .npy files, model checkpoint) resolve there.
os.chdir("/content/drive/My Drive")

## Обработка данных

Все картинки уменьшаются взятием медианы на блоке и дополняются нулями до размера $64 \times 64 \times 3$, затем применяется min-max scaling.

Возможна аугментация данных (представителей некоторых классов очень мало), в частности, случайные повороты и случайные вырезы, однако лучший результат на Kaggle был получен без неё.

In [None]:
# Side length, in pixels, of the square network input.
input_size = 64
# Full input shape: height x width x 3 colour channels.
input_shape = (input_size, input_size, 3)
# Root folder of the training images (one sub-folder per character).
dataset_root = Path("simpsons_dataset")

In [None]:
def load_img(img_path):
    """Load an image file and turn it into a normalised network input.

    The file is read with `imread` and scaled to [0, 1]. If its larger side
    exceeds `input_size`, it is shrunk with a block-wise median. The result is
    centred on a zero-filled canvas of shape `input_shape` and min-max scaled.

    Args:
        img_path: path of the image file (converted with `str()` for `imread`).

    Returns:
        A float32 array of shape `input_shape` with values in [0, 1]. A canvas
        with no dynamic range (e.g. an all-black image) is returned as all
        zeros instead of NaN.
    """
    img = imread(str(img_path)).astype(np.float32) / 255

    # Greyscale files decode to a 2-D array; replicate it into three channels
    # so the 3-D block size and the canvas assignment below stay valid.
    if img.ndim == 2:
        img = np.stack((img,) * 3, axis=-1)

    # Only the spatial axes matter for the down-scaling factor.
    max_side = max(img.shape[:2])

    if max_side > input_size:
        # Ceil division: the smallest integer block that makes both sides fit.
        block_size = (max_side + input_size - 1) // input_size
        img = block_reduce(img, block_size=(block_size,) * 2 + (1,), func=np.median)

    # Centre the (possibly non-square) image on a zero-filled canvas.
    dy = (input_size - img.shape[0]) // 2
    dx = (input_size - img.shape[1]) // 2
    img_sized = np.zeros(input_shape, dtype=np.float32)
    img_sized[dy:img.shape[0] + dy, dx:img.shape[1] + dx] = img
    img = img_sized

    # Min-max scaling, guarded against a zero range (which would give 0/0 = NaN).
    m = img.min()
    M = img.max()
    if M > m:
        img = (img - m) / (M - m)
    else:
        img = np.zeros_like(img)

    return img


def join_paths(*args):
    """Join the given path segments (strings or Paths) into a single Path.

    Every argument is converted to `Path`; the first one is the base and the
    remaining ones are appended to it in order.
    """
    segments = tuple(map(Path, args))
    return Path.joinpath(*segments)

In [None]:
# Switch for the optional augmentation pass further down in this cell
# (random rotation plus random erasing for classes with few images).
augment_data = False

def dir_has_data(p):
    """Return True if `p` is a directory that directly contains a ".jpg" entry.

    Anything that is not a directory, and any directory without a ".jpg"
    entry at its top level, yields False.
    """
    return p.is_dir() and any(entry.suffix == ".jpg" for entry in p.iterdir())

# Class list: every sub-folder of the dataset root holding at least one .jpg.
# Sorted because Path.iterdir() yields entries in arbitrary order, and the
# position in this list is the class index the network is trained on.
characters = sorted(p.name for p in dataset_root.iterdir() if dir_has_data(p))

# Augmentation settings (used only when `augment_data` is set): classes with
# fewer than `character_min` images are topped up to at least that many, and
# erased patches have sides between `erase_size_min` and `erase_size_max`.
character_min = 500

erase_size_min = 5
erase_size_max = 8

dataset_x = []    # preprocessed images
dataset_idx = []  # class index of each image
dataset_y = []    # one-hot label of each image

# Number of loaded images per class index.
count_by_character_idx = [0] * len(characters)

print("Loading data...")
progress_bar = "." * len(characters)
print(f"[{progress_bar}] 0%", end="\r")

for character_idx, character in enumerate(characters):
    character_dir = join_paths(dataset_root, character)

    # One-hot label shared by every image of this class.
    character_vec = to_categorical(
        character_idx,
        num_classes=len(characters)
    )

    # Sorted so that the sample order does not depend on the file system.
    for img_path in sorted(character_dir.iterdir()):
        if img_path.suffix != ".jpg":
            continue

        img = load_img(img_path)

        dataset_x.append(img)
        dataset_idx.append(character_idx)
        dataset_y.append(character_vec)
        count_by_character_idx[character_idx] += 1

    # Redraw the one-line progress bar: one tick per finished class.
    done = "=" * (character_idx + 1)
    left = "." * (len(characters) - character_idx - 1)
    progress = (character_idx + 1) / len(characters) * 100
    print(f"[{done}{left}] {progress:.1f}% ", end="\r")

print()

# Optional augmentation: every image of a class with fewer than
# `character_min` samples gets extra randomly rotated and partially erased
# copies, so that the class ends up with at least `character_min` samples.
# NOTE(review): this runs before the train/test split, so augmented copies of
# one source image can end up on both sides of the split.
if augment_data:
    print("Augmenting data...")
    n_original = len(dataset_x)
    # max(1, ...) keeps the progress arithmetic valid when there are fewer
    # images than classes (or no classes at all).
    progress_k = max(1, n_original // max(1, len(characters)))
    progress_max = n_original // progress_k
    progress_bar = "." * progress_max
    print(f"[{progress_bar}] 0%", end="\r")

    # Only the originally loaded images are visited; appended copies are not.
    for i in range(n_original):
        n = count_by_character_idx[dataset_idx[i]]

        if n < character_min:
            # Ceil(character_min / n): copies per image, original included.
            mul_factor = (character_min + n - 1) // n

            for _ in range(mul_factor - 1):
                img = dataset_x[i]

                # Random rotation by an integer angle in [-10, 10] degrees.
                angle = randint(-10, 11)

                img = ndimage.rotate(img, angle, reshape=False)

                # Re-normalise after interpolation; skip a zero range to avoid 0/0.
                m = img.min()
                M = img.max()
                if M > m:
                    img = (img - m) / (M - m)

                # Random erasing: overwrite a small rectangle with uniform noise.
                erase_x1 = randint(input_size - erase_size_min)
                erase_y1 = randint(input_size - erase_size_min)

                erase_x2 = randint(
                    erase_x1 + erase_size_min,
                    min(input_size, erase_x1 + erase_size_max) + 1
                )

                erase_y2 = randint(
                    erase_y1 + erase_size_min,
                    min(input_size, erase_y1 + erase_size_max) + 1
                )

                img[erase_y1:erase_y2, erase_x1:erase_x2] = \
                    np.random.rand(erase_y2 - erase_y1, erase_x2 - erase_x1, 3)

                dataset_x.append(img)
                dataset_y.append(dataset_y[i])

        # Updated for every visited image, skipped or not, so the bar reaches 100%.
        progress_idx = (i + 1) // progress_k
        done = "=" * progress_idx
        left = "." * (progress_max - progress_idx)
        progress = progress_idx / progress_max * 100
        print(f"[{done}{left}] {progress:.1f}% ", end="\r")

    # Finish the carriage-return progress line.
    print()

dataset_x = np.array(dataset_x)
dataset_y = np.array(dataset_y)

In [None]:
def save_array(name):
    """Write the module-level array called `name` to "<name>.npy".

    The array is looked up by name in this module's globals; the file is
    created relative to the current working directory.
    """
    array = globals()[name]
    np.save(f"{name}.npy", array)


def save_arrays(*args):
    """Save each named module-level array with `save_array`, in the order given."""
    for array_name in args:
        save_array(array_name)

        
def load_array(name):
    """Read "<name>.npy" and bind the result to the module-level name `name`.

    The file is looked up relative to the current working directory; any
    existing global with that name is overwritten.
    """
    loaded = np.load(f"{name}.npy")
    globals()[name] = loaded

    
def load_arrays(*args):
    """Load each named array with `load_array`, in the order given."""
    for array_name in args:
        load_array(array_name)

In [None]:
# Persist the assembled dataset to dataset_x.npy / dataset_y.npy
# (presumably so a later session can reload it instead of re-reading the images).
save_arrays("dataset_x", "dataset_y")

In [None]:
# Restore dataset_x / dataset_y from the .npy files written by the previous cell.
load_arrays("dataset_x", "dataset_y")

In [None]:
# Hold out 10% of the samples for evaluation. The fixed random_state makes the
# split, and with it every metric computed on the held-out part, reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    dataset_x, dataset_y, test_size=0.1, random_state=42
)

In [None]:
# Persist the train/test split as four .npy files named after the variables.
save_arrays("train_x", "test_x", "train_y", "test_y")

In [None]:
# Restore the train/test split from the .npy files written by the previous cell.
load_arrays("train_x", "test_x", "train_y", "test_y")

In [None]:
# Store the class names, space-separated and in class-index order.
# Names containing whitespace would not survive the `.split()` used to read
# the file back.
Path("characters.txt").write_text(" ".join(characters))

In [None]:
# Restore the class names; list position is the class index.
characters = Path("characters.txt").read_text().split()

## Архитектура нейросети

Пусть $B\left(n\right)$ – следующая последовательность слоёв: свёртка из $n$ фильтров $3 \times 3$, затем ещё одна свёртка из $n$ фильтров $3 \times 3$ (первая – с padding «same», вторая – без padding), потом max pooling $2 \times 2$, dropout с вероятностью $0.25$ и batch-нормализация. После каждой свёртки используется активация $f\left(x\right) = x$ при $x \gt 0$ и $f\left(x\right) = e^x - 1$ при $x \le 0$, известная как exponential linear unit.

Нейросеть начинается с последовательности блоков $B\left(32\right) \mapsto B\left(64\right) \mapsto B\left(64\right)$. Изначально после этого шёл полносвязный слой из $512$ нейронов с dropout с вероятностью $0.5$, однако его замена на global average pooling, то есть подсчёт на каждом канале среднего значения, привела к заметному улучшению точности и значительному упрощению нейросети (в конечном варианте меньше $150$ тысяч параметров). В самом конце стоит выходной слой с активацией softmax.

In [None]:
# Training settings: mini-batch size (also reused for prediction) and epoch count.
batch_size = 32
n_epochs = 50

# Architecture switches: use global average pooling instead of the
# Flatten + Dense(512) head, and optionally insert `se_block` after the
# second convolutional block.
use_global_avg_pooling = True
use_se_block = False

In [None]:
# Activation name passed to the Conv2D layers and to the Dense(8) / Dense(512) layers below.
act = "elu"

# Symbolic input of shape `input_shape`.
model_input = Input(shape=input_shape)

def block(x, k):
    """Apply one convolutional stage to tensor `x` and return the result.

    The stage is: Conv2D(k, 3, padding="same") -> Conv2D(k, 3) with default
    padding -> MaxPooling2D(2) -> Dropout(0.25) -> batch normalisation.
    Both convolutions use the module-level activation `act`.
    """
    stage = (
        Conv2D(k, 3, padding="same", activation=act),
        Conv2D(k, 3, activation=act),
        MaxPooling2D(2),
        Dropout(0.25),
        BatchNorm(),
    )

    for layer in stage:
        x = layer(x)

    return x

def se_block(x, n):
    """Rescale tensor `x` by a learned gate and return the product.

    The gate is computed from `x` itself: global average pooling, a Dense(8)
    layer with activation `act`, then a Dense(n) layer with a sigmoid. The
    result is multiplied with `x` through a `Multiply` layer.
    NOTE(review): `n` presumably has to equal the channel count of `x` - confirm.
    """
    pooled = GlobalAveragePooling2D()(x)
    bottleneck = Dense(8, activation=act)(pooled)
    gate = Dense(n, activation="sigmoid")(bottleneck)

    return Multiply()([x, gate])

# Convolutional trunk: block(32) -> block(64) -> [optional se_block] -> block(64).
features = block(model_input, 32)
features = block(features, 64)

if use_se_block:
    features = se_block(features, 64)

features = block(features, 64)

# Head: either global average pooling, or Flatten + Dense(512) + Dropout(0.5).
if use_global_avg_pooling:
    head = GlobalAveragePooling2D()(features)
else:
    head = Flatten()(features)
    head = Dense(512, activation=act)(head)
    head = Dropout(0.5)(head)

# One softmax output per character class.
class_probs = Dense(len(characters), activation="softmax")(head)
model = Model(inputs=model_input, outputs=class_probs)

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["categorical_accuracy"],
)

model.summary()

In [None]:
# Checkpoint callback writing to model.hdf5; with save_best_only=True only
# improvements of the monitored quantity are saved. The monitored quantity is
# left at the Keras default - presumably val_loss; confirm.
mcp_save = ModelCheckpoint(
    "model.hdf5", 
    verbose=1,
    save_best_only=True
)

# NOTE(review): the held-out test split is also passed as validation data, so
# checkpoint selection sees the same samples that are evaluated afterwards.
model.fit(
    train_x, train_y,
    validation_data=(test_x, test_y),
    epochs=n_epochs,
    batch_size=batch_size,
    callbacks=[mcp_save]
)

In [None]:
# Replace the in-memory model with the one stored in model.hdf5
# (the file the ModelCheckpoint callback writes to).
model = load_model("model.hdf5")

model.summary()

In [None]:
# Model outputs for the held-out split; each row is compared with test_y via argmax below.
test_pred = model.predict(test_x, batch_size=batch_size)

## Построение матрицы ошибок

На матрице видно, например, что нейросеть часто принимает Пэтти за Сельму, что предсказуемо, ведь их головы отличаются только причёсками, которые на некоторых кадрах трудно отличить.

In [None]:
n_classes = len(characters)

# confusion[p, e] counts test samples of true class e that were predicted as
# class p. Correct predictions are not counted, so only errors show up.
confusion = np.zeros((n_classes,) * 2, dtype=np.float32)

# accuracy[c] = [correctly classified, total] over test samples of true class c.
accuracy = [[0, 0] for _ in range(n_classes)]

for i in range(test_pred.shape[0]):
    predicted = test_pred[i].argmax()
    expected = test_y[i].argmax()

    accuracy[expected][1] += 1

    if predicted == expected:
        accuracy[expected][0] += 1
    else:
        confusion[predicted, expected] += 1

# (class index, accuracy in percent, test samples); classes absent from the
# test split are reported with 0 accuracy.
accs = [
    (i, n_correct / n_total * 100 if n_total else 0, n_total)
    for i, (n_correct, n_total) in enumerate(accuracy)
]

# Worst classes first.
accs.sort(key=lambda item: item[1])

for i, acc, n_total in accs:
    character = characters[i]
    # Images available for this class in the dataset folder; only .jpg files
    # are counted, matching what the loading loop reads.
    n_images = sum(
        1 for entry in join_paths(dataset_root, character).iterdir()
        if entry.suffix == ".jpg"
    )
    print(f"{character}: {acc:.1f} ({n_total}, {n_images})")

plt.figure(figsize=(20, 20))

for axis, deg in [(plt.xticks, 90), (plt.yticks, 0)]:
    axis(list(range(n_classes)), characters, rotation=deg)

# Rows of `confusion` are predictions (y axis), columns are true classes (x axis).
plt.xlabel("Expected")
plt.ylabel("Predicted")

# Scale to [0, 1] for display; skipped when there are no errors to avoid 0/0.
peak = confusion.max()
if peak > 0:
    confusion /= peak

plt.imshow(confusion, origin="lower")

In [None]:
# Class pair to inspect: test samples of `exp_char` that were predicted as `pred_char`.
exp_char = "patty_bouvier"
pred_char = "selma_bouvier"

# Another pair to inspect:
# exp_char = "miss_hoover"
# pred_char = "martin_prince"

# Indices of all test samples showing exactly this confusion.
mispred = [
    idx for idx, label in enumerate(test_y)
    if characters[label.argmax()] == exp_char
    and characters[test_pred[idx].argmax()] == pred_char
]

if not mispred:
    # randint(0) would raise, so report the empty case explicitly.
    print(f"No test samples of {exp_char} were predicted as {pred_char}")
else:
    # Pick one such sample at random and show the five most probable classes.
    sample_idx = mispred[randint(len(mispred))]
    img = test_x[sample_idx]
    pred = sorted(
        enumerate(model.predict(np.array([img]))[0]),
        key=lambda item: item[1],
        reverse=True,
    )

    print("Expected:", characters[test_y[sample_idx].argmax()])

    # A separate loop name keeps the sample index from being overwritten.
    for class_idx, p in pred[:5]:
        print(characters[class_idx], p)

    plt.imshow(img)

In [None]:
# Folder with the images to classify (presumably the Kaggle test set).
task_root = Path("testset")

task_names = []  # file names, parallel to task_x
task_x = []      # preprocessed images

for candidate in task_root.iterdir():
    # Only .jpg files are loaded.
    if candidate.suffix == ".jpg":
        pixels = load_img(candidate)
        task_names.append(candidate.name)
        task_x.append(pixels)

task_x = np.array(task_x)

In [None]:
# Store the task file names, space-separated and in the same order as task_x.
# Names containing whitespace would not survive the `.split()` used to read
# the file back.
Path("task_names.txt").write_text(" ".join(task_names))

save_arrays("task_x")

In [None]:
# Restore the task file names and the matching image array from disk.
task_names = Path("task_names.txt").read_text().split()

load_arrays("task_x")

In [None]:
# Model outputs for the task images; the argmax of each row is taken as the predicted class below.
task_y = model.predict(task_x, batch_size=batch_size)

In [None]:
# Submission table: one row per task image, indexed by file name ("Id"),
# with the predicted character name in the "Expected" column.
ans_df = pd.DataFrame(
    {"Expected": [characters[v.argmax()] for v in task_y]},
    index=pd.Index(task_names, name="Id"),
)

In [None]:
# Spot check: show the task images that were predicted as lionel_hutz.
ans_df[ans_df["Expected"] == "lionel_hutz"]

In [None]:
# Write the table to ans.csv; the "Id" index is written as the first column.
ans_df.to_csv("ans.csv")