In [None]:
import os

import pandas as pd
import numpy as np

import cv2
import matplotlib.pyplot as plt

In [None]:
# Root folder of the dataset. The trailing slash matters: file paths are built
# from this value by plain string concatenation.
DIRECTORY = "../input/lego-minifigures/"

# Side length, in pixels, that every image is resized to.
IMAGE_SIZE = 224

In [None]:
# Load index.csv from the dataset root.
data = pd.read_csv(f'{DIRECTORY}index.csv')

# Count of distinct values in the 'class_id' column.
TOTAL_CHARACTER_ID = data['class_id'].unique().size

In [None]:
# read images
lego_images = []

for path in data['path']:
    image_path = str(DIRECTORY + 'train/' + path)
    image = cv2.imread(image_path)
    # cv2.imread reports a missing or unreadable file by returning None rather
    # than raising; fail here with the offending path instead of letting
    # cvtColor raise an opaque error.
    if image is None:
        raise FileNotFoundError(f'could not read image: {image_path}')
    # OpenCV decodes to BGR channel order; convert to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    # divide pixel values by 255
    scaled_image = np.array(image) / 255
    lego_images.append(scaled_image)

lego_images = np.array(lego_images)
# character_id will start from 0
lego_character_id = np.array(data['class_id']) - 1

In [None]:
# inspect one preprocessed image (pixel values were divided by 255 when loaded)
lego_images[10]

In [None]:
# split images to train and validation data
from sklearn.model_selection import train_test_split

# 80 % / 20 % partition; random_state is fixed so the split is repeatable.
(image_train,
 image_validation,
 character_id_train,
 character_id_validation) = train_test_split(
    lego_images,
    lego_character_id,
    train_size = 0.8,
    test_size = 0.2,
    random_state = 0,
)

In [None]:
# check the shape of train and validation images
print(f'train size : {image_train.shape}')
# this is the validation split, not the test set loaded later in the notebook
print(f'validation size : {image_validation.shape}')

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout, Dense
from tensorflow.keras.applications import MobileNetV2

In [None]:
# Instantiate MobileNetV2 with the library's default arguments.
# NOTE(review): the images fed to this model are divided by 255 earlier in the
# notebook; Keras documents mobilenet_v2.preprocess_input as the expected input
# scaling — confirm the [0, 1] range used here is intended.
base_model = MobileNetV2()

# Branch off the output of the second-to-last layer, leaving out the final one.
penultimate_output = base_model.layers[-2].output

# Dropout, followed by a softmax layer with TOTAL_CHARACTER_ID units.
model_layer = Dropout(rate = 0.5)(penultimate_output)
outputs = Dense(units = TOTAL_CHARACTER_ID, activation = 'softmax')(model_layer)

# Tie the original inputs to the new output.
model = Model(inputs = base_model.inputs, outputs = outputs)

In [None]:
# print the summary of the assembled model
model.summary()

In [None]:
# Adam's step-size argument is `learning_rate`; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
model.compile(
    optimizer = Adam(learning_rate = 0.0001),
    # labels are integer class ids (class_id - 1), hence the sparse loss
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy'],
)

In [None]:
# Write the compiled model to 'model.h5'.
# NOTE(review): this cell comes before the training cell, so the file holds the
# model as it is prior to model.fit — confirm that is intended.
model.save(filepath = 'model.h5')

In [None]:
import tensorflow as tf
from tensorflow import keras

# Checkpoint targets: a per-epoch path (the epoch number is formatted into the
# name) and a single path reserved for the save_best_only checkpoint.
CHECKPOINT_DIRECTORY = 'checkpoints/movbile_net_v2_ckpts_{epoch:03d}.ckpt'
BEST_CHECKPOINT_DIRECTORY = 'checkpoints/movbile_net_v2_best.ckpt'

# Make sure the parent folder of each checkpoint path exists.
for checkpoint_path in (CHECKPOINT_DIRECTORY, BEST_CHECKPOINT_DIRECTORY):
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# callbacks
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    patience = 5,
    verbose = 2,
)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(CHECKPOINT_DIRECTORY)
best_model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    BEST_CHECKPOINT_DIRECTORY,
    save_best_only = True,
)

In [None]:
# train the model

EPOCHS = 30

# Early stopping plus the two checkpoint callbacks defined earlier.
training_callbacks = [early_stopping, model_checkpoint, best_model_checkpoint]
validation_set = (image_validation, character_id_validation)

history = model.fit(
    image_train,
    character_id_train,
    batch_size = 4,
    epochs = EPOCHS,
    shuffle = True,
    validation_data = validation_set,
    callbacks = training_callbacks,
)

In [None]:
# Training curves: loss on the left panel, accuracy on the right.
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize = (16, 6))

loss_ax.plot(history.history['loss'], label = 'training loss')
loss_ax.plot(history.history['val_loss'], label = 'validation loss')
loss_ax.grid()
loss_ax.legend()

accuracy_ax.plot(history.history['accuracy'], label= 'training accuracy')
accuracy_ax.plot(history.history['val_accuracy'], label='validation accuracy')
accuracy_ax.grid()
accuracy_ax.legend()

In [None]:
# Restore the model stored at the best-checkpoint path.
best_model = keras.models.load_model(filepath = BEST_CHECKPOINT_DIRECTORY)

In [None]:
# get test images
# os.listdir returns entries in arbitrary order; sort so the order of the test
# set is reproducible from run to run.
filenames = sorted(os.listdir(DIRECTORY + 'test/'))

test_images = []

# Iterate over `filenames` itself (not a second directory listing) so that
# test_images[i] is guaranteed to be the image named filenames[i].
for filename in filenames:
    test_image_path = DIRECTORY + 'test/' + filename
    test_image = cv2.imread(test_image_path)
    # cv2.imread returns None for a missing or unreadable file; report the
    # path instead of failing later inside cvtColor.
    if test_image is None:
        raise FileNotFoundError(f'could not read image: {test_image_path}')
    # OpenCV decodes to BGR channel order; convert to RGB
    test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
    test_image = cv2.resize(test_image, (IMAGE_SIZE, IMAGE_SIZE))
    # same 1/255 scaling as the training images
    scaled_image = np.array(test_image) / 255

    test_images.append(scaled_image)

test_images = np.array(test_images)

In [None]:
# shape of the stacked test-image array
test_images.shape

In [None]:
# test file names, as listed from the test folder
np.array(filenames)

In [None]:
# how many test file names were listed
np.array(filenames).shape

In [None]:
# Load metadata.csv from the dataset root.
metadata = pd.read_csv(f'{DIRECTORY}metadata.csv')

In [None]:
# display the metadata table
metadata

In [None]:
# Load test.csv from the dataset root.
test_character_id = pd.read_csv(f'{DIRECTORY}test.csv')

In [None]:
# display the table read from test.csv
test_character_id

In [None]:
test_id = np.array(test_character_id)

# The part of each file name before the first '.' is parsed as an integer N;
# row N - 1 of the test table is then read and its second column kept.
# NOTE(review): this relies on test.csv rows being ordered by that number —
# confirm against the file.
actual_test_id = np.array([
    test_id[int(filename.split('.')[0]) - 1][1]
    for filename in filenames
])

In [None]:
# ids looked up from the test table, one per entry of filenames
actual_test_id

In [None]:
 predictions = best_model.predict(test_images)

In [None]:
# shape of the model output for the test images
predictions.shape

In [None]:
# argmax along axis 1 yields a 0-based label; adding 1 maps it back to the
# class_id numbering (the training labels were class_id - 1).
predicted_character_id = np.argmax(predictions, axis = 1) + 1

In [None]:
# Print both id arrays, each under its heading, one item per line.
print(
    'Test actual id',
    actual_test_id,
    'Test predicted id',
    predicted_character_id,
    sep = '\n',
)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# Use one explicit, sorted label list for both the matrix and its tick labels.
# Without `labels`, confusion_matrix keeps only the ids that occur in the test
# data (in sorted order), which need not match the order or length of
# data['class_id'].unique() and would mislabel the axes.
class_ids = np.sort(data['class_id'].unique())

matrix = confusion_matrix(actual_test_id, predicted_character_id, labels = class_ids)
plot_confusion_matrix(conf_mat = matrix, figsize = (8,8), class_names = class_ids)

In [None]:
# Show every test image with the predicted minifigure name and whether the
# prediction matched the id taken from the test table.
for index, shown_image in enumerate(test_images):
    predicted_id = predicted_character_id[index]
    plt.imshow(shown_image)

    # first metadata row whose class_id equals the predicted id
    matching_names = metadata.loc[metadata['class_id'] == predicted_id, 'minifigure_name']
    character_name = matching_names.iloc[0]

    is_correct = actual_test_id[index] == predicted_id
    prediction_label = 'Prediction: True' if is_correct else 'Prediction: False'

    plt.xlabel('minifigure name : ' + character_name + '\n' + prediction_label)
    plt.show()