In [None]:
# Importing some libraries.
import os
import cv2
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator#, img_to_array, load_img
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Model, load_model

In [None]:
# Load the two CSV tables shipped with the dataset: the per-image index
# and the per-class metadata.
dataset_root = "/kaggle/input/lego-minifigures-classification"
index_file = pd.read_csv(os.path.join(dataset_root, "index.csv"))
metadata = pd.read_csv(os.path.join(dataset_root, "metadata.csv"))

In [None]:
# Attach the minifigure name to every image row by joining on class_id.
class_names = metadata[['class_id', 'minifigure_name']]
df = index_file.merge(class_names, on='class_id')
df

In [None]:
# Horizontal bar chart of how many images each minifigure has.
# (The original call passed no data, so nothing meaningful was drawn.)
name_counts = df['minifigure_name'].value_counts()
plt.figure(figsize=(12, 8))
sns.barplot(x=name_counts.values, y=name_counts.index)
plt.xlabel('number of images')
plt.ylabel('minifigure_name')
plt.title('MINIFIGURE COUNTS');

In [None]:
# Bar chart of the number of images available per minifigure name.
(
    df['minifigure_name']
    .value_counts()
    .plot.bar(figsize=(12, 6), title='MINIFIGURE COUNTS')
)

### Separating train and validation images: one image of each minifigure goes to the validation set and the rest go to the train set:

In [None]:
# Hold out the last image of every class for validation and keep the rest for training.
# The per-class slices are collected in lists and concatenated once:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a loop is quadratic.
# Iterating over the class ids actually present avoids assuming they are exactly 1..N.
train_parts = []
valid_parts = []

for class_id in sorted(df['class_id'].unique()):
    class_rows = df[df['class_id'] == class_id]
    train_parts.append(class_rows.iloc[:-1])   # everything except the last image
    valid_parts.append(class_rows.iloc[-1:])   # the last image only

df_train = pd.concat(train_parts)
df_valid = pd.concat(valid_parts)

In [None]:
# Report how many distinct class ids and distinct minifigure names the merged table holds.
for label, column in (('no. of classes: ', 'class_id'),
                      ('no. of names: ', 'minifigure_name')):
    print(label, len(df[column].unique()))

In [None]:
# Root folder of the dataset; it is joined with each row's 'path' value when images are read
# and passed as `directory` to the data generators.
common_dir = "/kaggle/input/lego-minifigures-classification/"

### Displaying 15 randomly chosen images from the entire dataset.

In [None]:
# Draw a 3x5 grid of randomly sampled images, each titled "<class_id>: <minifigure_name>".
plt.figure(figsize=(14, 10))
for position, (row_index, row) in enumerate(df.sample(15).iterrows(), start=1):
    plt.subplot(3, 5, position)
    bgr_pixels = cv2.imread(os.path.join(common_dir, row['path']))
    # OpenCV loads images as BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB))
    plt.title(f"{row['class_id']}: {row['minifigure_name']}")
    # Hide the axis ticks: they carry no information for an image thumbnail.
    plt.xticks([])
    plt.yticks([])

## Data Augmentation

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented;
# validation images are only rescaled.
augmentation_options = dict(
    rescale=1.0/255,
    rotation_range=30,
    shear_range=0.3,
    zoom_range=0.4,
    width_shift_range=0.3,
    height_shift_range=0.3,
    brightness_range=[0.2,1.0],
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

valid_datagen = ImageDataGenerator(rescale=1.0/255)

In [None]:
# Both generators read image paths from the 'path' column (relative to common_dir),
# use 'minifigure_name' as the label, and yield 256x256 images in batches of 16.
shared_flow_args = dict(
    directory=common_dir,
    x_col='path',
    y_col='minifigure_name',
    target_size=(256,256),
    batch_size=16,
)

# Training batches are shuffled; validation batches keep the dataframe order.
train_generator = train_datagen.flow_from_dataframe(dataframe=df_train, shuffle=True, **shared_flow_args)

valid_generator = valid_datagen.flow_from_dataframe(dataframe=df_valid, shuffle=False, **shared_flow_args)

### Adding early stopping and checkpoint to save the best model:

In [None]:
# Keep only the model with the lowest validation loss on disk.
callbacks_save = ModelCheckpoint(filepath='best LEGO-CNN.hdf5',
                                 save_best_only=True,
                                 monitor='val_loss',
                                 mode='min')

# Stop training once val_loss has not improved for 3 consecutive epochs.
early_stop = EarlyStopping(patience=3, monitor='val_loss')

### Here I am trying the EfficientNet architecture; you can also try MobileNetV2 (an improved version of MobileNetV1).

In [None]:
!pip install efficientnet
import efficientnet.keras as efn


In [None]:

# EfficientNetB6 backbone with ImageNet weights and without its classification head;
# pooling='avg' applies global average pooling to the backbone output.
# input_shape is (height, width, channels): an RGB image has 3 colour channels.
base_model = efn.EfficientNetB6(input_shape=(256,256,3),
                                weights='imagenet',
                                include_top=False,
                                pooling='avg')

# One output unit per class known to the training generator, instead of a
# hard-coded count that silently breaks when the dataset changes.
num_classes = len(train_generator.class_indices)

x = Dropout(0.3)(base_model.output)    # Dropout layer in front of the classifier.
prediction_efn = Dense(num_classes, activation='softmax')(x)
model = Model(base_model.input, prediction_efn)

# The generators yield one-hot labels (default class_mode), hence categorical cross-entropy.
model.compile(optimizer = Adam(0.0001), loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [None]:
# Train for at most 20 epochs; early stopping and best-model checkpointing are
# handled by the two callbacks.
training_callbacks = [early_stop, callbacks_save]
model.fit(train_generator,
          epochs=20,
          validation_data=valid_generator,
          callbacks=training_callbacks)

In [None]:
# Pull the per-epoch training and validation curves out of the History object
# that the fit call stored on the model.
loss, accuracy, val_loss, val_accuracy = (
    model.history.history[metric_name]
    for metric_name in ('loss', 'accuracy', 'val_loss', 'val_accuracy')
)

## Training and validation set loss:

In [None]:
# Training loss (red) against validation loss (blue), one point per epoch.
for series, colour, legend_label in ((loss, 'r', 'loss'),
                                     (val_loss, 'b', 'val_loss')):
    plt.plot(series, color=colour, label=legend_label)
plt.legend()
plt.title('Training and validation loss')

## Training and validation set accuracy:

In [None]:
# Training accuracy (red) against validation accuracy (green), one point per epoch.
for series, colour, legend_label in ((accuracy, 'r', 'accuracy'),
                                     (val_accuracy, 'g', 'validation accuracy')):
    plt.plot(series, color=colour, label=legend_label)
plt.legend()
plt.title('Training and validation Accuracy')

In [None]:
# Reload the file written by the ModelCheckpoint callback, which was configured with
# save_best_only=True and monitor='val_loss' (i.e. the lowest-val_loss model).
model_LEGO = load_model('best LEGO-CNN.hdf5')

### Checking the predictive power of the model by classifying a randomly chosen image from the validation set.

In [None]:
import random

# Pick one validation row at random.
n = len(df_valid)
random_image = random.randrange(n)  # random position inside df_valid

test_set = df_valid['path'].iloc[random_image]  # relative path of the chosen image

# cv2.imread returns None instead of raising when the file cannot be read.
test_image = cv2.imread(os.path.join(common_dir, test_set))
if test_image is None:
    raise FileNotFoundError(f"could not read image: {os.path.join(common_dir, test_set)}")

# Show the image with its true label. OpenCV loads pixels as BGR, so a converted
# copy is displayed; test_image itself stays exactly as cv2.imread returned it.
plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
plt.title(df_valid['minifigure_name'].iloc[random_image])

In [None]:
# Prepare the image exactly like the generators prepared the training data:
# RGB channel order, 256x256 pixels, values rescaled to [0, 1].
# New names are used so test_image is not overwritten and the cell can be re-run safely.
rgb_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)   # cv2.imread yields BGR
resized_image = cv2.resize(rgb_image, dsize=(256,256))     # resize to (256, 256)
test_batch = np.expand_dims(resized_image.astype('float32') / 255.0, axis=0)  # shape (1, 256, 256, 3)

# Predict with the best checkpoint loaded from disk rather than the last-epoch model.
# argmax gives the position of the most probable class in the generator's class list.
prediction = model_LEGO.predict(test_batch).argmax()

In [None]:
# `prediction` is an index into the label list built by the training generator
# (class_indices maps minifigure_name -> index); it is not a metadata class_id.
# Invert that mapping to turn the index back into a name.
index_to_name = {index: name for name, index in train_generator.class_indices.items()}
minifigure_name = index_to_name[int(prediction)]
print('True name of predicted figure: ', minifigure_name)