In [2]:
import os
import cv2
import numpy as np
import pandas as pd 
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D, InputLayer
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import EfficientNetB0, ResNet50

In [4]:
def load_label_from_excel(csv_path='/kaggle/input/adiadas-nike-shoes/dataset/labelnames.csv'):
    """This function generates a dictionary mapping their names and labels

    Args:
        excel_path (string): the path where the excel path is kept
    Returns:
        dict: the image and label mapping dictionary
    """    
    # load csv file
    df = pd.read_csv(csv_path)
    # initiate mapping variable
    label_dict = {}
    # populate dictionary
    for _, row in df.iterrows():
        if ".JPG" in row['Name']:
            row['Name'] = row['Name'][:-4]
        label_dict[row['Name']] = row['Label']

    return label_dict

In [5]:
label_dict = load_label_from_excel(csv_path='/kaggle/input/adiadas-nike-shoes/dataset/labelnames.csv')
label_dict

{'aadidas_ (10)': 'Adidas',
 'aadidas_ (11)': 'Adidas',
 'aadidas_ (12)': 'Adidas',
 'aadidas_ (13)': 'Adidas',
 'aadidas_ (14)': 'Adidas',
 'aadidas_ (15)': 'Adidas',
 'aadidas_ (16)': 'Adidas',
 'aadidas_ (17)': 'Adidas',
 'aadidas_ (18)': 'Adidas',
 'aadidas_ (19)': 'Adidas',
 'aadidas_ (2)': 'Adidas',
 'aadidas_ (20)': 'Adidas',
 'aadidas_ (21)': 'Adidas',
 'aadidas_ (22)': 'Adidas',
 'aadidas_ (23)': 'Adidas',
 'aadidas_ (24)': 'Adidas',
 'aadidas_ (3)': 'Adidas',
 'aadidas_ (4)': 'Adidas',
 'aadidas_ (5)': 'Adidas',
 'aadidas_ (6)': 'Adidas',
 'aadidas_ (7)': 'Adidas',
 'aadidas_ (8)': 'Adidas',
 'aadidas_ (9)': 'Adidas',
 'Adidas (1)': 'Adidas',
 'Adidas (10)': 'Adidas',
 'Adidas (11)': 'Adidas',
 'Adidas (12)': 'Adidas',
 'Adidas (13)': 'Adidas',
 'Adidas (14)': 'Adidas',
 'Adidas (15)': 'Adidas',
 'Adidas (16)': 'Adidas',
 'Adidas (17)': 'Adidas',
 'Adidas (18)': 'Adidas',
 'Adidas (19)': 'Adidas',
 'Adidas (2)': 'Adidas',
 'Adidas (20)': 'Adidas',
 'Adidas (21)': 'Adidas',
 '

In [21]:
def transform_data(Set, path, label_dict, increase_train_image):
    """the functions process the input images for model training
    Args:
        Set (string): the set which to be converted
        path (string): the path where the images are kept
        label_dict(dictionary): the dictionary containing show and label mapping
        increase_train_image (bool): the flag whether to crop image for image augmentation in the training set
    Returns:
        np.array, np.array: processed train images and their corresponding labels
    """    
    data = []
    label = []
    for class_name in os.listdir(path):
        for img_name in os.listdir(os.path.join(path, class_name)):
            # for windows
            img_path = path + '/' + class_name + '/' + img_name 

            # read, resize and normalize image
            org_img = cv2.imread(img_path)
            img = cv2.resize(org_img , (224, 224))
            img = tf.keras.utils.normalize(img, axis=1)

            label_name = label_dict[img_name[:-4]]
            img_label = 0 if label_name == 'Adidas' else 1

            # print(img_name, img_label)
            data.append(img)
            label.append(img_label)

            if Set == 'train' and increase_train_image:
                # left crop
                crop_left = org_img[:,size:,:]
                crop_left = cv2.resize(crop_left , (224, 224))
                crop_left = tf.keras.utils.normalize(img, axis=1)
                data.append(crop_left)
                label.append(img_label)
                # right crop
                crop_right = org_img[:, :-size, :]
                crop_right = cv2.resize(crop_right , (224, 224))
                crop_right = tf.keras.utils.normalize(img, axis=1)
                data.append(crop_right)
                label.append(img_label)
                # top crop
                crop_top = org_img[size:, :, :]
                crop_top = cv2.resize(crop_top , (224, 224))
                crop_top = tf.keras.utils.normalize(img, axis=1)
                data.append(crop_top)
                label.append(img_label)
                # bottom crop
                crop_bottom = org_img[:-size, :, :]
                crop_bottom = cv2.resize(crop_bottom , (224, 224))
                crop_bottom = tf.keras.utils.normalize(img, axis=1)
                data.append(crop_bottom)
                label.append(img_label)

    print("***Successfully Converted Data***")
        
    return np.array(data), np.array(label, dtype=np.uint8)


In [26]:
train_data, train_label = transform_data('train', '/kaggle/input/adiadas-nike-shoes/dataset/train', label_dict, False)
test_data, test_label = transform_data('test', '/kaggle/input/adiadas-nike-shoes/dataset/test', label_dict, False)
val_data, val_label = transform_data('val', '/kaggle/input/adiadas-nike-shoes/dataset/validation', label_dict, False)

***Successfully Converted Data***
***Successfully Converted Data***
***Successfully Converted Data***


In [27]:
test_data

array([[[[0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         ...,
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531]],

        [[0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         ...,
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531]],

        [[0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         ...,
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531]],

        ...,

        [[0.06681531, 0.06681531, 0.06681531],
         [0.06681531, 0.06681531, 0.06681531]

In [28]:
def simple_CNN_model():
    """This function defines a simple CNN model
    Returns:
        tf.keras.model: return the defined model
    """ 
    # initiate a sequential model
    model = Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu',input_shape=(224, 224, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(2, activation='softmax'))
    model.summary()
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-4),
    metrics='accuracy')
    # print model summary
    model.summary()

    return model

In [88]:
def efficientnet():
    """This function defines the backbone of a efficientnet model
    Returns:
        tf.keras.model: return the defined model
    """ 
    # define input layer
    inputs = layers.Input(shape=(224, 224, 3))
    # add augmentation layer
    # x = img_augmentation(inputs)
    # define efficientnet config
    outputs = EfficientNetB0(include_top=True, weights=None, classes=2)(inputs)
    # generate the effcienet model
    model = tf.keras.Model(inputs, outputs)
    # compile the model
    model.compile(
        optimizer="adam", 
        loss="sparse_categorical_crossentropy", 
        metrics=["accuracy"]
    )
    # print model summary
    model.summary()

    return model

In [58]:
def plot_hist(history, figure_name):
    """ This function plot the accuracy given history of a model
    Args:
        history (object of dicionary): the history of a model
        figure_name (string): the suffix of the name of figure which willl show the accuracy
    """    
    plt.plot(history.history["accuracy"])
    plt.plot(history.history["val_accuracy"])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.savefig("/kaggle/working/logs/" + figure_name + '_accuracy_comparison' + 'CNN' + '.png')
    # clear plt buffer
    plt.clf()
    plt.show()

In [94]:
def train_CNN_model(train_data, train_label, val_data, val_label, tensorboard, pre_trained):
    """This functions trains the resnet model based on given parameters
    Args:
        train_data (list): the np array of training data
        train_label (list): the np array of training labels
        test_data (list): the np array of validation data
        test_label (list): the bp array of validation labels
        tensorboard (callback): tensorboard log callback
        pre_trained (bool): this flag defines whether to use pre trained model or not
    """ 
    print("***Initiating Model : Simple CNN")
    model = simple_CNN_model()
    # define checkpoint for saving the best model
    cp_callback = ModelCheckpoint(filepath='/kaggle/working/checkpoints/CNN_best_' + 'iter1.h5' ,save_weights_only=False, save_best_only=True, verbose=1)
    # load pre-trained model
    if(pre_trained):
        model.load_weights(config.paths['pre_trained_model_path'])
    # train the model
    history_CNN = model.fit(train_data, train_label, 
                batch_size=4, 
                epochs=30,
                validation_data=(val_data, val_label), 
                shuffle=True, 
                callbacks=[tensorboard, cp_callback])
    # save the model of the last epoch
    model.save("/kaggle/working/models/CNN_last_" + 'iter_1.h5')
    print("***Simple CNN Model Training Finished***")
    plot_hist(history_CNN, figure_name="CNN")

In [95]:
tensorboard = TensorBoard(log_dir="logs/{}".format('CNN'))

In [96]:
train_CNN_model(train_data, train_label, val_data, val_label, tensorboard, False)

***Initiating Model : Simple CNN
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_24 (Conv2D)          (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_24 (MaxPoolin  (None, 111, 111, 32)     0         
 g2D)                                                            
                                                                 
 conv2d_25 (Conv2D)          (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_25 (MaxPoolin  (None, 54, 54, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_26 (Conv2D)          (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_26 (Ma

<Figure size 640x480 with 0 Axes>

In [91]:
def train_efficient_net_model(train_data, train_label, val_data, val_label, tensorboard, pre_trained):
    """This functions trains the efficientnet model based on given parameters
    Args:
        train_data (list): the np array of training data
        train_label (list): the np array of training labels
        test_data (list): the np array of validation data
        test_label (list): the bp array of validation labels
        tensorboard (callback): tensorboard log callback
        pre_trained (bool): this flag defines whether to use pre trained model or not
    """    
    print("***Initiating Model : EfficientnetB0")
    model = efficientnet()
    # define checkpoint for saving the best model
    cp_callback = ModelCheckpoint(filepath='/kaggle/working/checkpoints/efficientnet_best_eff_net' + '.h5' ,save_weights_only=False, save_best_only=True, verbose=1)

    # load pre trained data
    if(pre_trained):
        model.load_weights(config.paths['pre_trained_model_path'])
    
    # train the model
    history_efficientnet = model.fit(train_data, train_label, 
                batch_size=4, 
                epochs=2, 
                validation_data=(val_data, val_label), 
                shuffle=True, 
                callbacks=[tensorboard, cp_callback])
    
    # save the model of the last epoch
    model.save("/kaggle/working/models/efficientnet_last_eff_net" + ".h5")
    print("***Efficientnet Model Training Finished***")
    plot_hist(history_efficientnet, figure_name="efficientnet")

In [92]:
train_efficient_net_model(train_data, train_label, val_data, val_label, tensorboard, False)

***Initiating Model : EfficientnetB0
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 efficientnetb0 (Functional)  (None, 2)                4052133   
                                                                 
Total params: 4,052,133
Trainable params: 4,010,110
Non-trainable params: 42,023
_________________________________________________________________
Epoch 1/2
Epoch 1: val_loss improved from inf to 2.12085, saving model to /kaggle/working/checkpoints/efficientnet_best_eff_net.h5
Epoch 2/2
Epoch 2: val_loss improved from 2.12085 to 1.93473, saving model to /kaggle/working/checkpoints/efficientnet_best_eff_net.h5
***Efficientnet Model Training Finished***


<Figure size 640x480 with 0 Axes>

In [52]:
def process_img(img_path):
    """Process the input for inference
    Args:
        img_path (string): the imaged path from where input image is kept
    Returns:
        np.array: the processed input image
    """    
    img = cv2.imread(img_path)
    img = cv2.resize(img , (224, 224))
    img = img.reshape((1, 224, 224, 3))
    img = tf.keras.utils.normalize(img, axis=1)
    return np.array(img)

In [54]:
def get_prediction(img_path, model_path):
    """this function get the prediction of a given image from a given model
    Args:
        img_path (str): the image path from where input image is kept
        model_path (_type_): the model path from where required model is kept
    Returns:
        string: predicted class name of the input image
    """    
    # img_path = ROOT_IMG_PATH + "/" + class_name + "/" + img_name
    processed_img = process_img(img_path)
    model = tf.keras.models.load_model(model_path)
    prediction = model.predict(processed_img)
    label_number = np.argmax(prediction)
    class_name = 'Adidas' if label_number == 0 else 'Nike'
    return class_name

In [57]:
#single image inference
inference_image_path = '/kaggle/input/adiadas-nike-shoes/dataset/test/nike/Image_10.jpg'
inference_model_path = '/kaggle/working/models/CNN_last_iter_1.h5'
output_class_name = get_prediction(inference_image_path, inference_model_path)
img_name = inference_image_path.split("/")[-1]
print(f"The predicted label for image {img_name} is {output_class_name}")

The predicted label for image Image_10.jpg is Nike


In [98]:
def generate_confusion_matrix(image_dir, model_path):
    adidas_correct = 0
    adidas_wrong = 0
    nike_correct = 0
    nike_wrong = 0
    for class_name in os.listdir(image_dir):
        for img_name in os.listdir(os.path.join(image_dir, class_name)):
            output_class_name = get_prediction(os.path.join(image_dir, class_name, img_name), model_path)  
            original_class_name = label_dict[img_name[:-4]]
            print(output_class_name, class_name, original_class_name)
            if original_class_name == 'Adidas':
                if output_class_name == 'Adidas':
                    adidas_correct += 1 
                else:
                    adidas_wrong += 1
            elif original_class_name == 'Nike':
                if output_class_name == 'Nike':
                    nike_correct += 1 
                else: 
                    nike_wrong += 1
    return adidas_correct, adidas_wrong, nike_correct, nike_wrong

In [99]:
model_path = '/kaggle/working/models/CNN_last_iter_1.h5'
image_dir = '/kaggle/input/adiadas-nike-shoes/dataset/validation'

adidas_correct, adidas_wrong, nike_correct, nike_wrong = generate_confusion_matrix(image_dir, model_path)

print("***The Confusion Matrix for test set***")
print("Label -> Adidas , Nike")
print(f'Adidas -> {adidas_correct}, {adidas_wrong}')
print(f'Nike -> {nike_wrong}, {nike_correct}')

Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Adidas nike Nike
Nike nike Nike
Nike nike Nike
Adidas nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Adidas nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Nike nike Nike
Adidas nike Nike
Nike nike Nike
Adidas adidas Adidas
Adidas adidas Adidas
Nike adidas Adidas
Adidas adidas Adidas
Nike adidas Adidas
Adidas adidas Adidas
Nike adidas Adidas
Nike adidas Adidas
Adidas adidas Adidas
Nike adidas Adidas
Nike adidas Adidas
Adidas adidas Adidas
Adidas adidas Adidas
Nike adidas Adidas
Adidas adidas Adidas
Adidas adidas Adidas
Nike adidas Adidas
Adidas adidas Adidas
Adidas adidas Adidas
Adidas adidas Adidas
Nike adidas Adidas
Nike adidas Adidas
Nike adidas Adidas
Adidas adidas Adidas
Adidas adidas Adidas
Adidas adidas Adidas
Adidas adidas Adidas
Adidas adidas Adidas
***The

In [None]:
'''In this segment I have tried a simple CNN model and EfficientNet Backbone architecture to do this specific task.

Problem in this task
1. Number of Data is very low
2. Few images does not represent specific shoes of that perticular set
3. Logo is not visible in that show
4. There was a wrong lebel on the CSV file

Approaches that could have made improve performance of this task
1. Transfer learning
    Using a pre-trained model which have seen many images of this type that model could been used as a pre-trained model
    for this task. I have kept the option to add pre-trained model in my code.
2. Ensemble Learning
    Ensemble learning would made the file model perform much better. I have implemented this code in my git repository code
    but didn't have the time to add this code on this repo.
3. Image Augmentation and Cleaning
    The number of image given for this task is very low. There are some outlier images too. Data cleaning and adding augmented
    image will surely improve performance of this task.
4. Transformer Based object Detector
    Currently transformer based object detector are working wonder. For this task this should have improved performance. But
    this small dataset and images with low resolation that didn't seem required.

'''