## Goal: Build a model that can identify the brand from the product image uploaded.

### Data Info:

- Data has not been collected well and has a lot of gaps.
- Product images without brand info should be considered as fake images.
- Ignore the brand info with no product images.


### Model Info:

Build a stacked model system with two CNN models.

First model: 
- Identifies if a product image belongs to the original brand or a fake brand.
- Cannot use any additional features besides the image.

Second model:
- Looks at the product images identified as genuine and then determines which brand each one belongs to.
- Can use some additional features that are reliable to improve its prediction accuracy.


To impress management:
- Build interpretability into your system.

In [17]:
import os
import shutil

In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [19]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import InputLayer, Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Dropout 
from keras.utils import load_img, img_to_array, np_utils

In [20]:
# Read the brand metadata CSV that accompanies the product images.
brand_csv_path = 'branddataset/brand_info.csv'
data = pd.read_csv(brand_csv_path)

# Column overview: dtypes and non-null counts show where the table has gaps.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15137 entries, 0 to 15136
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    15137 non-null  int64  
 1   ID            15137 non-null  int64  
 2   GenderType    15137 non-null  object 
 3   Type          15137 non-null  object 
 4   SubType       15137 non-null  object 
 5   Article       15137 non-null  object 
 6   PrimaryColor  15131 non-null  object 
 7   Seasonal      15136 non-null  object 
 8   Year          15136 non-null  float64
 9   Use           15133 non-null  object 
 10  Brand         15137 non-null  object 
dtypes: float64(1), int64(2), object(8)
memory usage: 1.3+ MB


In [21]:
# List the distinct brand labels present in the metadata.
data['Brand'].unique()

array(['Peter England', 'Titan', 'Puma', 'Fila', 'Gini and Jony',
       'Baggit', 'Adidas', 'John', 'Scullers', 'Nike', 'Arrow',
       'Wrangler', 'Lotto', 'United Colors of Benetton', 'Fastrack',
       'Jockey', 'Ray Ban', 'Chromozome', 'Spykar', 'Lee', 'Elle',
       'Casio'], dtype=object)

Create numpy array of all images

In [22]:
image_dir = "branddataset/images/"

# os.listdir returns entries in arbitrary order; sort the names so positional
# lookups into this list, and the row order of any arrays built from it, are
# the same on every run and every machine.
images_list = sorted(os.listdir(image_dir))

no_of_images = len(images_list)


In [23]:
# Inspect one arbitrary image to see what shape the raw data comes in.
sample_name = images_list[4327]
img = load_img(image_dir + sample_name)
img_arr = img_to_array(img).astype(int)

img_arr.shape

(80, 53, 3)

In [24]:
img_data = []
target = []

# `value in series` tests membership in the Series *index*, not in its values.
# Build an explicit set of the known product IDs so the label lookup really
# checks the ID column (and is O(1) per image).
known_ids = set(data['ID'].tolist())

for i in images_list:
    img = load_img(image_dir + i)
    img_arr = img_to_array(img).astype(int)
    shape = img_arr.shape

    # Keep only 80x60 images so that every sample stacks into a single array.
    if (shape[0] != 80) or (shape[1] != 60):
        continue

    # The file name (minus extension) is the product ID. splitext removes exactly
    # the extension, whereas rstrip('.jpg') strips any trailing '.', 'j', 'p', 'g'
    # characters. Resolve the ID before appending so both lists stay in sync.
    image_id = int(os.path.splitext(i)[0])

    img_data.append(img_arr)

    # original - 1, fake - 0 (an image whose ID has no brand info is a fake)
    target.append(1 if image_id in known_ids else 0)
        

In [26]:
# Stack the per-image arrays and their labels into NumPy arrays.
X, Y = np.array(img_data), np.array(target)

# Report the resulting shapes, one per line.
for arr in (X, Y):
    print(arr.shape)

(44303, 80, 60, 3)
(44303,)


In [27]:
from sklearn.model_selection import train_test_split

# Hold out half of the samples for evaluation. The seed is fixed so the split,
# and every metric computed from it, is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.5, random_state=42
)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(22151, 80, 60, 3)
(22152, 80, 60, 3)
(22151,)
(22152,)


In [28]:
# Peek at the first few training labels (1 = original, 0 = fake).
y_train[:10]

array([1, 1, 0, 0, 0, 0, 0, 0, 1, 1])

In [97]:
# NOTE: one-hot encoding is left disabled. The model below ends in a single
# sigmoid unit trained with binary cross-entropy, which takes the 0/1 labels
# as they are. The shape printed under this cell appears to be left over from
# an earlier run -- TODO confirm and clear the stale output.
#Y_train = np_utils.to_categorical(y_train)
#Y_test = np_utils.to_categorical(y_test)

#Y_train.shape

(35442, 2)

# Build CNN model 1

In [29]:
# CNN 1: binary original-vs-fake classifier over 80x60 RGB images.
model = Sequential([
    InputLayer(input_shape=(80, 60, 3)),

    # Two stride-1 3x3 convolutions, then 2x2 max-pooling and batch norm.
    Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'),
    Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    BatchNormalization(),

    # One stride-2 3x3 convolution, then another pooling + batch norm stage.
    Conv2D(256, kernel_size=(3, 3), strides=(2, 2), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    BatchNormalization(),

    # Dense head ending in a single sigmoid unit.
    Flatten(),
    Dense(100, activation='relu'),
    Dense(50, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [30]:
# Binary cross-entropy loss with the Adam optimizer; report accuracy while training.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [31]:
# Train for 10 epochs in batches of 256, evaluating on the held-out half after each epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=256,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x28fdf4d90>

# Build CNN Model 2