# Clothes Image Classifier

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# link to Data: https://www.kaggle.com/competitions/fiu-cap5610-spring22/data

### Reading the Dataset

In [2]:
# Read the training table and the test table in one pass, then report
# the (rows, columns) shape of each.
X_Y_train_img, X_test_img = [
    pd.read_csv(csv_file)
    for csv_file in ("/kaggle/input/project/train.csv", "/kaggle/input/project/test.csv")
]
print(X_Y_train_img.shape, X_test_img.shape)

(30684, 2) (13151, 1)


**Loading all the images whose image ID is present in the training set.<br>
Each image is adjusted to 32 x 32 pixels and is converted into an array.<br>
X_train consists of all the training images (array).**

In [3]:
from keras.preprocessing import image
import matplotlib.pyplot as plt
IMG_HT = 32
IMG_WD = 32

# Directory holding one `<img_id>.jpg` file per image.
img_path = '/kaggle/input/images/images/'

# Load every training image as RGB, resize it to IMG_HT x IMG_WD and divide
# the pixel array by 255.
# `color_mode='rgb'` replaces the deprecated `grayscale=False` flag, and
# `str(img_id)` builds the file name without requiring the id to be a NumPy
# scalar (plain Python ints/strings have no `.astype`).
train_image = []
for img_id in X_Y_train_img['img_id']:
    img = image.load_img(img_path + str(img_id) + '.jpg', target_size=(IMG_HT, IMG_WD), color_mode='rgb')
    train_image.append(image.img_to_array(img) / 255)

# Stack the per-image arrays into one array; labels stay in the same row order
# as the images because both are read from X_Y_train_img.
X_train = np.array(train_image)
Y_train = X_Y_train_img['label'].values
X_train.shape

(30684, 32, 32, 3)

In [4]:
# One-hot encode the training labels straight from the source column rather
# than from Y_train itself, so re-running this cell yields the same result
# instead of re-encoding an already encoded array.
# An explicit float dtype keeps the targets numeric on every pandas version
# (the default dummy dtype is bool on pandas >= 2.0).
Y_train = pd.get_dummies(X_Y_train_img['label'], dtype='float32').to_numpy()
Y_train.shape

(30684, 4)

**Loading all the images whose image ID is present in the test set.<br>
Each image is adjusted to 32 x 32 pixels and is converted into an array.<br>
X_test consists of all the test images (array).**

In [5]:
# The test image ids (and a `label` column) are read from the sample
# submission file.
# NOTE(review): labels in sample_submission.csv are presumably placeholders,
# not ground truth -- confirm before using Y_test to measure accuracy.
X_Y_test_img  = pd.read_csv("/kaggle/input/project/sample_submission.csv")
img_path = '/kaggle/input/images/images/'

# Load every test image as RGB, resize it to IMG_HT x IMG_WD and divide the
# pixel array by 255.
# `color_mode='rgb'` replaces the deprecated `grayscale=False` flag, and
# `str(img_id)` builds the file name without requiring the id to be a NumPy
# scalar (plain Python ints/strings have no `.astype`).
test_image = []
for img_id in X_Y_test_img['img_id']:
    img = image.load_img(img_path + str(img_id) + '.jpg', target_size=(IMG_HT, IMG_WD), color_mode='rgb')
    test_image.append(image.img_to_array(img) / 255)

# Stack the per-image arrays into one array, in the row order of X_Y_test_img.
X_test = np.array(test_image)
Y_test = X_Y_test_img['label'].values
X_test.shape

(13151, 32, 32, 3)

In [6]:
# One-hot encode the test-file labels straight from the source column rather
# than from Y_test itself, so re-running this cell yields the same result.
# An explicit float dtype keeps the array numeric on every pandas version
# (the default dummy dtype is bool on pandas >= 2.0).
# The former mid-cell `Y_test.head()` was dropped: only the last expression of
# a cell is displayed, so it had no visible effect.
Y_test = pd.get_dummies(X_Y_test_img['label'], dtype='float32').to_numpy()
Y_test.shape

(13151, 4)

## Building the model using Convolutional Networks
The model consists of 2 convolution layers with 32 and 48 filters respectively (5 x 5 kernels), followed by 2 fully connected layers with 256 and 84 nodes respectively.<br>
Final output layer consists of 4 nodes with activation softmax.<br>
All other layers (convolution and fully-connected) use relu activation.<br>
Average pooling (2 x 2, stride 2) is applied after each convolution layer.

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers import BatchNormalization

# Modified LeNet (variant 2); the original author recorded Accuracy = 0.98342.
# Layer stack, in order:
#   conv 5x5 / 32 filters ('same' padding) -> 2x2 average pool
#   conv 5x5 / 48 filters ('valid' padding) -> 2x2 average pool
#   flatten -> dense 256 -> dense 84 -> softmax over 4 outputs
model = Sequential([
    Conv2D(
        filters=32,
        kernel_size=(5,5),
        padding='same',
        activation='relu',
        input_shape=(IMG_HT, IMG_WD, 3),
    ),
    AveragePooling2D(pool_size=2, strides=2),
    Conv2D(
        filters=48,
        kernel_size=(5,5),
        padding='valid',
        activation='relu',
    ),
    AveragePooling2D(pool_size=2, strides=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(84, activation='relu'),
    Dense(4, activation='softmax'),
])

#### The model is trained for 20 epochs using the Adam optimizer with a batch size of 128 examples.

In [8]:
# Configure training: Adam optimizer, categorical cross-entropy loss on the
# one-hot targets, and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Fit on the full training set (no hold-out split). To monitor a validation
# score instead, the same call can be given validation_split = 0.2.
model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7d7c855449d0>

#### Making predictions on the test set

In [10]:
# The softmax outputs follow the column order that pd.get_dummies gave the
# training labels, so the class names are derived from that same encoding
# instead of a hand-written list that could drift out of sync with the data.
class_names = pd.get_dummies(X_Y_train_img['label']).columns

# One row of class probabilities per test image.
prediction = model.predict(X_test)
Y_Pred = pd.DataFrame(prediction, columns=class_names)
Y_Pred.head()

Unnamed: 0,Accessories,Apparel,Footwear,Personal Care
0,0.9580058,0.033982,0.0002188428,0.007793465
1,1.065984e-10,1.0,4.357935e-11,9.797486e-18
2,6.777984e-10,1.0,1.711469e-10,4.444432e-25
3,1.543593e-06,0.999998,6.203912e-08,4.591465e-14
4,2.567884e-08,1.0,9.17244e-08,2.32313e-13


#### Writing the predictions off to a .csv file

In [11]:
# Submission table: each test image id paired with the class whose predicted
# probability is highest in that row of Y_Pred.
predicted_label = Y_Pred.idxmax(axis=1)
output_df = pd.DataFrame({
    'img_id': X_Y_test_img['img_id'],
    'label': predicted_label,
})
output_df.head()

Unnamed: 0,img_id,label
0,26726,Accessories
1,26241,Apparel
2,41082,Apparel
3,2838,Apparel
4,23533,Apparel


In [12]:
# Save the submission table to disk; the DataFrame index is not written.
output_df.to_csv(
    'output.csv',
    index=False,
)