In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Load the product metadata table.
# Malformed CSV rows are skipped with a warning instead of aborting the read.
# `on_bad_lines='warn'` is the replacement for `error_bad_lines=False`, which was
# deprecated in pandas 1.3 and removed in pandas 2.0.
df = pd.read_csv('/kaggle/input/fashion-product-images-small/myntradataset/styles.csv', on_bad_lines='warn')

df.head()

In [None]:
# Discard every row that has at least one missing value
df = df.dropna()

# Only the last expression of a cell is displayed, so the per-column count of
# distinct values has to be printed explicitly to be visible.
print(df.nunique())

df.columns

In [None]:
# List the distinct values of every categorical column, one column per section
cat_columns = ['gender', 'masterCategory', 'subCategory', 'articleType','baseColour', 'season', 'year', 'usage']

for column_name in cat_columns:
    distinct_values = df[column_name].unique()
    # column name, its values and a separator line, each on its own line
    print(column_name, distinct_values, '-------------------------', sep='\n')

The images in this dataset are very low resolution (80x60), so we will only use the categories that remain visually distinct at such a low resolution.

The categories year, usage, season, and gender might not be clearly visually distinct in some cases, so we won't be using them.

The categories masterCategory and subCategory are distinct enough groups, but they are not specific enough for practical use.

The categories we will use are articleType and baseColour (you can use more categories if you want).

There are many unique labels in these categories; we will only use the ones with at least 1000 examples, since we need a good number of samples per label for proper classification.

In [None]:
# Keep only the article types that have at least MIN_EXAMPLES rows.
# A boolean mask replaces the earlier "scan until the first count < 1000" loop: that
# loop dropped the last category whenever every count was >= 1000 (it never hit the
# `break`, so the slice stopped one short) and left its index unbound on an empty frame.
MIN_EXAMPLES = 1000

value_counts = df['articleType'].value_counts()

types_used = value_counts[value_counts >= MIN_EXAMPLES].index
print('Article types used: ',types_used)

In [None]:
# Keep only the base colours that have at least MIN_EXAMPLES rows.
# A boolean mask replaces the earlier "scan until the first count < 1000" loop: that
# loop dropped the last colour whenever every count was >= 1000 (it never hit the
# `break`, so the slice stopped one short) and left its index unbound on an empty frame.
MIN_EXAMPLES = 1000

value_counts = df['baseColour'].value_counts()

colours_used = value_counts[value_counts >= MIN_EXAMPLES].index
print('Base Colours used: ',colours_used)

In [None]:
# Keep a row only when both its article type and its base colour are among the selected labels
has_selected_type = df['articleType'].isin(types_used)
has_selected_colour = df['baseColour'].isin(colours_used)

df = df[has_selected_type & has_selected_colour]

In [None]:
# How many rows survive the label filtering
df.shape[0]

Now we will load the images for all the remaining rows and convert them to numpy arrays with the `img_to_array` function from Keras.

In [None]:
# Read the image of every remaining row, resize it and collect it as a float array.
# Rows whose image file cannot be read are recorded in `invalid_ids` and skipped.

from tensorflow.keras.preprocessing.image import img_to_array
import cv2

IMAGE_DIR = '/kaggle/input/fashion-product-images-small/myntradataset/images/'

# cv2.resize takes (width, height), so every resized array has shape (IY, IX, 3).
# NOTE(review): if the source files are portrait (taller than wide), this target size
# swaps the two dimensions and distorts the aspect ratio - confirm against the images.
IX = 80
IY = 60

data = []
invalid_ids = []

for name in df.id:

    image = cv2.imread(IMAGE_DIR + str(name) + '.jpg')

    # cv2.imread signals a missing or unreadable file by returning None rather than
    # raising. Checking for it explicitly replaces a bare `except:` that also hid
    # unrelated failures (and KeyboardInterrupt) behind "image is missing".
    if image is None:
        invalid_ids.append(name)
        continue

    image = cv2.resize(image, (IX, IY))
    data.append(img_to_array(image))

In [None]:
# Report the ids for which no image could be loaded (heading first, then the list)
print('invalid ids:', invalid_ids, sep='\n')

In [None]:
# Columns whose values become the target labels of each image.
# 'articleType' is used (instead of 'subCategory') so that the labels come from the
# same column the rows were filtered on with `types_used`.
used_columns = ['articleType','baseColour']

# Skip the rows whose image could not be loaded, so that `labels` stays aligned row
# for row with `data`, then collect one [articleType, baseColour] pair per row.
# The vectorised selection replaces a per-row `iterrows` loop with a list membership test.
has_image = ~df['id'].isin(invalid_ids)

labels = df.loc[has_image, used_columns].values.tolist()

In [None]:
import numpy as np

# Stack the per-image arrays into a single float array, then divide by 255 to rescale the pixel values
data = np.array(data, dtype="float")
data = data / 255.0

# One row of label strings per image
labels = np.array(labels)

print(labels)

Now we will create binary vectors as the outputs of the model

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Encode each row of label strings as a 0/1 vector with one position per known class

mlb = MultiLabelBinarizer()
labels = mlb.fit_transform(labels)

# Show the class order used by the encoder, followed by the first encoded vector
print(mlb.classes_, labels[0], sep='\n')

In [None]:
from tensorflow.keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential

# Image arrays are (height, width, channels)
inputShape = (IY, IX, 3)

# One output unit per class known to the label binarizer
out = len(mlb.classes_)

# A very simple sequential model is used, since the images are very low resolution
# and the categories are fairly distinct.
model = Sequential([
    # first convolution block
    Conv2D(32, (3, 3), padding="same", input_shape=inputShape),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # second convolution block
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # fully connected head
    Flatten(),
    Dense(128),
    Activation('sigmoid'),

    # The final activation has to be sigmoid, since multiple output labels can be 1 at once
    Dense(out),
    Activation('sigmoid'),
])

# The loss has to be binary_crossentropy: it is calculated separately for each output
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['mse'])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for testing; the fixed random_state keeps the split repeatable
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Training hyper-parameters
batch = 32  # samples per gradient update
E = 50      # number of passes over the training set

# Train the model.
# `batch` was previously defined but never handed to `fit`, so editing it had no
# effect; it is now passed explicitly (32 is also the Keras default, so results
# are unchanged for the current value).
model.fit(x=trainX, y=trainY,
          batch_size=batch,
          epochs=E, verbose=1)

In [None]:
preds = model.predict(testX)

# The model outputs one sigmoid score per class; threshold them at 0.5 to get a
# 0/1 vector per sample (vectorised instead of a nested Python loop).
pred_binarized = (preds > 0.5).astype(int)

# Convert the binary vectors back to tuples of label names
true_test_labels = mlb.inverse_transform(testY)
pred_test_labels = mlb.inverse_transform(pred_binarized)

# Evaluate the predictions: a true label counts as correct when it appears among the
# predicted labels of the same sample, otherwise as missing/wrong. Extra predicted
# labels are not penalised by this score.
# Every true label of a sample is checked, rather than only positions 0 and 1, so the
# loop neither fails on samples with fewer than two labels nor ignores additional ones.
correct = 0
wrong = 0

for true_labels, pred_labels in zip(true_test_labels, pred_test_labels):
    predicted = set(pred_labels)
    hits = sum(1 for label in true_labels if label in predicted)
    correct += hits
    wrong += len(true_labels) - hits

total = correct + wrong

print('correct: ', correct)
print('missing/wrong: ', wrong)
# An empty test set has no accuracy; report NaN instead of dividing by zero
print('Accuracy: ', correct / total if total else float('nan'))

In the run recorded here, the model identified about 84.61% of the labels correctly (the exact figure will vary from run to run). Let us see what that looks like in practice.

In [None]:
# Show true and predicted labels side by side for the first 20 test samples.
# Slicing (instead of indexing 0..19) avoids an IndexError on a smaller test set.
for true_labels, pred_labels in zip(true_test_labels[:20], pred_test_labels[:20]):
    print('True labels: ',true_labels,' Predicted labels: ',pred_labels)

While we did not classify the images on all of the available categories, we were able to assign more than one label to each image at the same time.