In [1]:
# Colab only: mount Google Drive so that files under /content/gdrive are reachable.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [1]:
pip -q install git+https://github.com/qubvel/classification_models.git pickle5

Note: you may need to restart the kernel to use updated packages.


## How to Run this Code
This code runs best on Google Colab or on a sufficiently powerful machine. The paths used in this version are relative to the directory that contains this notebook. That directory must contain the shuffled-images/ folder, which holds all the images of the dataset, as well as the train.csv and test.csv files. It must also contain a Data/ folder, created beforehand, in which the processed data files are saved.

If you are running this notebook on Colab, set the variable named "path" to the correct directory; it is used as a prefix for the data and model file paths. The models used (the ResNet18 model and the general model classifier) have been downloaded for reproducibility, but note that exactly the same results might not be reproduced, for a variety of reasons.

In [20]:
import pandas as pd
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16, InceptionResNetV2, MobileNetV2, Xception, InceptionV3, ResNet50
from keras.models import Sequential, load_model, Model
from keras.utils import to_categorical
from keras.layers import Dense, Dropout, Activation, Flatten, Embedding, GRU
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import load_img, img_to_array
from classification_models.keras import Classifiers
from tqdm import tqdm # use tqdm from tqdm.notebook if on Colab
import math
import matplotlib.pyplot as plt
import numpy as np
import pickle5 as pickle
from collections import Counter

# Shared configuration read by the cells below.
act='relu'  # NOTE(review): not referenced by any other visible cell — confirm whether still needed
num_classes = 27  # equals the number of entries in `classes` below
epochs = 30  # read by the training helpers at call time; reassigned to 10 before the meta-model run
batch_size = 32  # read by the training helpers at call time
data_augmentation = True  # NOTE(review): not referenced by any other visible cell — confirm whether still needed
# Prefix prepended to data/model file names; the commented value is an example Colab location.
path = "" # '/content/gdrive/My Drive/Colab Notebooks/Kaggle/'

# Category names; a label's integer id is its position in this list
# (see trainLabelsOrder and predToClass).
classes = ['Accessories','Apparel Set','Bags','Belts','Bottomwear',
 'Cufflinks','Dress','Eyewear','Flip Flops','Fragrance','Free Gifts',
 'Headwear','Innerwear','Jewellery','Lips','Loungewear and Nightwear',
 'Makeup','Nails','Sandal','Saree','Scarves','Shoes','Socks','Ties','Topwear',
 'Wallets','Watches']

In [27]:
# Code used to process data
# Paths may be wrong as some of these functions were used on my local machine
def processImages(inp=path + 'train.csv', out=path + 'Data/TRAIN_IMAGES.pkl'):
    """Load every image listed in a CSV and pickle them as a single array.

    Args:
        inp: CSV file with an 'id' column; each id names the image
            '<path>shuffled-images/<id>.jpg'.
        out: Destination pickle file for the stacked image array.

    The images are stacked in the row order of the CSV, and the shape of the
    resulting array is printed before it is written.
    """
    df = pd.read_csv(inp)
    # Resolve the image folder against `path`, like the CSV and pickle files,
    # so the function also works when the notebook is not run from the data directory.
    image_dir = path + 'shuffled-images/'
    images = []
    # Iterate over the id column directly: iterrows() may upcast an integer id
    # to float (e.g. '123.0') when the row is coerced to a single dtype.
    for image_id in tqdm(df['id'], total=len(df)):
        img = load_img('{}{}.jpg'.format(image_dir, image_id))
        images.append(img_to_array(img))
    images = np.array(images)
    print(images.shape)
    with open(out, 'wb') as handle:
        pickle.dump(images, handle)

def trainLabelsOrder():
    """Pickle the integer class id of every training row, in CSV order.

    Reads `path + 'train.csv'`, maps each 'category' value to its index in
    `classes`, prints the shape of the resulting array and writes it to
    `path + "Data/TRAIN_LABELS.pkl"`.
    """
    train_df = pd.read_csv(path + 'train.csv')
    # classes.index raises ValueError for a category that is not in `classes`.
    label_ids = np.array([classes.index(category) for category in train_df['category']])
    print(label_ids.shape)
    with open(path + "Data/TRAIN_LABELS.pkl", 'wb') as sink:
        pickle.dump(label_ids, sink)

def cleanText(file='train.csv'):
    """Drop digit-containing tokens from 'noisyTextDescription' and save a copy.

    Args:
        file: CSV file with a 'noisyTextDescription' column.

    The cleaned frame is written next to the input as 'clean_<name>'
    (e.g. 'train.csv' -> 'clean_train.csv', 'dir/test.csv' -> 'dir/clean_test.csv').
    Rows whose description is missing are left untouched.
    """
    import os  # local import: only needed to split the input path

    def hasNumbers(s): return any(char.isdigit() for char in s)

    df = pd.read_csv(file)
    for idx, row in df.iterrows():
        description = row['noisyTextDescription']
        # Empty cells are read back as NaN (a float), which has no .split().
        if not isinstance(description, str):
            continue
        cleanTokens = [tok for tok in description.split(' ') if not hasNumbers(tok)]
        df.at[idx, 'noisyTextDescription'] = ' '.join(cleanTokens)
    # Prefix the file name only, so an input given with a directory stays in that directory.
    folder, filename = os.path.split(file)
    df.to_csv(os.path.join(folder, 'clean_{}'.format(filename)))

def processCleanText(file='clean_train.csv', out='Data/TRAIN_PLAINTEXT.pkl'):
    """Build one plain-text sentence per CSV row and pickle the list.

    Args:
        file: CSV with 'gender', 'baseColour', 'season', 'usage' and
            'noisyTextDescription' columns.
        out: Destination pickle file for the list of sentences.

    Each sentence is the four attribute values joined by single spaces,
    followed by the description unless its string form is "nan".
    """
    attribute_columns = ('gender', 'baseColour', 'season', 'usage')
    frame = pd.read_csv(file)
    sentences = []
    for _, record in frame.iterrows():
        pieces = [record[column] for column in attribute_columns]
        description = str(record['noisyTextDescription'])
        # A missing description shows up as the string "nan" after str().
        if description != "nan":
            pieces.append(description)
        sentences.append(" ".join(pieces))
    with open(out, 'wb') as sink:
        pickle.dump(sentences, sink)

def loadTrainDataInOrder():
  """Load the pickled training images, text and labels.

  Returns:
    A tuple (images, text, labels): the images cast to float32 and divided
    by 255, the text exactly as stored, and the labels passed through
    to_categorical.
  """
  def _unpickle(filename):
    # Read one pickle file and return its content.
    with open(filename, 'rb') as source:
      return pickle.load(source)

  pixels = _unpickle(path + 'Data/TRAIN_IMAGES.pkl')
  text = _unpickle(path + 'Data/TRAIN_PLAINTEXT.pkl')
  label_ids = _unpickle(path + 'Data/TRAIN_LABELS.pkl')
  pixels = pixels.astype('float32')
  return pixels/255., text, to_categorical(label_ids)

def loadTestData():
  """Load the pickled test images and text.

  Returns:
    A tuple (images, text): the images cast to float32 and divided by 255,
    and the text exactly as stored.
  """
  def _unpickle(filename):
    # Read one pickle file and return its content.
    with open(filename, 'rb') as source:
      return pickle.load(source)

  # TEST_IMAGES.pkl stores the images in order of the sample submission IDs
  pixels = _unpickle(path + "Data/TEST_IMAGES.pkl")
  text = _unpickle(path + "Data/TEST_PLAINTEXT.pkl")
  pixels = pixels.astype('float32')
  return pixels/255, text

def predToClass(p):
    """Return the entry of `classes` at the position of the largest value in `p`."""
    best_index = np.argmax(p)
    return classes[best_index]

In [22]:
# Build the .pkl files (images, labels, text) that loadTrainDataInOrder and
# loadTestData read. Processing the images takes a few minutes; there is no
# need to re-run this cell once the files exist in Data/.
processImages()
processImages(inp=path + 'test.csv', out=path + 'Data/TEST_IMAGES.pkl')
trainLabelsOrder()
cleanText('train.csv')
cleanText('test.csv')
# Write the text pickles under `path`, which is where the loaders look for them.
processCleanText('clean_train.csv', path + 'Data/TRAIN_PLAINTEXT.pkl')
processCleanText('clean_test.csv', path + 'Data/TEST_PLAINTEXT.pkl')

21627it [01:10, 308.53it/s]


(21627, 80, 60, 3)


21628it [02:23, 150.98it/s]


(21628, 80, 60, 3)
(21627,)


# Train ResNet18 for Image Recognition

In [29]:
# Hold out the first 20% of every class (in file order) as a validation set.
VAL_FRACTION = 0.2
loaded_x_train, _, loaded_y_train = loadTrainDataInOrder()

# Labels are one-hot rows, so the column index of each 1 is that row's class id.
class_ids = np.where(loaded_y_train == 1)[1]
held_out = np.zeros(len(class_ids), dtype=bool)
for class_id in range(num_classes):
  members = np.where(class_ids == class_id)[0]
  held_out[members[:int(len(members) * VAL_FRACTION)]] = True

# Boolean-mask indexing keeps the original row order and avoids building
# per-sample Python lists of the whole image array.
x_validate, y_validate = loaded_x_train[held_out], loaded_y_train[held_out]
x_train, y_train = loaded_x_train[~held_out], loaded_y_train[~held_out]
print(x_train[0].shape, y_train[0].shape) # (80, 60, 3) (27,)
print(len(x_validate), len(y_validate)) # 4315 4315
print(len(x_train), len(y_train)) # 17312 17312

(80, 60, 3) (27,)
4315 4315
17312 17312


In [30]:
def train_model(model, name, xtrain, ytrain, xval, yval):
  """Fit an image model and checkpoint its best epoch to `path + name + '.h5'`.

  Args:
    model: Compiled Keras model that reports an 'accuracy' metric.
    name: Base name of the checkpoint file (without extension).
    xtrain, ytrain: Training images and one-hot labels.
    xval, yval: Validation images and labels; pass None for either one to
      train without a validation set.

  Returns:
    The History object returned by model.fit.

  The globals `epochs` and `batch_size` are read at call time.
  """
  val_data = None if xval is None or yval is None else (xval, yval)
  # Without validation data Keras never logs 'val_accuracy', so a checkpoint
  # monitoring it would never save; track the training accuracy instead.
  monitored = 'accuracy' if val_data is None else 'val_accuracy'
  save_best_model = ModelCheckpoint(path+name+'.h5',
                                    monitor=monitored,
                                    mode='max',
                                    save_best_only=True,
                                    verbose=0)
  # Every augmentation is switched off below; the arguments are kept explicit
  # so they are easy to tune.
  datagen = ImageDataGenerator(
      featurewise_center=False,
      samplewise_center=False,
      featurewise_std_normalization=False,
      samplewise_std_normalization=False,
      zca_whitening=False,
      zca_epsilon=1e-06,
      rotation_range=0,
      width_shift_range=0,
      height_shift_range=0,
      shear_range=0.,
      zoom_range=0.,
      channel_shift_range=0.,
      fill_mode='nearest',
      cval=0.,
      horizontal_flip=False,
      vertical_flip=False,
      rescale=None,
      preprocessing_function=None,
      data_format=None,
      validation_split=0.0)
  datagen.fit(xtrain)
  history = model.fit(datagen.flow(xtrain, ytrain, batch_size=batch_size),
                      steps_per_epoch=math.ceil(xtrain.shape[0]/batch_size),
                      epochs=epochs,
                      # Previously hard-coded to None, which silently ignored xval/yval.
                      validation_data=val_data,
                      callbacks=[save_best_model],
                      verbose=2)
  return history

In [None]:
from keras.layers import GlobalAveragePooling2D
from keras.models import Model

# ResNet18 backbone without its classification top, for 80x60 RGB inputs.
ResNet18, preprocess_input = Classifiers.get('resnet18')
base_model = ResNet18(input_shape=(80, 60, 3), include_top=False)

# Classification head: global average pooling -> 512-unit ReLU layer -> softmax over the classes.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(512, activation='relu')(pooled)
predictions = Dense(num_classes, activation='softmax')(hidden)

# Full trainable model: backbone plus head.
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Workflow: first run with val. set being 20% of each class, and then train with full training set, no val. set.
# As written, this call is the second stage: it passes the full training arrays and None for both validation arguments.
train_model(model, "resnet18", loaded_x_train, loaded_y_train, None, None)

# Combine image and text features (text embedded with the TF Hub Universal Sentence Encoder)

In [31]:
# Load the training images, text and one-hot labels, then report their sizes.
img_data_train, text_data_train, labels_train = loadTrainDataInOrder()
# Expected output, one per line: (21627, 80, 60, 3), 21627, (21627, 27)
for summary in (img_data_train.shape, len(text_data_train), labels_train.shape):
  print(summary)

(21627, 80, 60, 3)
21627
(21627, 27)


In [35]:
import tensorflow_hub as hub

# Image branch: load the saved ResNet18 classifier and expose the output of its
# third-from-last layer as the image feature vector.
# NOTE(review): layers[-3] presumably is the pooling layer below the two Dense layers — confirm against the saved model.
images_model = load_model(path + 'resnet18 0.9279') # test this with the other model as well
img_extractor = Model(images_model.input, images_model.layers[-3].output)
img_features = img_extractor.predict(img_data_train)
print(img_features.shape) # (21627, 512)

# Text branch: embed every plain-text sentence with the Universal Sentence Encoder
# (all sentences are passed in a single call).
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
text_features = embed(text_data_train).numpy()
print(text_features.shape) # (21627, 512)

# Fuse both branches by placing the two feature vectors of each sample side by side.
train_features = np.concatenate((img_features, text_features), axis=1)
print(train_features.shape) # (21627, 1024)

(21627, 512)




(21627, 512)
(21627, 1024)


In [None]:
# Split the fused features: the first 20% of every class (in file order) is held out for validation.
VAL_FRACTION = 0.2
loaded_x_train, loaded_y_train = train_features, labels_train

# Labels are one-hot rows, so the column index of each 1 is that row's class id.
class_ids = np.where(loaded_y_train == 1)[1]
held_out = np.zeros(len(class_ids), dtype=bool)
for class_id in range(num_classes):
  members = np.where(class_ids == class_id)[0]
  held_out[members[:int(len(members) * VAL_FRACTION)]] = True

# Boolean-mask indexing keeps the original row order and avoids per-sample Python lists.
x_validate, y_validate = loaded_x_train[held_out], loaded_y_train[held_out]
x_train, y_train = loaded_x_train[~held_out], loaded_y_train[~held_out]
print(x_train[0].shape, y_train[0].shape) # (1024,) (27,)
print(len(x_validate), len(y_validate)) # 4315 4315
print(len(x_train), len(y_train)) # 17312 17312

In [37]:
def train_meta_model(model, name, trainx, trainy, valx, valy):
  """Fit the meta model and checkpoint its best epoch to `path + name + '.h5'`.

  Args:
    model: Compiled Keras model that reports an 'accuracy' metric.
    name: Base name of the checkpoint file (without extension).
    trainx, trainy: Training features and one-hot labels.
    valx, valy: Validation features and labels; pass None for either one to
      train without a validation set.

  Returns:
    The History object returned by model.fit.

  The globals `epochs` and `batch_size` are read at call time.
  """
  val_data = None if valx is None or valy is None else (valx, valy)
  # Without validation data Keras never logs 'val_accuracy', so a checkpoint
  # monitoring it would never write the file; track the training accuracy instead.
  monitored = 'accuracy' if val_data is None else 'val_accuracy'
  # create a callback that will save the best model while training
  save_best_model = ModelCheckpoint(path+name+'.h5', monitor=monitored,
                                    mode='max', save_best_only=True, verbose=0)
  history = model.fit(trainx, trainy,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_data=val_data,
                      shuffle=True,
                      callbacks=[save_best_model],
                      verbose=2)
  return history

In [40]:
# Dense classifier over the 1024-value fused (image + text) feature vectors.
meta_model = Sequential()
meta_model.add(Dense(1024, input_shape=(1024,), activation='relu'))
meta_model.add(Dense(512, activation='relu'))
meta_model.add(Dense(256, activation='relu'))
meta_model.add(Dense(128, activation='relu'))
meta_model.add(Dense(num_classes, activation='softmax'))
# `learning_rate` replaces the deprecated `lr` argument name.
meta_model.compile(loss='categorical_crossentropy',
                   optimizer=keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6), metrics=['accuracy'])

In [None]:
# Rebinds the global `epochs` (30 in the config cell); any training helper called after this cell sees 10.
epochs = 10
# Workflow: first train with validation set, and then train with full training.
# As written, this call passes the full arrays held in loaded_x_train/loaded_y_train and no validation data.
train_meta_model(meta_model, "meta_model run5", loaded_x_train, loaded_y_train, None, None)

In [42]:
# Build test features with the same extractor and text encoder objects used for the training features.
test_image_data, test_text_data = loadTestData()
test_img_features = img_extractor.predict(test_image_data)
test_text_features = embed(test_text_data).numpy()
# Place image and text features side by side, as for the training features.
test_features = np.concatenate((test_img_features, test_text_features), axis=1)
test_features.shape # (21628, 1024) if plaintext

(21628, 1024)

In [48]:
# Reload the checkpointed meta model and turn each probability row into a class name.
meta_model = load_model(path + "meta_model run5.h5")
preds = meta_model.predict(test_features)
pred_classes = list(map(predToClass, preds))
print(len(pred_classes)) # 21628

21628


In [51]:
# Read the same test.csv (under `path`) that the test images were built from,
# so that predictions line up with the ids row by row.
df = pd.read_csv(path + 'test.csv')
df['category'] = pred_classes
df[['id', 'category']].to_csv('submission 4.csv', index=False)