In [13]:
import sys
import subprocess
import pkg_resources

# Distributions the notebook needs. scikit-learn and plotly are included because
# later cells import `sklearn.*` and `plotly.*`; without them a fresh environment
# would pass this check and still fail on those imports.
required = {'numpy', 'pandas', 'tensorflow', 'keras', 'scikit-learn', 'plotly'}
# names of the distributions already visible to this kernel
installed = {pkg.key for pkg in pkg_resources.working_set}
missing = required - installed

if missing:
    # Run pip through the kernel's own interpreter so packages land in the
    # environment this notebook executes in; sorted() keeps the command stable.
    python = sys.executable
    subprocess.check_call([python, '-m', 'pip', 'install', *sorted(missing)], stdout=subprocess.DEVNULL)

In [14]:
import tensorflow as tf
import numpy as np
import pandas as pd
import subprocess

## Import dataset

Source: RentTheRunway clothing-fit data from https://cseweb.ucsd.edu/~jmcauley/datasets.html#clothing_fit

Reference: <br />
Decomposing fit semantics for product size recommendation in metric spaces <br />
Rishabh Misra, Mengting Wan, Julian McAuley <br />
RecSys, 2018

### - download data

In [15]:
import os

DATASET_URL = 'http://deepx.ucsd.edu/public/jmcauley/renttherunway/renttherunway_final_data.json.gz'
DATASET_PATH = './renttherunway_final_data.json'
DATASET_ARCHIVE_PATH = f'{DATASET_PATH}.gz'

# Download and extract only when the extracted file is missing, so re-running this
# cell neither fetches the archive again nor makes gzip stop on an existing file.
if not os.path.exists(DATASET_PATH):
    if not os.path.exists(DATASET_ARCHIVE_PATH):
        # wget saves under the URL's basename in the working directory,
        # which is the same file DATASET_ARCHIVE_PATH points at
        subprocess.run(['wget', DATASET_URL], check=True)
    # check=True makes a failed download or extraction raise here instead of
    # surfacing later as a confusing read error
    subprocess.run(['gzip', '-d', DATASET_ARCHIVE_PATH], check=True)

# read the dataframe (lines=True: one JSON record per line)
dataset = pd.read_json(DATASET_PATH, lines=True)
dataset

Unnamed: 0,fit,user_id,bust size,item_id,weight,rating,rented for,review_text,body type,review_summary,category,height,size,age,review_date
0,fit,420272,34d,2260466,137lbs,10.0,vacation,An adorable romper! Belt and zipper were a lit...,hourglass,So many compliments!,romper,"5' 8""",14,28.0,"April 20, 2016"
1,fit,273551,34b,153475,132lbs,10.0,other,I rented this dress for a photo shoot. The the...,straight & narrow,I felt so glamourous!!!,gown,"5' 6""",12,36.0,"June 18, 2013"
2,fit,360448,,1063761,,10.0,party,This hugged in all the right places! It was a ...,,It was a great time to celebrate the (almost) ...,sheath,"5' 4""",4,116.0,"December 14, 2015"
3,fit,909926,34c,126335,135lbs,8.0,formal affair,I rented this for my company's black tie award...,pear,Dress arrived on time and in perfect condition.,dress,"5' 5""",8,34.0,"February 12, 2014"
4,fit,151944,34b,616682,145lbs,10.0,wedding,I have always been petite in my upper body and...,athletic,Was in love with this dress !!!,gown,"5' 9""",12,27.0,"September 26, 2016"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192539,fit,66386,34dd,2252812,140lbs,10.0,work,Fit like a glove!,hourglass,LOVE IT!!! First Item Im thinking of buying!,jumpsuit,"5' 9""",8,42.0,"May 18, 2016"
192540,fit,118398,32c,682043,100lbs,10.0,work,The pattern contrast on this dress is really s...,petite,LOVE it!,dress,"5' 1""",4,29.0,"September 30, 2016"
192541,fit,47002,36a,683251,135lbs,6.0,everyday,"Like the other DVF wraps, the fit on this is f...",straight & narrow,"Loud patterning, flattering fit",dress,"5' 8""",8,31.0,"March 4, 2016"
192542,fit,961120,36c,126335,165lbs,10.0,wedding,This dress was PERFECTION. it looked incredib...,pear,loved this dress it was comfortable and photog...,dress,"5' 6""",16,31.0,"November 25, 2015"


### - apply preprocessing 

In [16]:
# Review text, summary and date are not used as features; remove them and then
# discard every row that still has a missing value.
columns_to_drop = ['review_date', 'review_text', 'review_summary']
dataset = dataset.drop(columns=columns_to_drop).dropna()

In [17]:
# Columns to expand into one indicator column per distinct value
columns_to_encode = ['fit', 'rented for', 'body type', 'category', 'height', 'bust size', 'weight']
one_hot = pd.get_dummies(dataset.loc[:, columns_to_encode])

# replace the encoded columns with their one-hot versions (joined on the row index)
dataset = dataset.drop(columns=columns_to_encode).join(one_hot)
dataset

Unnamed: 0,user_id,item_id,rating,size,age,fit_fit,fit_large,fit_small,rented for_date,rented for_everyday,rented for_formal affair,rented for_other,rented for_party,rented for_party: cocktail,rented for_vacation,rented for_wedding,rented for_work,body type_apple,body type_athletic,body type_full bust,body type_hourglass,body type_pear,body type_petite,body type_straight & narrow,category_ballgown,category_blazer,category_blouse,category_blouson,category_bomber,category_buttondown,category_caftan,category_cami,category_cape,category_cardigan,category_coat,category_combo,category_crewneck,category_culotte,category_culottes,category_down,...,weight_248lbs,weight_249lbs,weight_250lbs,weight_253lbs,weight_255lbs,weight_256lbs,weight_260lbs,weight_263lbs,weight_264lbs,weight_265lbs,weight_270lbs,weight_271lbs,weight_273lbs,weight_275lbs,weight_276lbs,weight_280lbs,weight_285lbs,weight_288lbs,weight_290lbs,weight_300lbs,weight_50lbs,weight_58lbs,weight_70lbs,weight_79lbs,weight_80lbs,weight_85lbs,weight_86lbs,weight_87lbs,weight_88lbs,weight_89lbs,weight_90lbs,weight_91lbs,weight_92lbs,weight_93lbs,weight_94lbs,weight_95lbs,weight_96lbs,weight_97lbs,weight_98lbs,weight_99lbs
0,420272,2260466,10.0,14,28.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,273551,153475,10.0,12,36.0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,909926,126335,8.0,8,34.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,151944,616682,10.0,12,27.0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,734848,364092,8.0,8,45.0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192539,66386,2252812,10.0,8,42.0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192540,118398,682043,10.0,4,29.0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192541,47002,683251,6.0,8,31.0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192542,961120,126335,10.0,16,31.0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Train NN  

In [18]:
# One-hot target columns produced from `fit`, in the order small / fit / large
endcoded_target = ['fit_small', 'fit_fit', 'fit_large']
Y = dataset.loc[:, endcoded_target]
Y

Unnamed: 0,fit_small,fit_fit,fit_large
0,0,1,0
1,0,1,0
3,0,1,0
4,0,1,0
5,0,1,0
...,...,...,...
192539,0,1,0
192540,0,1,0
192541,0,1,0
192542,0,1,0


In [19]:
# Features are every column except the one-hot target columns.
# drop() returns a new frame, so `dataset` keeps its target columns and both this
# cell and the cell that builds Y can be re-run without raising a KeyError
# (the previous `X = dataset` + inplace drop mutated `dataset` through the alias).
X = dataset.drop(columns=endcoded_target)
X

Unnamed: 0,user_id,item_id,rating,size,age,rented for_date,rented for_everyday,rented for_formal affair,rented for_other,rented for_party,rented for_party: cocktail,rented for_vacation,rented for_wedding,rented for_work,body type_apple,body type_athletic,body type_full bust,body type_hourglass,body type_pear,body type_petite,body type_straight & narrow,category_ballgown,category_blazer,category_blouse,category_blouson,category_bomber,category_buttondown,category_caftan,category_cami,category_cape,category_cardigan,category_coat,category_combo,category_crewneck,category_culotte,category_culottes,category_down,category_dress,category_duster,category_for,...,weight_248lbs,weight_249lbs,weight_250lbs,weight_253lbs,weight_255lbs,weight_256lbs,weight_260lbs,weight_263lbs,weight_264lbs,weight_265lbs,weight_270lbs,weight_271lbs,weight_273lbs,weight_275lbs,weight_276lbs,weight_280lbs,weight_285lbs,weight_288lbs,weight_290lbs,weight_300lbs,weight_50lbs,weight_58lbs,weight_70lbs,weight_79lbs,weight_80lbs,weight_85lbs,weight_86lbs,weight_87lbs,weight_88lbs,weight_89lbs,weight_90lbs,weight_91lbs,weight_92lbs,weight_93lbs,weight_94lbs,weight_95lbs,weight_96lbs,weight_97lbs,weight_98lbs,weight_99lbs
0,420272,2260466,10.0,14,28.0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,273551,153475,10.0,12,36.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,909926,126335,8.0,8,34.0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,151944,616682,10.0,12,27.0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,734848,364092,8.0,8,45.0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192539,66386,2252812,10.0,8,42.0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192540,118398,682043,10.0,4,29.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192541,47002,683251,6.0,8,31.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192542,961120,126335,10.0,16,31.0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [20]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from keras.models import Sequential 
from keras.layers import Dense 
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.utils import shuffle
from keras.utils.np_utils import to_categorical

# Fixed seed so the train/test split and the weight initialisation are repeatable
# across kernel restarts.
SEED = 42
tf.random.set_seed(SEED)

# Cast to float32 before splitting: the frames mix integer, float and indicator
# columns, and a single homogeneous dtype is what the network consumes.
X_train, X_test, Y_train, Y_test = train_test_split(
    X.astype('float32'),
    Y.astype('float32'),
    train_size=0.7,
    random_state=SEED,
)

n_features = len(X.columns)
n_classes = len(Y.columns)

# define model: one 8-unit and three 10-unit ReLU layers, followed by a softmax
# over the target columns
model = Sequential()
model.add(Dense(8, input_dim = n_features , activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(n_classes, activation = 'softmax'))

# categorical cross-entropy matches the one-hot encoded targets in Y
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_5 (Dense)             (None, 8)                 3208      
                                                                 
 dense_6 (Dense)             (None, 10)                90        
                                                                 
 dense_7 (Dense)             (None, 10)                110       
                                                                 
 dense_8 (Dense)             (None, 10)                110       
                                                                 
 dense_9 (Dense)             (None, 3)                 33        
                                                                 
Total params: 3,551
Trainable params: 3,551
Non-trainable params: 0
_________________________________________________________________


In [21]:
from keras.callbacks import TensorBoard

# Train the network; the held-out split is evaluated after every epoch and the
# per-epoch metrics are kept in `history` for plotting.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=1024,
    epochs=29,
    validation_data=(X_test, Y_test),
)

Epoch 1/29
Epoch 2/29
Epoch 3/29
Epoch 4/29
Epoch 5/29
Epoch 6/29
Epoch 7/29
Epoch 8/29
Epoch 9/29
Epoch 10/29
Epoch 11/29
Epoch 12/29
Epoch 13/29
Epoch 14/29
Epoch 15/29
Epoch 16/29
Epoch 17/29
Epoch 18/29
Epoch 19/29
Epoch 20/29
Epoch 21/29
Epoch 22/29
Epoch 23/29
Epoch 24/29
Epoch 25/29
Epoch 26/29
Epoch 27/29
Epoch 28/29
Epoch 29/29


In [22]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=1, cols=2,
                    subplot_titles = ['Training and Validation Accuracy',
                                      'Training and Validation Loss'])

# accuracy curves go in the left subplot, loss curves in the right one
plots_positions = [(1, 1)] * 2 + [(1, 2)] * 2
metric_names = ['accuracy', 'val_accuracy', 'loss', 'val_loss']
metric_names_short = ['A', 'VA', 'L', 'VL']
# legend labels that can be read without knowing the abbreviations above
metric_labels = ['Training accuracy', 'Validation accuracy',
                 'Training loss', 'Validation loss']
# x-axis comes from the number of epochs actually recorded, so x and y always
# have the same length even if training stopped before the configured count
epochs = list(range(1, len(history.history['loss']) + 1))

for metric, label, (plot_row, plot_col) in zip(metric_names, metric_labels, plots_positions):
    fig.add_trace(
        go.Scatter(name=label,
                   x=epochs,
                   y=history.history[metric]),
        row=plot_row,
        col=plot_col,
    )

# axis titles so each subplot can be read on its own
fig.update_xaxes(title_text='Epoch')
fig.update_yaxes(title_text='Accuracy', row=1, col=1)
fig.update_yaxes(title_text='Loss', row=1, col=2)

fig.show()