In [None]:
import keras
from keras.models import Model, load_model
from keras.utils import np_utils
from keras.layers import Input, Dense, Dot, Softmax
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.utils import shuffle

import numpy as np
import pandas as pd
import pickle

# custom functions
from layer_output import get_dense_layers, paths_to_tensor
from loading import load_files       

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=9):
    """Load file paths and one-hot encoded targets from a dataset directory.

    Parameters
    ----------
    path : str
        Directory passed straight to ``load_files``.
    num_classes : int, optional
        Width of the one-hot encoding. Defaults to 9, the value this
        notebook used before the count became a parameter.

    Returns
    -------
    shoe_files : numpy.ndarray
        Array built from ``data['filenames']``.
    shoe_targets : numpy.ndarray
        ``data['target']`` one-hot encoded with ``np_utils.to_categorical``.
    """
    # NOTE(review): ignore_files='.DS_Store' presumably skips macOS folder
    # metadata; confirm against the project's `loading.load_files`.
    data = load_files(path, ignore_files='.DS_Store')
    shoe_files = np.array(data['filenames'])
    shoe_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return shoe_files, shoe_targets

# Read the three shoe splits in a fixed order: train, validate, test.
(
    (train_files, train_targets),
    (valid_files, valid_targets),
    (test_files, test_targets),
) = map(
    load_dataset,
    ('../data/shoes/train', '../data/shoes/validate', '../data/shoes/test'),
)

In [None]:
# Locations of the saved CNN definition and of the weight checkpoint.
cnn_model_path = 'saved_models/clothes2_cnn.h5'
cnn_weights_path = 'saved_models/weights.best.from_scratch.hdf5'

# Rebuild the network from the saved model file, then load the checkpoint
# weights into it.
clothing_to_vector_model = load_model(cnn_model_path)
clothing_to_vector_model.load_weights(cnn_weights_path)

In [None]:
# Collect every shoe image path in one flat array: train, then validation,
# then test.
all_shoe_paths = np.concatenate([train_files, valid_files, test_files])

# Run the paths through the project helper, which returns the dense-layer
# outputs both as an array and as a DataFrame.
dense_layers, dense_df = get_dense_layers(clothing_to_vector_model, all_shoe_paths)

# Use a context manager so the pickle file is flushed and closed even if
# serialisation raises; the bare open() used before leaked the handle.
with open("saved_models/dense_shoe_df2.pickle", "wb") as pickle_file:
    pickle.dump(dense_df, pickle_file)

print(dense_layers.shape)
print(dense_df.shape)
dense_df.head()

In [None]:
# Bind the shoe feature frame to the name used by the following cells.
# This is a second name for the same object, not a copy.
shoe_df = dense_df
print(shoe_df.shape)
shoe_df.head()

In [None]:
# Build index-label -> feature-vector lookups for clothing and for shoes.
clothing_df = pd.read_pickle('saved_models/dense_clothing_df.pickle')

# The feature width of each frame is its column count.
clothing_dim = clothing_df.shape[1]
shoe_dim = shoe_df.shape[1]

# Each index label maps to its row, reshaped to a (1, dim) array so rows can
# later be stacked along axis 0.
clothing_dict = {
    label: np.array(clothing_df.loc[label]).reshape(1, clothing_dim)
    for label in clothing_df.index
}

shoe_dict = {
    label: np.array(shoe_df.loc[label]).reshape(1, shoe_dim)
    for label in shoe_df.index
}

In [None]:
# Read the link table, discard rows with any missing value, and renumber the
# remaining rows from 0.
link_df = (
    pd.read_csv('../data/links.txt')
    .dropna()
    .reset_index(drop=True)
)
print(link_df.shape)
link_df.head()

In [None]:
def get_dataset(df, dictionary, c_or_s):
    """Stack the feature vectors referenced by one path column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'clothing_path'`` column when ``c_or_s == 'c'`` or a
        ``'shoe_path'`` column when ``c_or_s == 's'``.
    dictionary : dict
        Maps every value in that column to an array of shape ``(1, dim)``.
    c_or_s : str
        ``'c'`` selects clothing, ``'s'`` selects shoes.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), dim)`` whose rows follow the positional
        row order of ``df``; ``dim`` is the module-level ``clothing_dim`` or
        ``shoe_dim``. An empty ``df`` yields a ``(0, dim)`` array.

    Raises
    ------
    AssertionError
        If ``c_or_s`` is neither ``'c'`` nor ``'s'``.
    KeyError
        If a path in ``df`` is missing from ``dictionary``.
    """
    assert c_or_s in ('c', 's'), "c_or_s must be 'c' or 's'"
    if c_or_s == 'c':
        dim, path = clothing_dim, 'clothing_path'
    else:
        dim, path = shoe_dim, 'shoe_path'

    # Seed the stack with an empty (0, dim) float array so the result is 2-D
    # even when df has no rows, and so dtype promotion matches the previous
    # append-based implementation.
    blocks = [np.empty(shape=(0, dim))]

    # Iterate the column's values positionally instead of looking rows up by
    # integer label: this works for any index, and a single concatenate
    # replaces the quadratic append-in-a-loop.
    blocks.extend(dictionary[key] for key in df[path])

    return np.concatenate(blocks, axis=0)

In [None]:
# Shuffle the link table with a fixed seed so the 80/10/10 split below is
# reproducible across kernel restarts.
SPLIT_SEED = 42

# shuffle() keeps the original index labels, so sort_index() first restores
# the pre-shuffle row order; re-running this cell then yields the same split
# instead of shuffling an already shuffled frame.
link_df = shuffle(link_df.sort_index(), random_state=SPLIT_SEED)
split1 = int(link_df.shape[0]*0.8)
split2 = int(link_df.shape[0]*0.9)

# Positional slices of the shuffled table, renumbered from 0.
train_paths = link_df.iloc[0:split1].reset_index(drop=True)
valid_paths = link_df.iloc[split1:split2].reset_index(drop=True)
test_paths = link_df.iloc[split2:].reset_index(drop=True)

# Feature matrices for the clothing side of each link.
train_cloth_vecs = get_dataset(train_paths, clothing_dict, 'c')
valid_cloth_vecs = get_dataset(valid_paths, clothing_dict, 'c')
test_cloth_vecs = get_dataset(test_paths, clothing_dict, 'c')

# Feature matrices for the shoe side of each link.
train_shoe_vecs = get_dataset(train_paths, shoe_dict, 's')
valid_shoe_vecs = get_dataset(valid_paths, shoe_dict, 's')
test_shoe_vecs = get_dataset(test_paths, shoe_dict, 's')

# Labels come from the same split frames as the feature vectors, so rows stay
# aligned by construction.
# NOTE: this rebinds train_targets / valid_targets / test_targets, which
# earlier in the notebook held the targets returned by load_dataset.
train_targets = np.array(train_paths['response'])
valid_targets = np.array(valid_paths['response'])
test_targets = np.array(test_paths['response'])

In [None]:
# Two-input network: a clothing vector is mapped linearly into a vector of
# shoe_dim width, compared with the shoe vector through a normalised dot
# product, and the single resulting value is passed to a sigmoid unit.
clothing_input = Input(name='clothing_input', shape=(clothing_dim,))
clothing_transform = Dense(
    units=shoe_dim,
    name='clothing_to_shoe',
    activation='linear',
)(clothing_input)

shoe_input = Input(name='shoe_input', shape=(shoe_dim,))

# normalize=True L2-normalises both operands before the dot product.
prod = Dot(name='DotProduct', axes=1, normalize=True)(
    [clothing_transform, shoe_input]
)
pred = Dense(units=1, name='Prediction', activation='sigmoid')(prod)

model = Model(outputs=[pred], inputs=[clothing_input, shoe_input])
model.summary()

In [None]:
# Binary link / no-link target: cross-entropy loss, accuracy as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [None]:
epochs = 30
best_weights_path = 'saved_models/weights.best.vector_links2.hdf5'

# Stop once val_loss has failed to improve for 3 consecutive epochs, and keep
# on disk only the checkpoint with the best monitored value so far.
early_stop = EarlyStopping(monitor='val_loss', patience=3)
checkpointer = ModelCheckpoint(filepath=best_weights_path,
                               verbose=1, save_best_only=True)

history = model.fit([train_cloth_vecs, train_shoe_vecs], train_targets, epochs=epochs,
                    validation_data=([valid_cloth_vecs, valid_shoe_vecs], valid_targets),
                    verbose=1, callbacks=[checkpointer, early_stop])

# Early stopping leaves the model holding the weights of the last epoch run,
# which can be up to `patience` epochs past the best one. Restore the best
# checkpoint so the evaluation and export cells below use it.
model.load_weights(best_weights_path)

In [None]:
# Keras logs the accuracy metric as 'acc' in older releases and 'accuracy' in
# newer ones; accept whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a separate name for the x axis so the integer `epochs` setting from the
# training cell is not overwritten.
epoch_axis = range(1, len(acc) + 1)

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(16, 5))

# Training curves are blue dots, validation curves are solid red lines. The
# colour is given once in the format string rather than as 'b' plus c='r'.
acc_ax.plot(epoch_axis, acc, 'bo', label='Training acc')
acc_ax.plot(epoch_axis, val_acc, 'r', label='Validation acc')
acc_ax.set_title('Training and validation accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax.plot(epoch_axis, loss, 'bo', label='Training loss')
loss_ax.plot(epoch_axis, val_loss, 'r', label='Validation loss')
loss_ax.set_title('Training and validation loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend();

In [None]:
# Turn the sigmoid outputs for the test links into hard 0/1 labels and report
# the percentage that match the true responses.
test_probs = model.predict([test_cloth_vecs, test_shoe_vecs])

# Keep the 0/1 labels in the prediction dtype, laid out as one row of length n.
predictions = (test_probs > 0.499999999).astype(test_probs.dtype).reshape(1, -1)

n_test = predictions.shape[1]
n_correct = np.sum(predictions == test_targets)
test_accuracy = 100 * n_correct / n_test
test_accuracy

In [None]:
# Sub-model that exposes only the clothing -> shoe-width linear layer of the
# trained network; it shares that layer with `model`.
dress_to_shoe_transform = Model(
    outputs=[clothing_transform],
    inputs=[clothing_input],
)

# Sanity check: shape of the transformed training clothing vectors.
transformed_train_vecs = dress_to_shoe_transform.predict(train_cloth_vecs)
transformed_train_vecs.shape

In [None]:
# Inspect the learned parameters of the final sigmoid unit.
prediction_layer = model.get_layer(name='Prediction')
prediction_layer.get_weights()

In [None]:
# Persist the transform sub-model twice: once as a full model file and once
# as a weights-only file.
transform_model_path = 'saved_models/dress2shoe3.h5'
transform_weights_path = 'saved_models/dress2shoe_weights3.hdf5'

dress_to_shoe_transform.save(transform_model_path)
dress_to_shoe_transform.save_weights(transform_weights_path)