In [1]:
from keras import __version__
import numpy as np
from keras.models import model_from_json, Model, Sequential
from keras.layers import Dropout, Flatten, Dense, Activation
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img, ImageDataGenerator
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
from keras import optimizers
from sklearn.model_selection import train_test_split
import keras
from tempfile import TemporaryFile
import random
import csv

Using TensorFlow backend.


In [2]:
def get_clothing_classifier():
    """Rebuild the clothing classifier from its saved architecture and weights.

    The architecture is read from 'incep_filter_clothing_classifier.json' and
    the weights from 'inceptionv3_clothing_classifier.h5'; both paths are
    relative to the current working directory.

    Returns:
        The model built by ``model_from_json`` with the saved weights loaded.
    """
    weights_path = 'inceptionv3_clothing_classifier.h5'
    json_path = 'incep_filter_clothing_classifier.json'

    # The context manager closes the file even if reading it raises.
    with open(json_path, 'r') as json_file:
        loaded_model = model_from_json(json_file.read())

    # load weights into new model
    loaded_model.load_weights(weights_path)
    print("Loaded model from disk")
    return loaded_model

def get_clothing_vector_model():
    """Build a feature extractor from the clothing classifier.

    Loads the classifier from disk and returns a model that shares its input
    but stops at the layer just before the final (classification) layer.

    Returns:
        A ``Model`` mapping the classifier's input to the output of its
        second-to-last layer.
    """
    loaded_model = get_clothing_classifier()
    # Index the penultimate layer directly instead of calling layers.pop():
    # this selects the same layer without depending on pop() mutating the
    # model's internal layer list.
    penultimate_output = loaded_model.layers[-2].output
    return Model(loaded_model.input, penultimate_output)

def image_preprocess(img_path, target_size=(299, 299)):
    """Load an image file and turn it into a one-image batch for the network.

    Args:
        img_path: Path of the image file to load.
        target_size: Size tuple handed to ``load_img``; the image is resized
            to it while loading. Defaults to (299, 299), the size this
            notebook has always used.

    Returns:
        The result of ``preprocess_input`` applied to the image array after a
        leading batch axis of length 1 has been added.
    """
    image = load_img(img_path, target_size=target_size)
    image = img_to_array(image)

    # our input image is now represented as a NumPy array of shape
    # (target_size[0], target_size[1], 3); add a leading batch axis so the
    # shape becomes (1, target_size[0], target_size[1], 3) and we can pass
    # it through the network
    image = np.expand_dims(image, axis=0)

    # pre-process the image using the appropriate function based on the
    # model that has been loaded (i.e., mean subtraction, scaling, etc.)
    return preprocess_input(image)

def get_classier_prediction(clothing_classifier, img_path):
    """Score one image against the ten clothing categories.

    Args:
        clothing_classifier: Object whose ``predict`` accepts the batch built
            by ``image_preprocess`` and returns one score row per image.
        img_path: Path of the image to classify.

    Returns:
        A list of ``(label, score)`` tuples ordered from highest score to
        lowest.
    """
    # NOTE(review): assumes this order matches the classifier's output
    # indices -- confirm against the training label order.
    category_names = [
        'dresses', 'jackets', 'jeans', 'shorts', 'skirts',
        'sweaters', 'sweatshirts', 'womens-outerwear',
        'womens-pants', 'womens-tops',
    ]
    batch = image_preprocess(img_path)
    scores = clothing_classifier.predict(batch)[0]
    return sorted(zip(category_names, scores),
                  key=lambda pair: pair[1], reverse=True)

In [3]:
# Load the trained classifier from disk and print its layer-by-layer summary.
clothing_classifier = get_clothing_classifier()
clothing_classifier.summary()

Loaded model from disk
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, None, None, 32 864         input_1[0][0]                    
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, None, None, 32 96          conv2d_1[0][0]                   
____________________________________________________________________________________________________
activation_1 (Activation)        (None, None, None, 32 0           batch_normalization_1[0][0]      
____________________________________________________________________

In [4]:
# Sample images reused by the sanity checks in the following cells: four
# clothing validation images plus a dog photo, whose class is not one of the
# classifier's labels.
jeans_img_path = 'data-all/validation/jeans/482888849.jpg'
dress_img_path = 'data-all/validation/dresses/535993294.jpg'
jackets_img_path = 'data-all/validation/jackets/614521319.jpg'
jackets_img_path2 = 'data-all/validation/jackets/614763686.jpg'
dog_img_path = 'data-pets/train/dogs/dog.0.jpg'
# Cell output: (label, score) pairs for the dress image, highest score first.
get_classier_prediction(clothing_classifier, dress_img_path)

[('dresses', 0.99905783),
 ('skirts', 0.00062312855),
 ('womens-tops', 0.00027859688),
 ('womens-pants', 1.842705e-05),
 ('womens-outerwear', 1.5460646e-05),
 ('shorts', 3.0073475e-06),
 ('sweaters', 1.3901767e-06),
 ('jeans', 1.3824557e-06),
 ('jackets', 5.0633832e-07),
 ('sweatshirts', 3.2763697e-07)]

In [56]:
# Build the feature extractor (this reloads the classifier from disk) and
# preprocess each sample image into a one-image batch.
classifier_vector_model = get_clothing_vector_model()
img_jacket = image_preprocess(jackets_img_path)
img_jacket2 = image_preprocess(jackets_img_path2)
img_jeans = image_preprocess(jeans_img_path)
img_dress = image_preprocess(dress_img_path)
img_dog = image_preprocess(dog_img_path)

# One feature array per sample image.
jacket_feat = classifier_vector_model.predict(img_jacket)
jacket_feat2 = classifier_vector_model.predict(img_jacket2)
jeans_feat = classifier_vector_model.predict(img_jeans)
dress_feat = classifier_vector_model.predict(img_dress)
dog_feat = classifier_vector_model.predict(img_dog)

# Cell output: the raw feature array of the first jacket image.
jacket_feat

Loaded model from disk


array([[ 3.12710714,  0.        ,  0.66271973, ...,  0.2265493 ,
         0.15804155,  1.66009367]], dtype=float32)

In [5]:
# compare cosine similarities
# NOTE(review): in the recorded run all three values are above 0.96,
# including jeans vs. dog, so raw cosine similarity on these features barely
# separates related from unrelated images.
print('2 jackets: ', cosine_similarity(jacket_feat, jacket_feat2))
print('jeans and dress: ', cosine_similarity(jeans_feat, dress_feat))
print('jeans and dog: ', cosine_similarity(jeans_feat, dog_feat))

Loaded model from disk
2 jackets:  [[ 0.98380375]]
jeans and dress:  [[ 0.96236557]]
jeans and dog:  [[ 0.96911967]]


In [6]:
# Confirm the vector model emits a single 1024-wide feature row per image.
jacket_feat.shape # (1, 1024)

(1, 1024)

## Skipgrams Model

In [72]:
def save_product_features(model, product_file, img_dir, output_file):
    """Compute a feature vector for every product image and save them as TSV.

    Each line of ``product_file`` is split on tabs and its first field is
    taken as the product id; the image is read from
    ``img_dir + product_id + '.jpg'``. Products whose image cannot be
    processed are reported and skipped (best effort), so one bad image does
    not abort the whole run.

    Args:
        model: Object whose ``predict`` turns the batch from
            ``image_preprocess`` into an array with one feature row.
        product_file: Path of the tab-separated product list.
        img_dir: Directory prefix of the images; it is concatenated as-is, so
            it must end with a path separator.
        output_file: Path of the TSV file to write. Each row is the product
            id followed by its feature values.

    Returns:
        The list of rows written: ``[product_id, feat_0, feat_1, ...]``.
    """
    products = []
    print('Reading file ', product_file)
    with open(product_file, 'r') as f:
        # Iterate the file lazily instead of copying every line into a list.
        for i, line in enumerate(f):
            if i % 50 == 0:
                print('Processing product ', i)
            # Drop the line terminator first: on a single-column row the id
            # would otherwise end with '\n' and the image lookup would fail.
            product_id = line.rstrip('\r\n').split('\t')[0]
            if not product_id:
                continue  # blank line or empty id: nothing to look up
            try:
                product_img_path = img_dir + product_id + '.jpg'
                product_img = image_preprocess(product_img_path)
                product_feat = model.predict(product_img)

                products.append([product_id] + product_feat[0].tolist())
            except Exception as e:
                # Deliberately broad: keep going, but say which product failed.
                print('Skipping product {}: {}'.format(product_id, e))

    print('Saving products to ', output_file)
    # newline='' lets the csv module control line endings, avoiding blank
    # rows on platforms that translate '\n'.
    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerows(products)
    return products

def get_product_permutation_inputs(outfit_permutations_file, product_to_features, seed=None):
    """Turn a file of product pairs into shuffled, aligned training arrays.

    Every line is split on whitespace. Field 2 is the input product id and
    field 5 the target product id; fields 3 and 6 are kept next to the
    respective id as metadata. Lines with too few fields, or whose ids are
    missing from ``product_to_features``, are reported and skipped.

    Args:
        outfit_permutations_file: Path of the whitespace-separated pair file.
        product_to_features: Mapping from product id to its feature array.
        seed: Optional seed for the shuffle. ``None`` (the default) shuffles
            with the module-level ``random`` state, as before; any other
            value gives a reproducible order.

    Returns:
        A 4-tuple ``(X, y, X_products, y_products)`` with matching order:
        ``X`` and ``y`` are ``np.array`` stacks of the input and target
        feature arrays, ``X_products`` is a list of one-element lists
        ``[(product_id, field_3)]`` and ``y_products`` a list of
        ``(product_id, field_6)`` tuples.
    """
    outfit_features = []  # each row = X feat, y feat, X product info, y product info
    print('Reading file ', outfit_permutations_file)
    with open(outfit_permutations_file, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                continue  # blank line
            try:
                product1_id = fields[2]
                product2_id = fields[5]
                input_products = [(product1_id, fields[3])]
                target_product = (product2_id, fields[6])
                product1_feat = product_to_features[product1_id]
                product2_feat = product_to_features[product2_id]
            except IndexError:
                print('Skipping line {}: expected at least 7 fields, got {}'.format(
                    line_number, len(fields)))
                continue
            except KeyError as e:
                print('Skipping line {}: no features for product {}'.format(
                    line_number, e))
                continue
            outfit_features.append([product1_feat, product2_feat,
                                    input_products, target_product])

    # Shuffle whole rows so features and metadata stay aligned.
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(outfit_features)

    X = [row[0] for row in outfit_features]
    y = [row[1] for row in outfit_features]
    X_products = [row[2] for row in outfit_features]
    y_products = [row[3] for row in outfit_features]
    return np.array(X), np.array(y), X_products, y_products

def get_product_to_features(product_feats_file):
    """Load cached product feature vectors from a tab-separated file.

    Each non-empty row holds a product id followed by its feature values.

    Args:
        product_feats_file: Path of the TSV file to read.

    Returns:
        A dict mapping each product id (str) to a float array of shape
        ``(1, number_of_features)``. If an id occurs more than once, the last
        row wins.
    """
    product_to_feats = {}
    # newline='' is what the csv module expects for files it reads.
    with open(product_feats_file, 'r', newline='') as tsvfile:
        tsvreader = csv.reader(tsvfile, delimiter='\t')
        for row in tsvreader:
            if not row:
                # Blank lines come back as empty rows; indexing them would
                # raise IndexError.
                continue
            product_id = row[0]
            feats = np.array([[float(n) for n in row[1:]]])
            product_to_feats[product_id] = feats
    return product_to_feats

In [22]:
# Run once to save the product feature vectors
# save_product_features(classifier_vector_model, 'data-outfits/outfit_products.tsv', 
#                       'data-outfits/images/', 'data-outfits/outfit_product_features.tsv')

Reading file  data-outfits/outfit_products.tsv
Processing product  0
Processing product  50
Processing product  100
Processing product  150
Processing product  200
Processing product  250
Processing product  300
Processing product  350
Processing product  400
Processing product  450
Processing product  500
Processing product  550
Processing product  600
Processing product  650
Processing product  700
Processing product  750
Processing product  800
Processing product  850
Processing product  900
Processing product  950
Processing product  1000
Processing product  1050
Processing product  1100
Processing product  1150
Processing product  1200
Processing product  1250
Processing product  1300
Processing product  1350
Processing product  1400
Processing product  1450
Processing product  1500
Processing product  1550
Processing product  1600
Processing product  1650
Processing product  1700
Processing product  1750
Processing product  1800
Processing product  1850
Processing product  1900
P

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [24]:
# Load the cached per-product feature vectors (the TSV that
# save_product_features writes) into an id -> feature-array dict.
product_feats_file = 'data-outfits/outfit_product_features.tsv'
outfit_permutations_file = 'data-outfits/outfit_permutations.tsv'
prods_to_feats = get_product_to_features(product_feats_file)
# Cell output: spot-check the feature array of one product.
prods_to_feats['615752261']


array([[ 2.87435913,  0.        ,  0.14283158, ...,  0.51789564,
         0.30757272,  0.92094642]])

In [73]:
# Build the shuffled training pairs: X / y are stacked feature arrays, and
# X_products / y_products carry the matching product metadata in the same order.
(X, y, X_products, y_products) = get_product_permutation_inputs(outfit_permutations_file, prods_to_feats)


Reading file  data-outfits/outfit_permutations.tsv


In [74]:
np.array(X).shape # (12002, 1, 1024) in the recorded run

(12002, 1, 1024)

In [75]:
def make_skipgrams_clothing_model(feature_dim=1024):
    """Build and compile the dense network trained on product feature pairs.

    The network takes one product's feature vector (shape ``(1, feature_dim)``
    per sample) and outputs a vector of the same width.

    Args:
        feature_dim: Width of the input and output feature vectors. Defaults
            to 1024, the width of the features used in this notebook.

    Returns:
        A compiled ``Sequential`` model: Dense(1024, relu) -> Dropout(0.2) ->
        Dense(256, relu) -> Dropout(0.1) -> Dense(feature_dim, softmax).
    """
    seq = Sequential()
    seq.add(Dense(1024, input_shape=(1, feature_dim), activation='relu', name='fc1'))
    seq.add(Dropout(0.2))
    seq.add(Dense(256, activation='relu', name='fc2'))
    seq.add(Dropout(0.1))
    # NOTE(review): the training targets are raw feature vectors, not one-hot
    # labels or probability distributions, so the softmax output,
    # categorical cross-entropy loss and "accuracy" metric should be
    # reviewed -- confirm this pairing is intended.
    seq.add(Dense(feature_dim, activation='softmax', name='fc_final'))
    seq.compile(loss="categorical_crossentropy", optimizer=optimizers.RMSprop(lr=0.00001), metrics=["accuracy"])
    return seq

In [76]:
# Build the model, print its architecture, then train for 100 epochs with 30%
# of the pairs held out for validation (8401 train / 3601 validation samples
# in the recorded run).
clothing_skipgrams_model = make_skipgrams_clothing_model()
clothing_skipgrams_model.summary()
clothing_skipgrams_model.fit(X, y, validation_split=0.3, epochs=100, batch_size=10)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
fc1 (Dense)                  (None, 1, 1024)           1049600   
_________________________________________________________________
dropout_11 (Dropout)         (None, 1, 1024)           0         
_________________________________________________________________
fc2 (Dense)                  (None, 1, 256)            262400    
_________________________________________________________________
dropout_12 (Dropout)         (None, 1, 256)            0         
_________________________________________________________________
fc_final (Dense)             (None, 1, 1024)           263168    
Total params: 1,575,168
Trainable params: 1,575,168
Non-trainable params: 0
_________________________________________________________________
Train on 8401 samples, validate on 3601 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100


Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7f0699323ef0>

In [77]:
# Pair each training input's product metadata with the vector the model
# predicts for it; the cell output shows the first three pairs.
clothing_predictions = list(zip(X_products, clothing_skipgrams_model.predict(X)))
clothing_predictions[:3]

[([('534952171', ['jeans'])],
  array([[  3.43282474e-03,   7.06519074e-07,   6.68733031e-04, ...,
            3.50462709e-04,   2.63515685e-04,   8.52939906e-04]], dtype=float32)),
 ([('651257129', ['womens-tops'])],
  array([[  3.19540524e-03,   1.06691246e-07,   4.43253375e-04, ...,
            8.89367890e-04,   4.44327947e-04,   1.06019492e-03]], dtype=float32)),
 ([('649091558', ['womens-tops'])],
  array([[  3.42820631e-03,   7.48775975e-09,   3.12320713e-04, ...,
            9.37269884e-04,   3.72050970e-04,   1.05632527e-03]], dtype=float32))]

In [78]:
def get_closest_product_ids(input_vector, product_to_features=None, top_n=5):
    """Find the products whose feature vectors are most similar to a vector.

    Args:
        input_vector: Array compared against each product's feature array
            with ``cosine_similarity``.
        product_to_features: Mapping from product id to feature array.
            Defaults to the notebook-level ``prods_to_feats``.
        top_n: How many of the best matches to return (default 5).

    Returns:
        Up to ``top_n`` ``(product_id, similarity)`` tuples, most similar
        first; ``similarity`` is whatever ``cosine_similarity`` returned for
        that product.
    """
    if product_to_features is None:
        # Keep the original behaviour: fall back to the global feature table.
        product_to_features = prods_to_feats
    prod_cosine_sims = [
        (prod, cosine_similarity(input_vector, prod_vector))
        for prod, prod_vector in product_to_features.items()
    ]
    return sorted(prod_cosine_sims, key=lambda p: p[1], reverse=True)[:top_n]
        
# For the first 20 predictions, print the input product and shell commands
# that open its image and the images of its closest predicted matches.
for i, (input_products, prediction_vector) in enumerate(clothing_predictions[:20]):
    # The loop variable is deliberately not called X_products: rebinding that
    # name here would replace the notebook-level list used to build
    # clothing_predictions and corrupt any later re-run of that cell.
    input_product = input_products[0] # Only one product for each X for now
    print('{} Input: {}'.format(i, input_product))
    print('open -a Preview {}'.format(input_product[0] + '.jpg'))
    predicted_outputs = get_closest_product_ids(prediction_vector)
    predicted_images = [p[0] + '.jpg' for p in predicted_outputs]
    print('open -a Preview ' + ' '.join(predicted_images))

0 Input: ('534952171', ['jeans'])
open -a Preview 534952171.jpg
open -a Preview 655404693.jpg 607817031.jpg 538017987.jpg 614591849.jpg 625727413.jpg
1 Input: ('651257129', ['womens-tops'])
open -a Preview 651257129.jpg
open -a Preview 628749781.jpg 536963805.jpg 503439151.jpg 477420618.jpg 511435611.jpg
2 Input: ('649091558', ['womens-tops'])
open -a Preview 649091558.jpg
open -a Preview 503439151.jpg 634504599.jpg 628749781.jpg 655411082.jpg 639098704.jpg
3 Input: ('532520135', ['womens-pants'])
open -a Preview 532520135.jpg
open -a Preview 655404693.jpg 607817031.jpg 538017987.jpg 505902236.jpg 614591849.jpg
4 Input: ('603742660', ['womens-tops'])
open -a Preview 603742660.jpg
open -a Preview 628749781.jpg 503439151.jpg 606147530.jpg 634504599.jpg 541576780.jpg
5 Input: ('495320947', ['womens-tops'])
open -a Preview 495320947.jpg
open -a Preview 628749781.jpg 503439151.jpg 634504599.jpg 639098704.jpg 606147530.jpg
6 Input: ('271667299', ['womens-tops'])
open -a Preview 271667299.jpg

In [34]:
# Scratch check: concatenating two (1, 3) arrays along axis=1 gives a single
# (1, 6) row.
a = np.array([[1,2, 3]])
b = np.array([[4,5, 6]])
ab = np.concatenate((a, b), axis=1)
ab

array([[1, 2, 3, 4, 5, 6]])

In [35]:
# Shape of the concatenated row from the previous scratch cell.
ab.shape

(1, 6)

In [36]:
# Scratch check: one-hot encode four integer class ids into two columns.
keras.utils.np_utils.to_categorical(np.array([1, 0, 0, 0]), num_classes=2)

array([[ 0.,  1.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.]])

In [45]:
# Scratch check: prepend one column to `a` (defined in an earlier scratch
# cell) by concatenating along axis=1.
c = np.array([[0]])
np.concatenate((c, a), axis=1)

array([[0, 1, 2, 3]])

In [74]:
# Scratch check: `+` on arrays is element-wise addition with broadcasting,
# not concatenation as it is for lists, so the result equals d here.
d = np.array([1, 2, 3, 4])
np.array([0]) + d

array([1, 2, 3, 4])