In [1]:
from keras import __version__
import numpy as np
from keras.models import model_from_json, Model, Sequential
from keras.layers import Dropout, Flatten, Dense, Activation
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img, ImageDataGenerator
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
from keras import optimizers
from sklearn.model_selection import train_test_split
import keras
from tempfile import TemporaryFile
import random
import csv

Using TensorFlow backend.


In [11]:
def get_clothing_classifier():
    """Load the saved clothing classifier from disk.

    The architecture is read from ``incep_filter_clothing_classifier.json``
    and the weights from ``inceptionv3_clothing_classifier.h5``; both paths
    are relative to the current working directory.

    Returns:
        The Keras model rebuilt by ``model_from_json`` with its weights loaded.
    """
    weights_path = 'inceptionv3_clothing_classifier.h5'
    json_path = 'incep_filter_clothing_classifier.json'

    # The context manager closes the file even when read() raises.
    with open(json_path, 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)

    # load weights into new model
    loaded_model.load_weights(weights_path)
    print("Loaded model from disk")
    return loaded_model

def get_clothing_vector_model():
    """Build a feature extractor from the saved clothing classifier.

    Loads the same architecture and weights files as
    ``get_clothing_classifier`` and returns a model that stops one layer
    before the end, i.e. it emits the input of the final classification layer.

    Returns:
        A Keras ``Model`` that shares the classifier's input and outputs the
        penultimate layer's activations.
    """
    weights_path = 'inceptionv3_clothing_classifier.h5'
    json_path = 'incep_filter_clothing_classifier.json'

    # The context manager closes the file even when read() raises.
    with open(json_path, 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)

    # load weights into new model
    loaded_model.load_weights(weights_path)
    print("Loaded model from disk .. popping off classification layer")

    # Index the penultimate layer directly instead of calling layers.pop().
    # Popping only works when `layers` is a plain list attribute; if it is a
    # property that returns a copy, pop() is a no-op and layers[-1] would
    # still be the classification head. layers[-2] selects the same layer
    # either way and leaves the loaded model untouched.
    penultimate_output = loaded_model.layers[-2].output
    return Model(loaded_model.input, penultimate_output)

def image_preprocess(img_path):
    """Read an image file and return it as a one-image batch for the network.

    The image is loaded at 299x299, converted to an array, given a leading
    batch axis, and passed through the ``preprocess_input`` function imported
    from ``keras.applications.inception_v3``.
    """
    pixels = img_to_array(load_img(img_path, target_size=(299, 299)))

    # The network expects a batch, so (height, width, 3) becomes
    # (1, height, width, 3).
    batch = np.expand_dims(pixels, axis=0)

    # Apply the model-specific input scaling.
    return preprocess_input(batch)

def get_classier_prediction(clothing_classifier, img_path):
    """Classify one image and return ``(label, score)`` pairs, best first.

    Args:
        clothing_classifier: model whose ``predict`` returns one score per
            entry of the label list below, in the same order.
        img_path: path of the image to classify.

    Returns:
        A list of ``(label, score)`` tuples sorted by descending score.
    """
    clothes_labels = [
        'dresses', 'jackets', 'jeans', 'shorts', 'skirts',
        'sweaters', 'sweatshirts', 'womens-outerwear',
        'womens-pants', 'womens-tops',
    ]
    batch = image_preprocess(img_path)
    # predict() returns one row per image in the batch; there is only one.
    scores = clothing_classifier.predict(batch)[0]
    return sorted(zip(clothes_labels, scores),
                  key=lambda labelled: labelled[1],
                  reverse=True)

In [3]:
# Load the classifier once for reuse by later cells, then print its
# layer-by-layer summary.
clothing_classifier = get_clothing_classifier()
clothing_classifier.summary()

Loaded model from disk
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, None, None, 32 864         input_1[0][0]                    
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, None, None, 32 96          conv2d_1[0][0]                   
____________________________________________________________________________________________________
activation_1 (Activation)        (None, None, None, 32 0           batch_normalization_1[0][0]      
____________________________________________________________________

In [17]:
# Sample images for sanity checks: four clothing images from the validation
# set plus one image from the pets data set.
jeans_img_path = 'data-all/validation/jeans/482888849.jpg'
dress_img_path = 'data-all/validation/dresses/535993294.jpg'
jackets_img_path = 'data-all/validation/jackets/614521319.jpg'
jackets_img_path2 = 'data-all/validation/jackets/614763686.jpg'
dog_img_path = 'data-pets/train/dogs/dog.0.jpg'
# Cell output: ranked (label, score) list for the dress image.
get_classier_prediction(clothing_classifier, dress_img_path)

[('dresses', 0.99905783),
 ('skirts', 0.00062312913),
 ('womens-tops', 0.00027859717),
 ('womens-pants', 1.8427136e-05),
 ('womens-outerwear', 1.5460661e-05),
 ('shorts', 3.0073502e-06),
 ('sweaters', 1.3901782e-06),
 ('jeans', 1.3824571e-06),
 ('jackets', 5.063398e-07),
 ('sweatshirts', 3.2763725e-07)]

In [18]:
# Build the feature extractor (the classifier without its final layer) and
# preprocess each sample image into a one-image batch.
classifier_vector_model = get_clothing_vector_model()
img_jacket = image_preprocess(jackets_img_path)
img_jacket2 = image_preprocess(jackets_img_path2)
img_jeans = image_preprocess(jeans_img_path)
img_dress = image_preprocess(dress_img_path)
img_dog = image_preprocess(dog_img_path)

# One feature row per image.
jacket_feat = classifier_vector_model.predict(img_jacket)
jacket_feat2 = classifier_vector_model.predict(img_jacket2)
jeans_feat = classifier_vector_model.predict(img_jeans)
dress_feat = classifier_vector_model.predict(img_dress)
dog_feat = classifier_vector_model.predict(img_dog)

# compare cosine similarities
# NOTE(review): in the recorded output all three values are above 0.96,
# including jeans vs. dog, so cosine similarity on these features does not
# separate related from unrelated images.
print('2 jackets: ', cosine_similarity(jacket_feat, jacket_feat2))
print('jeans and dress: ', cosine_similarity(jeans_feat, dress_feat))
print('jeans and dog: ', cosine_similarity(jeans_feat, dog_feat))

Loaded model from disk .. popping off classification layer
2 jackets:  [[ 0.98380375]]
jeans and dress:  [[ 0.96236551]]
jeans and dog:  [[ 0.96911961]]


In [19]:
jacket_feat.shape # (1, 1024): one row for the single image, 1024 feature values

(1, 1024)

## Simple Pairing Model

In [86]:
def get_product_features(clothing_classifier, filename, dir_path):
    """Run every product image listed in a pairs file through a model.

    Each line of ``filename`` is split on whitespace: field 0 is the pair id
    and fields 2 and 5 are the two product ids. A product's image is read from
    ``dir_path + pair_id + '_' + product_id + '.jpg'``.

    Args:
        clothing_classifier: object with a ``predict`` method that accepts the
            batch returned by ``image_preprocess``.
        filename: path of the whitespace-separated pairs file.
        dir_path: prefix prepended verbatim to each image file name, so it
            must already end with a path separator.

    Returns:
        A list with two rows per successfully processed pair, each of the
        form ``[product_id, value_0, value_1, ...]``. Lines that fail for any
        reason are reported on stdout and skipped.
    """
    pairs = []
    print('Reading file ', filename)
    with open(filename, 'r') as f:
        # Iterate the file lazily instead of materialising it with readlines().
        for i, line in enumerate(f):
            if i % 50 == 0:
                print('Processing pair ', i)
            try:
                fields = line.split()
                pair_id = fields[0]
                product1_id = fields[2]
                product2_id = fields[5]
                product1_path = dir_path + pair_id + '_' + product1_id + '.jpg'
                product2_path = dir_path + pair_id + '_' + product2_id + '.jpg'

                product1_img = image_preprocess(product1_path)
                product2_img = image_preprocess(product2_path)

                # Both predictions are made before anything is appended, so a
                # failure on either image drops the whole pair.
                product1_feat = clothing_classifier.predict(product1_img)
                product2_feat = clothing_classifier.predict(product2_img)

                pairs.append([product1_id] + product1_feat[0].tolist())
                pairs.append([product2_id] + product2_feat[0].tolist())
            except Exception as e:
                # Best-effort: one malformed line or missing image must not
                # abort the run, but report which line was dropped and why.
                print('Skipping line {} of {}: {!r}'.format(i, filename, e))

    return pairs

In [87]:
def save_data(clothing_classifier, filename, image_dir, output_file):
    """Compute product features for a pairs file and write them as TSV.

    Args:
        clothing_classifier: model passed through to ``get_product_features``.
        filename: path of the pairs file to read.
        image_dir: image path prefix passed through to ``get_product_features``.
        output_file: path of the tab-separated file to (over)write; one row
            per product, product id first, then the model outputs.
    """
    pairs = get_product_features(clothing_classifier, filename, image_dir)
    # newline='' hands line-ending control to the csv module, as its
    # documentation requires; otherwise platforms that translate newlines on
    # write emit an extra carriage return per row.
    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        # Leaving the with-block flushes and closes the file, so no per-row
        # flush is needed.
        writer.writerows(pairs)
# Disabled export calls: they are wrapped in a bare string literal, so they do
# not execute when this cell runs. The recorded output below comes from an
# earlier run in which they were active.
# NOTE(review): `model` is not defined in the visible cells - confirm which
# model produced the saved *_classifier_outputs.tsv files before re-enabling.
"""
print('Saving Fashionable Pairs')
save_data(model, 'data-pairs/fashionable_clothing_pairs.tsv', 
          'data-pairs/fashionable/', 'data-pairs/fashionable_classifier_outputs.tsv')

print('Saving UnFashionable Pairs')
save_data(model, 'data-pairs/unfashionable_clothing_pairs.tsv', 
          'data-pairs/unfashionable/', 'data-pairs/unfashionable_classifier_outputs.tsv')
"""

Saving Fashionable Pairs
Reading file  data-pairs/fashionable_clothing_pairs.tsv
Processing pair  0
Processing pair  50
Processing pair  100
Processing pair  150
Processing pair  200
Processing pair  250
Processing pair  300
Processing pair  350
Processing pair  400
Processing pair  450
Processing pair  500
Processing pair  550
Processing pair  600
Processing pair  650
Processing pair  700
Processing pair  750
Processing pair  800
Processing pair  850
Processing pair  900
Processing pair  950
Processing pair  1000
Processing pair  1050
Processing pair  1100
Processing pair  1150
Processing pair  1200
Processing pair  1250
Processing pair  1300
Processing pair  1350
Processing pair  1400
Processing pair  1450
Processing pair  1500
Processing pair  1550
Processing pair  1600
Processing pair  1650
Processing pair  1700
Processing pair  1750
Processing pair  1800
Processing pair  1850
Processing pair  1900
Processing pair  1950
Processing pair  2000
Processing pair  2050
Processing pair  2

In [137]:
# Input files: the *_pairs.tsv files list product pairs; the
# *_classifier_outputs.tsv files hold one feature row per product (the same
# paths appear as save_data outputs in the disabled export calls).
fashionable_pairs_file = 'data-pairs/fashionable_clothing_pairs.tsv'
fashionable_product_feats_file = 'data-pairs/fashionable_classifier_outputs.tsv'
unfashionable_pairs_file = 'data-pairs/unfashionable_clothing_pairs.tsv'
unfashionable_product_feats_file = 'data-pairs/unfashionable_classifier_outputs.tsv'

def get_product_merge_pairs(pairs_filename, product_to_features, y_output):
    """Build labelled, concatenated feature vectors for the pairs in a file.

    Args:
        pairs_filename: whitespace-separated pairs file; field 0 is an integer
            pair id, fields 2 and 5 are the two product ids.
        product_to_features: mapping from product id to a 2-D feature array
            (the arrays are joined along axis 1).
        y_output: label attached to every pair read from this file.

    Returns:
        A randomly shuffled list of ``(merged_feat, y_output, pair_id)``
        tuples. Lines that are malformed or reference an unknown product are
        reported on stdout and skipped.
    """
    pairs = []
    print('Reading file ', pairs_filename)
    with open(pairs_filename, 'r') as f:
        # Iterate the file lazily instead of materialising it with readlines().
        for i, line in enumerate(f):
            try:
                fields = line.split()
                pair_id = int(fields[0])
                product1_id = fields[2]
                product2_id = fields[5]

                product1_feat = product_to_features[product1_id]
                product2_feat = product_to_features[product2_id]
                merged_feat = np.concatenate((product1_feat, product2_feat), axis=1)
            except Exception as e:
                # Best-effort: keep going, but say which line was dropped. A
                # bare KeyError would otherwise print only the missing id.
                print('Skipping line {} of {}: {!r}'.format(i, pairs_filename, e))
                continue
            pairs.append((merged_feat, y_output, pair_id))
    random.shuffle(pairs)
    return pairs

def get_product_to_features(product_feats_file):
    """Read a features TSV into a ``{product_id: features}`` dictionary.

    Every non-empty row has the product id in column 0 and numeric values in
    the remaining columns. If an id occurs more than once, the last row wins.

    Args:
        product_feats_file: path of the tab-separated features file.

    Returns:
        A dict mapping each product id (string) to a float array of shape
        ``(1, n_values)``.
    """
    product_to_feats = {}
    # newline='' is how the csv module expects its input files to be opened.
    with open(product_feats_file, 'r', newline='') as tsvfile:
        tsvreader = csv.reader(tsvfile, delimiter='\t')
        for row in tsvreader:
            if not row:
                # csv.reader yields [] for a blank line; row[0] would raise.
                continue
            product_id = row[0]
            feats = np.array([[float(n) for n in row[1:]]])
            product_to_feats[product_id] = feats
    return product_to_feats


In [138]:
# Load the per-product feature rows for each class.
fashionable_prods_to_feats = get_product_to_features(fashionable_product_feats_file)
unfashionable_prods_to_feats = get_product_to_features(unfashionable_product_feats_file)

# Join each pair's two feature arrays; label 1 = fashionable, 0 = unfashionable.
# The returned lists are shuffled, and no random seed is set in the visible
# cells, so their order differs between runs.
fashionable_merged_pairs = get_product_merge_pairs(fashionable_pairs_file, fashionable_prods_to_feats, 1)
unfashionable_merged_pairs = get_product_merge_pairs(unfashionable_pairs_file, unfashionable_prods_to_feats, 0)

Reading file  data-pairs/fashionable_clothing_pairs.tsv
Reading file  data-pairs/unfashionable_clothing_pairs.tsv


In [126]:
def get_data(valid_pairs, invalid_pairs, n_pairs=4600):
    """Interleave two lists of labelled pairs into training arrays.

    Args:
        valid_pairs: sequence of tuples whose item 0 is a feature array and
            item 1 is the label.
        invalid_pairs: same layout as ``valid_pairs``.
        n_pairs: maximum number of pairs taken from each list. The default of
            4600 is the size this notebook was originally hard-coded to; pass
            ``None`` to use as many pairs as the shorter list provides.

    Returns:
        ``(X, y)`` as numpy arrays with rows alternating valid, invalid,
        valid, ... The same number of rows is taken from each list, and that
        number never exceeds the length of the shorter one.
    """
    available = min(len(valid_pairs), len(invalid_pairs))
    # Clip to what is actually available instead of raising IndexError.
    count = available if n_pairs is None else min(n_pairs, available)

    all_pairs_X = []
    all_pairs_y = []
    for i in range(count):
        all_pairs_X.append(valid_pairs[i][0])
        all_pairs_X.append(invalid_pairs[i][0])
        all_pairs_y.append(valid_pairs[i][1])
        all_pairs_y.append(invalid_pairs[i][1])
    return np.array(all_pairs_X), np.array(all_pairs_y)

#y_categorical = keras.utils.np_utils.to_categorical(y, num_classes=2)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Interleave fashionable (label 1) and unfashionable (label 0) pairs into the
# training arrays.
X, y = get_data(fashionable_merged_pairs, unfashionable_merged_pairs)

In [127]:
np.array(X).shape # (9200, 1, 2048) in the recorded run: 4600 pairs from each class

(9200, 1, 2048)

In [128]:
def test_simple_network():
    """Build and compile the small dense classifier for merged pair features.

    Architecture: input of shape (1, 2048) -> Dense(128, relu) -> Dropout(0.1)
    -> Dense(64, relu) -> Dropout(0.1) -> Flatten -> Dense(1, sigmoid).
    Compiled with binary cross-entropy, RMSprop at learning rate 1e-5, and
    the accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential()
    stack = (
        Dense(128, input_shape=(1, 2048), activation='relu', name='fc1'),
        Dropout(0.1),
        Dense(64, activation='relu', name='fc2'),
        Dropout(0.1),
        # Collapse the length-1 middle axis before the output unit.
        Flatten(),
        Dense(1, activation='sigmoid', name='fc_final'),
    )
    for layer in stack:
        network.add(layer)

    rmsprop = optimizers.RMSprop(lr=1e-5)
    network.compile(loss="binary_crossentropy", optimizer=rmsprop, metrics=["accuracy"])
    return network

In [130]:
# Build the pair classifier, show its layers, and train it on the interleaved
# pair features, holding out 20% of the samples for validation.
# NOTE(review): no random seed is set in the visible cells, so the pair order
# and the training result are not reproducible between runs.
fashion_classifier = test_simple_network()
fashion_classifier.summary()
fashion_classifier.fit(X, y, validation_split=0.2, epochs=25, batch_size=10)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
fc1 (Dense)                  (None, 1, 128)            262272    
_________________________________________________________________
dropout_13 (Dropout)         (None, 1, 128)            0         
_________________________________________________________________
fc2 (Dense)                  (None, 1, 64)             8256      
_________________________________________________________________
dropout_14 (Dropout)         (None, 1, 64)             0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 64)                0         
_________________________________________________________________
fc_final (Dense)             (None, 1)                 65        
Total params: 270,593
Trainable params: 270,593
Non-trainable params: 0
_________________________________________________________________
Trai

<keras.callbacks.History at 0x7f9cfd5a6d30>

In [139]:
def get_merged_feats_data(merged_pairs):
    """Split ``(features, label, pair_id)`` tuples into three parallel collections.

    Args:
        merged_pairs: list of tuples whose items 0, 1 and 2 are the merged
            feature array, the label and the pair id.

    Returns:
        ``(X, y, ids)``: the features and labels as numpy arrays, and the
        pair ids as a plain list, all in the input order.
    """
    features = [entry[0] for entry in merged_pairs]
    labels = [entry[1] for entry in merged_pairs]
    identifiers = [entry[2] for entry in merged_pairs]
    return np.array(features), np.array(labels), identifiers

In [140]:
# Score every fashionable pair and list (pair_id, prediction) by ascending id.
# NOTE(review): these pairs come from the same list that get_data drew the
# training samples from, so the scores are not a held-out evaluation.
X_fashionable, y_fashionable, ids_fashionable = get_merged_feats_data(fashionable_merged_pairs)
fashionable_predictions = list(zip(ids_fashionable, fashion_classifier.predict(X_fashionable)))
sorted(fashionable_predictions, key=lambda p: p[0])

[(0, array([ 0.43961138], dtype=float32)),
 (1, array([ 0.99979967], dtype=float32)),
 (2, array([ 0.93691063], dtype=float32)),
 (3, array([ 0.99943644], dtype=float32)),
 (4, array([ 0.93750763], dtype=float32)),
 (5, array([ 0.99802846], dtype=float32)),
 (6, array([ 0.99964273], dtype=float32)),
 (7, array([ 0.59421659], dtype=float32)),
 (8, array([ 0.89209354], dtype=float32)),
 (9, array([ 0.96761149], dtype=float32)),
 (10, array([ 0.99979657], dtype=float32)),
 (11, array([ 0.99669421], dtype=float32)),
 (12, array([ 0.9992575], dtype=float32)),
 (13, array([ 0.18790826], dtype=float32)),
 (14, array([ 0.99960655], dtype=float32)),
 (15, array([ 0.8034966], dtype=float32)),
 (16, array([ 0.9999572], dtype=float32)),
 (17, array([ 0.99939406], dtype=float32)),
 (18, array([ 0.87881482], dtype=float32)),
 (19, array([ 0.41229862], dtype=float32)),
 (20, array([ 0.99994326], dtype=float32)),
 (21, array([ 0.33647853], dtype=float32)),
 (22, array([ 0.89279366], dtype=float32)),
 

In [141]:
# Score every unfashionable pair and list (pair_id, prediction) by ascending id.
# NOTE(review): these pairs come from the same list that get_data drew the
# training samples from, so the scores are not a held-out evaluation.
X_unfashionable, y_unfashionable, ids_unfashionable = get_merged_feats_data(unfashionable_merged_pairs)
unfashionable_predictions = list(zip(ids_unfashionable, fashion_classifier.predict(X_unfashionable)))
sorted(unfashionable_predictions, key=lambda p: p[0])

[(0, array([ 0.02449076], dtype=float32)),
 (1, array([ 0.0420282], dtype=float32)),
 (5, array([ 0.13719188], dtype=float32)),
 (6, array([ 0.5977267], dtype=float32)),
 (8, array([ 0.17710172], dtype=float32)),
 (9, array([ 0.02620026], dtype=float32)),
 (10, array([ 0.01502396], dtype=float32)),
 (11, array([ 0.01021814], dtype=float32)),
 (13, array([ 0.11882672], dtype=float32)),
 (14, array([ 0.07421309], dtype=float32)),
 (15, array([ 0.06368441], dtype=float32)),
 (16, array([ 0.12362023], dtype=float32)),
 (19, array([ 0.02601949], dtype=float32)),
 (20, array([ 0.0864459], dtype=float32)),
 (21, array([ 0.27315655], dtype=float32)),
 (22, array([ 0.05866903], dtype=float32)),
 (23, array([ 0.00863202], dtype=float32)),
 (24, array([ 0.8521778], dtype=float32)),
 (25, array([ 0.03227141], dtype=float32)),
 (27, array([ 0.00340265], dtype=float32)),
 (28, array([ 0.10241073], dtype=float32)),
 (29, array([ 0.12826705], dtype=float32)),
 (30, array([ 0.06598021], dtype=float32))