In [46]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm 
from PIL import Image

Using TensorFlow backend.


In [240]:
# file structure is ./data/small/[clothing_category]/[image no.]

train_directory = './data/small/'
# Form list of training images names
directory_list  = os.listdir(train_directory)
# Convert to 224 x 224 images

ims = []

for directory in directory_list:
    # list all the images in the directory
    images_list = os.listdir(os.path.join(train_directory, directory))
    
    # open the images and resize
    ims.extend([np.array(Image.open(os.path.join(train_directory, directory, im)), dtype=np.float64).resize((224,224)) for im in images_list])

In [156]:
'''
IGNORE
d = './data/medium/'
dlist = os.listdir(d)
i = []
for dl in dlist:
    c = get_category(dl)
    i.append(c)
    
    
from collections import Counter
cnt = Counter()
for word in i:
    cnt[word] += 1

cnt
'''


Counter({'Blouse': 15,
         'Cardigan': 15,
         'Dress': 35,
         'Hoodie': 5,
         'Jacket': 9,
         'Jeans': 9,
         'Joggers': 6,
         'Kimono': 8,
         'Leggings': 8,
         'Romper': 9,
         'Shorts': 15,
         'Skirt': 12,
         'Sweater': 11,
         'Sweatpants': 6,
         'Tank': 13,
         'Tee': 26,
         'Top': 9})

In [30]:
def get_category(string):
    # split by underscore
    # get last element
    temp = string.split("_")
    return temp[-1]

In [36]:
labs = []
for direc in directory_list:
    category = get_category(direc)
    
    images_list = os.listdir(os.path.join(train_directory, direc))

    for i in range(len(images_list)):
        labs.append(category)

### Now: All the images stored under "ims" and all the simplified labels stored under "labs"

In [41]:
# one hot encoding

s = pd.Series(labs)
one_hot_categories = pd.get_dummies(s)

In [81]:
imlist = np.array([np.array(im, dtype=np.float64) for im in ims])

# Resnet Time

In [60]:
import tensorflow as tf
import keras

from keras.applications import ResNet50
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import SGD

from tqdm import tqdm

In [51]:
IM_HEIGHT = 224
IM_WIDTH = 224
NB_EPOCHS = 1
BAT_SIZE = 16
FC_SIZE = 500 # May need to train this parameter

nb_classes = len(set(labs))

In [47]:
def setup_to_transfer_learn(model, base_model):
    """Freeze all layers and compile the model"""
    adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    for layer in base_model.layers:
        layer.trainable = False
        model.compile(optimizer='adam',    
                    loss='categorical_crossentropy', 
                    metrics=['accuracy'])

In [58]:
def add_new_last_layer(base_model, nb_classes):
    """Add last layer to the convnet
    Args:
    base_model: keras model excluding top
    nb_classes: # of classes
    Returns:
    new keras model with last layer
    """
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(FC_SIZE, activation='relu')(x) #new FC layer, random init
    predictions = Dense(nb_classes, activation='softmax')(x) #new softmax layer
    model = Model(inputs = base_model.input, outputs = predictions)
    return model

In [None]:
def resnet_train(images, labels):
    
    base_model = ResNet50(weights='imagenet', include_top=False)
    model = add_new_last_layer(base_model, nb_classes)

    setup_to_transfer_learn(model, base_model)

    history = model.fit(images, labels)
    model.save("resnet.h5")
    
    return history

h = tqdm(resnet_train(imlist, one_hot_categories.values))

## Predict - Load in trained model

### Remove last layer

In [224]:
def remove_softmax(model):
    model.layers.pop() # Get rid of the classification layer
    model.outputs = [model.layers[-1].output]
    model.layers[-1].outbound_nodes = []
    
    return model

In [220]:
def get_cnn_output(model, ims):
    # ims is np array
    if len(ims.shape) == 1:
        ims = ims.reshape(1, -1)
    
    return (model.predict(ims, batch_size=BAT_SIZE))

In [225]:
model2 = ResNet50(weights='imagenet') # using imagenet for now
model2 = remove_softmax(model2)
preds = get_cnn_output(model2, imlist[0:4])

# Similarity Measure - image

In [121]:
from sklearn.metrics.pairwise import cosine_similarity

In [227]:
def get_top_k_indices(google_cnn_output, user_selected_imgs, k):
    
    '''
    NOTE: user_selected_imgs NEEDS TO BE A LIST! Even if it's only 1 item.
    It does not handle 0 items at this moment.
    
    Both google_cnn_output and user_selected_imgs are output from the CNN and a np.array
    '''
    if len(google_cnn_output.shape) == 1:
        google_cnn_output = google_cnn_output.reshape(1, -1)
    if len(user_selected_imgs.shape) == 1:
        user_selected_imgs = user_selected_imgs.reshape(1, -1)
        
    similarity_results = np.zeros((len(user_selected_imgs), len(google_cnn_output)))

    for idx, img in enumerate(user_selected_imgs):
        similarity_results[idx,:] = cosine_similarity(img.reshape(1, -1), google_cnn_output)
        
    print(similarity_results)
        
    if similarity_results.shape[0] == 1:
        sorted_indices = np.argsort(similarity_results[0])
    
    else:
        means = np.mean(similarity_results, axis=0)
        sorted_indices = np.argsort(means)
    
    if k > len(google_cnn_output):
        return (sorted_indices)
    
    top_indices = sorted_indices[-k:]

    return(list(reversed(top_indices)))

In [228]:
#get_top_k_indices(preds, preds[0:2], 4)

[[ 0.99999964  0.79235113  0.74565804  0.75529718]
 [ 0.79235113  1.00000012  0.78746468  0.78237677]]


[1, 0, 3, 2]