In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import shutil
import image_utils

from sklearn.metrics.pairwise import pairwise_distances
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import torch
import torchvision
import torch.nn as nn
from torchvision import transforms


%matplotlib inline

# Load Data

In [6]:
# Glob pattern selecting every .jpg image in the clustered dresses folder.
data_path = '../../data/fashion_models/dresses_clustered2/*.jpg'

In [7]:
# Collect every image path matching the pattern, in lexicographic order,
# so that the list is identical from one run to the next.
filelist = sorted(glob.glob(data_path))
print('num images: ', len(filelist))

num images:  15304


In [8]:
# Image identifiers: the basename of each path with its extension removed.
# os.path.splitext strips only the trailing extension, whereas splitting on
# '.jpg' would cut the name at the first occurrence anywhere in it.
# filelist is already sorted, so the names are deliberately NOT re-sorted:
# sorting the stripped names can order them differently from the full paths
# (e.g. 'a-1.jpg' < 'a.jpg' but 'a' < 'a-1'), which would break the
# invariant that filenames[i] belongs to filelist[i].
filenames = [os.path.splitext(os.path.basename(f))[0] for f in filelist]

# Get Features From Retrained ResNet152

In [12]:
# Output directory handed to the feature-extraction helper for the
# fine-tuned ResNet-152.
feature_path = '../../data/features/fashion_models/dresses/resnet_retrained/'
# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(feature_path, exist_ok=True)

In [10]:
# Rebuild the fine-tuned ResNet-152 and keep everything except its final
# classification layer, so that a forward pass yields feature activations.
model_path = "./transfer_learning/best_model.pth"
# The checkpoint replaces every parameter and buffer (load_state_dict is
# strict by default), so fetching the ImageNet weights first is unnecessary.
model = torchvision.models.resnet152(pretrained=False)
num_ftrs = model.fc.in_features
# The replacement head must have the same shape as the one stored in the
# checkpoint (7 outputs), otherwise the strict load below fails.
model.fc = nn.Linear(num_ftrs, 7)
model.load_state_dict(torch.load(model_path, map_location='cpu'))
# Drop the last child module (the fully connected classifier).
modules = list(model.children())[:-1]
model_last = nn.Sequential(*modules)
# Modules start in training mode; switch to inference mode so BatchNorm uses
# its stored running statistics rather than per-batch statistics.
model_last.eval()

In [13]:
# Preprocessing applied to each image before it is passed to the network.
preprocessing_steps = [
    # An integer size rescales the shorter image edge to 224 pixels.
    transforms.Resize(size=224),
    transforms.ToTensor(),
    # Per-channel normalisation; these are the statistics commonly used with
    # ImageNet-pretrained torchvision models.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
data_transforms = transforms.Compose(preprocessing_steps)

In [14]:
# Feed every image through the truncated fine-tuned network.
# NOTE(review): presumably the helper writes one feature file per image into
# feature_path - confirm against image_utils.
image_utils.save_feature_vectors_from_model(
    filelist,
    feature_path,
    model_last,
    data_transforms,
)

Downloaded 0 / 15304
Downloaded 100 / 15304
Downloaded 200 / 15304
Downloaded 300 / 15304
Downloaded 400 / 15304
Downloaded 500 / 15304
Downloaded 600 / 15304
Downloaded 700 / 15304
Downloaded 800 / 15304
Downloaded 900 / 15304
Downloaded 1000 / 15304
Downloaded 1100 / 15304
Downloaded 1200 / 15304
Downloaded 1300 / 15304
Downloaded 1400 / 15304
Downloaded 1500 / 15304
Downloaded 1600 / 15304
Downloaded 1700 / 15304
Downloaded 1800 / 15304
Downloaded 1900 / 15304
Downloaded 2000 / 15304
Downloaded 2100 / 15304
Downloaded 2200 / 15304
Downloaded 2300 / 15304
Downloaded 2400 / 15304
Downloaded 2500 / 15304
Downloaded 2600 / 15304
Downloaded 2700 / 15304
Downloaded 2800 / 15304
Downloaded 2900 / 15304
Downloaded 3000 / 15304
Downloaded 3100 / 15304
Downloaded 3200 / 15304
Downloaded 3300 / 15304
Downloaded 3400 / 15304
Downloaded 3500 / 15304
Downloaded 3600 / 15304
Downloaded 3700 / 15304
Downloaded 3800 / 15304
Downloaded 3900 / 15304
Downloaded 4000 / 15304
Downloaded 4100 / 15304
Down

# Get Features From Original ResNet152

In [16]:
# Stock ImageNet-pretrained ResNet-152 with its final classification layer
# removed, used as a baseline feature extractor.
model = torchvision.models.resnet152(pretrained=True)
# Drop the last child module (the fully connected classifier).
modules = list(model.children())[:-1]
model_last = nn.Sequential(*modules)
# Modules start in training mode; switch to inference mode so BatchNorm uses
# its stored running statistics rather than per-batch statistics.
model_last.eval()

In [17]:
# Output directory handed to the feature-extraction helper for the stock
# ResNet-152. This rebinds feature_path, so later cells see the new value.
feature_path = '../../data/features/fashion_models/dresses/resnet/'
# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(feature_path, exist_ok=True)

In [18]:
# Same extraction as before, now using whichever model_last and feature_path
# are currently bound (the stock ResNet-152 when the cells run in order).
# NOTE(review): presumably the helper writes one feature file per image into
# feature_path - confirm against image_utils.
image_utils.save_feature_vectors_from_model(
    filelist,
    feature_path,
    model_last,
    data_transforms,
)

Downloaded 0 / 15304
Downloaded 100 / 15304
Downloaded 200 / 15304
Downloaded 300 / 15304
Downloaded 400 / 15304
Downloaded 500 / 15304
Downloaded 600 / 15304
Downloaded 700 / 15304
Downloaded 800 / 15304
Downloaded 900 / 15304
Downloaded 1000 / 15304
Downloaded 1100 / 15304
Downloaded 1200 / 15304
Downloaded 1300 / 15304
Downloaded 1400 / 15304
Downloaded 1500 / 15304
Downloaded 1600 / 15304
Downloaded 1700 / 15304
Downloaded 1800 / 15304
Downloaded 1900 / 15304
Downloaded 2000 / 15304
Downloaded 2100 / 15304
Downloaded 2200 / 15304
Downloaded 2300 / 15304
Downloaded 2400 / 15304
Downloaded 2500 / 15304
Downloaded 2600 / 15304
Downloaded 2700 / 15304
Downloaded 2800 / 15304
Downloaded 2900 / 15304
Downloaded 3000 / 15304
Downloaded 3100 / 15304
Downloaded 3200 / 15304
Downloaded 3300 / 15304
Downloaded 3400 / 15304
Downloaded 3500 / 15304
Downloaded 3600 / 15304
Downloaded 3700 / 15304
Downloaded 3800 / 15304
Downloaded 3900 / 15304
Downloaded 4000 / 15304
Downloaded 4100 / 15304
Down

# Get PCA From ResNet152

In [None]:
# Load every stored feature vector from the sample directory into one array,
# one entry per .npy file.
# NOTE(review): this directory ('models_sample') differs from the one the
# extraction cells wrote to - confirm it is the intended input.
feature_path = './test_features/models_sample/resnet_retrained/'
# glob.glob returns paths in arbitrary, filesystem-dependent order. Sorting
# makes the row order deterministic, which matters because rows are later
# matched to names by position.
resnet_retrained_feats = np.array(
    [np.load(f) for f in sorted(glob.glob(feature_path + '*.npy'))]
)

In [None]:
# Standardise each feature column; the fitted scaler is kept so the same
# transformation can be reapplied later.
scaler = StandardScaler().fit(resnet_retrained_feats)
resnet_retrained_feats_std = scaler.transform(resnet_retrained_feats)

In [None]:
# Project the standardised features onto 32 principal components using the
# exact (full SVD) solver.
sklearn_pca = PCA(n_components=32, svd_solver='full')
resnet_feats_pca = sklearn_pca.fit_transform(resnet_retrained_feats_std)
# Last expression of the cell: display the shape of the reduced matrix.
resnet_feats_pca.shape

In [None]:
# Display the amount of variance explained by each retained component.
sklearn_pca.explained_variance_

In [None]:
# Directory that receives the 32-dimensional PCA vectors.
pca_feature_path = './test_features/dresses_sample/resnet_retrained_pca_32/'
# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(pca_feature_path, exist_ok=True)

In [None]:
# Write each PCA-reduced vector to '<name>.npy' inside pca_feature_path.
# Rows are matched to names purely by position, so refuse to write anything
# when the counts disagree rather than mislabelling or dropping vectors.
# NOTE(review): positional matching also requires the feature rows to be in
# the same order as filenames - verify how the features were loaded.
if len(filenames) != len(resnet_feats_pca):
    raise ValueError(
        'filenames (%d) and resnet_feats_pca (%d) differ in length'
        % (len(filenames), len(resnet_feats_pca))
    )
for filename, pca_vector in zip(filenames, resnet_feats_pca):
    np.save(os.path.join(pca_feature_path, filename + '.npy'), pca_vector)

# Get Features From Akiwi

In [20]:
# Output directory handed to the Akiwi download helper. This rebinds
# feature_path, so later cells see the new value.
feature_path = '../../data/features/fashion_models/dresses/akiwi_114/'
# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(feature_path, exist_ok=True)

In [22]:
# Obtain the Akiwi feature vector for every image.
# NOTE(review): presumably the helper stores one file per image inside
# feature_path - confirm against image_utils.
image_utils.download_feature_vectors_114(
    filelist,
    feature_path,
)

Downloaded 0 / 15304
Downloaded 100 / 15304
Downloaded 200 / 15304
Downloaded 300 / 15304
Downloaded 400 / 15304
Downloaded 500 / 15304
Downloaded 600 / 15304
Downloaded 700 / 15304
Downloaded 800 / 15304
Downloaded 900 / 15304
Downloaded 1000 / 15304
Downloaded 1100 / 15304
Downloaded 1200 / 15304
Downloaded 1300 / 15304
Downloaded 1400 / 15304
Downloaded 1500 / 15304
Downloaded 1600 / 15304
Downloaded 1700 / 15304
Downloaded 1800 / 15304
Downloaded 1900 / 15304
Downloaded 2000 / 15304
Downloaded 2100 / 15304
Downloaded 2200 / 15304
Downloaded 2300 / 15304
Downloaded 2400 / 15304
Downloaded 2500 / 15304
Downloaded 2600 / 15304
Downloaded 2700 / 15304
Downloaded 2800 / 15304
Downloaded 2900 / 15304
Downloaded 3000 / 15304
Downloaded 3100 / 15304
Downloaded 3200 / 15304
Downloaded 3300 / 15304
Downloaded 3400 / 15304
Downloaded 3500 / 15304
Downloaded 3600 / 15304
Downloaded 3700 / 15304
Downloaded 3800 / 15304
Downloaded 3900 / 15304
Downloaded 4000 / 15304
Downloaded 4100 / 15304
Down

In [23]:
# Target directories for the two parts each Akiwi vector is split into.
# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for a directory and creating it.
feat_path64 = '../../data/features/fashion_models/dresses/akiwi_64/'
os.makedirs(feat_path64, exist_ok=True)

feat_path50 = '../../data/features/fashion_models/dresses/akiwi_50/'
os.makedirs(feat_path50, exist_ok=True)

In [24]:
# Split every stored vector in feature_path into two parts and save each part
# under the same file name in its own directory.
# NOTE(review): presumably each vector has 114 entries (64 + 50) - confirm.
feats114 = glob.glob(feature_path + '*.npy')
for idx, npy_path in enumerate(feats114):
    # Report progress once every thousand files.
    if not idx % 1000:
        print(idx)

    vector = np.load(npy_path)
    out_name = os.path.basename(npy_path)

    # The first 64 entries go to feat_path64, the rest to feat_path50.
    np.save(os.path.join(feat_path64, out_name), vector[:64])
    np.save(os.path.join(feat_path50, out_name), vector[64:])

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
