In [1]:
from __future__ import division

import numpy as np
import os
from glob import glob

from PIL import Image
from copy import deepcopy

from sklearn import linear_model, datasets, neighbors
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm

%matplotlib inline
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns

import torch
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Variable

import pandas as pd

import scipy.stats as stats
from scipy.stats import norm
import sklearn

import embedding as emb
reload(emb)
from embedding import *

In [3]:
def list_files(path, ext='png'):
    """Recursively collect the files under `path` that have extension `ext`.

    Every directory visited by os.walk (including `path` itself) is globbed
    for '*.<ext>'.

    Parameters
    ----------
    path : str
        Root directory of the search.
    ext : str
        File extension without the leading dot (default 'png').

    Returns
    -------
    list of str
        Matching file paths, in the order os.walk / glob yield them
        (no sorting is applied).
    """
    pattern = '*.%s' % ext
    matches = []
    for dirpath, _dirnames, _filenames in os.walk(path):
        matches.extend(glob(os.path.join(dirpath, pattern)))
    return matches

# Directory that is searched (recursively, via list_files) for image files.
partial_sketch_dir = '../partial_sketches'
# Every '*.png' path found under partial_sketch_dir ('png' is list_files' default extension).
partial_paths = list_files(partial_sketch_dir)

def save_partial_features(Features, Labels, Chunks, Subjs, Trials, X, layer_num):
    """Write one layer's feature matrix and the label array to .npy files.

    Both files are written relative to the current working directory:
    'FEATURES_PARTIAL_<layer>.npy' (where <layer> is the name at position
    `layer_num` in P1..P5, FC6, FC7) and 'LABELS_PARTIAL.npy'.

    Parameters
    ----------
    Features : array-like
        Saved to the layer-specific features file.
    Labels : array-like
        Saved to 'LABELS_PARTIAL.npy' (overwritten on every call).
    Chunks, Subjs, Trials, X
        Accepted for signature compatibility; not written anywhere here.
    layer_num : int
        Index into the list of layer names.

    Returns
    -------
    int
        `layer_num`, unchanged.
    """
    layer_names = ['P1','P2','P3','P4','P5','FC6','FC7']
    feature_file = 'FEATURES_PARTIAL_{}.npy'.format(layer_names[layer_num])
    np.save(feature_file, Features)
    np.save('LABELS_PARTIAL.npy', Labels)
    return layer_num

def normalize(X):
    """Center and scale `X` along axis 0.

    The axis-0 mean is subtracted, then the result is divided by its axis-0
    standard deviation. The divisor is floored at 1e-5 so that entries with
    (near-)zero spread do not cause a division by zero.

    Parameters
    ----------
    X : numpy.ndarray
        Input array; it is not modified.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as `X`.
    """
    centered = X - X.mean(0)
    scale = np.maximum(centered.std(0), 1e-5)
    return centered / scale

def preprocess_features(Features, Labels, Chunks, Subjs, Trials, X):
    """Reorder all per-row arrays by sorted metadata and normalize the features.

    The metadata frame `X` is sorted by 'label', then 'trial', then 'subj'.
    The index of the sorted frame is then used to index `Features`, `Labels`,
    `Chunks`, `Subjs` and `Trials`, and the reordered features are passed
    through `normalize`.

    NOTE(review): the index labels of `X` are used directly as positions into
    the arrays, so `X` is presumably expected to carry a default 0..n-1 index
    aligned with the arrays — confirm against callers.

    Parameters
    ----------
    Features, Labels, Chunks, Subjs, Trials : numpy.ndarray
        Arrays indexed along their first axis by the rows of `X`.
    X : pandas.DataFrame
        Metadata with at least 'label', 'trial' and 'subj' columns.

    Returns
    -------
    tuple
        (normalized features, labels, chunks, subjs, trials, sorted metadata).
    """
    sorted_meta = X.sort_values(['label','trial','subj'])
    order = np.array(sorted_meta.index)
    scaled_features = normalize(Features[order])
    reordered = tuple(arr[order] for arr in (Labels, Chunks, Subjs, Trials))
    return (scaled_features,) + reordered + (sorted_meta,)

# The six unordered pairs of the categories bed / bench / chair / table, written
# in alphabetical order; these are the same strings that assign_axis_to_metadata
# stores in the 'axis' column.
unique_axes = ['bed_bench','bed_chair','bed_table','bench_chair','bench_table','chair_table']

def assign_axis_to_metadata(_X):
    """Return a copy of `_X` with an order-independent 'axis' column added.

    For each row, the 'label' and 'competitor' values are joined as
    '<label>_<competitor>' and mapped to a canonical pair name, so that e.g.
    both 'bed_bench' and 'bench_bed' become 'bed_bench'.

    Parameters
    ----------
    _X : pandas.DataFrame
        Must have 'label' and 'competitor' columns.

    Returns
    -------
    pandas.DataFrame
        New frame (from DataFrame.assign) with the extra 'axis' column.

    Raises
    ------
    KeyError
        If a label/competitor combination is not one of the twelve known
        orderings below.
    """
    # (canonical name, same pair written in the opposite order)
    pair_spellings = (
        ('bed_bench', 'bench_bed'),
        ('bed_chair', 'chair_bed'),
        ('bed_table', 'table_bed'),
        ('bench_chair', 'chair_bench'),
        ('bench_table', 'table_bench'),
        ('chair_table', 'table_chair'),
    )
    lookup = {}
    for canonical, flipped in pair_spellings:
        lookup[canonical] = canonical
        lookup[flipped] = canonical

    axis = []
    for target, foil in zip(list(_X.label.values), list(_X.competitor.values)):
        axis.append(lookup[target + '_' + foil])
    return _X.assign(axis=pd.Series(axis).values)

# augment with additional metadata from each subject's metadata file
# Subject directories are the entries of path_to_sketches whose name ends in 'neurosketch'.
sub_dirs = [i for i in sorted(os.listdir(path_to_sketches)) if i.endswith('neurosketch')]
# Each subject directory <s> is expected to contain a file named <s>_metadata.csv.
path_to_sub_meta = [os.path.join(path_to_sketches,s,s+'_metadata.csv') for s in sub_dirs]

# concatenate all subjects' metadata files
# Read every file first and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all previous rows on every iteration.
# pd.read_csv raises if a subject's metadata file is missing.
_frames = []
for p in path_to_sub_meta:
    _Y = pd.read_csv(p)
    _frames.append(_Y)
# Y is always a DataFrame (empty when no subject directory was found), with a
# fresh 0..n-1 index across all subjects.
Y = pd.concat(_frames, ignore_index=True) if _frames else pd.DataFrame()
 

IOError: File ../partial_sketches/0110171_neurosketch/0110171_neurosketch_metadata.csv does not exist

In [None]:
# Master switch: the feature extraction below only runs when this is truthy.
reallyRun = 0
if reallyRun:
    # Layer names, indexed by the layer number handed to the extractor.
    layers = ['P1','P2','P3','P4','P5','FC6','FC7']
    ## if num feature dimensions exceeds this value, take random subset of features
    subset_size = 10000
    ## 0 = build the metadata table on the first layer; nonzero = reuse METADATA_PARTIAL.csv
    already_generated_metadata = 0
    # Metadata table; built once (or loaded from disk) and reused for every layer.
    P = None
    for layer_ind in np.arange(len(layers)):
        print('Extracting Layer {}'.format(layers[layer_ind]))
        extractor = PartialFeatureExtractor(partial_paths,layer_ind)
        Features, Labels, Chunks, Subjs, Trials = extractor.extract_feature_matrix()

        if already_generated_metadata == 0:
            P = pd.DataFrame([Labels,Chunks,Subjs,Trials])
            P = P.transpose()
            P.columns = ['label','chunk','subj','trial']

            ## look up competitor for each trial
            competitor = []
            for index, row in P.iterrows():
                this_label = row['label']
                this_subj = row['subj']
                this_trial = row['trial']
                matches = Y[(Y['trial']==this_trial) & (Y['target']==this_label) & (Y['wID']==this_subj)]['competitor'].values
                if len(matches) == 0:
                    # same exception type as indexing an empty array, but with context
                    raise IndexError('No row in Y for subj {}, trial {}, target {}'.format(
                        this_subj, this_trial, this_label))
                competitor.append(matches[0])
            ## assign competitor to P
            P = P.assign(competitor=pd.Series(competitor).values)
            ## assign axis to P and write the metadata out
            # NOTE(review): this CSV is written in extraction order, whereas the
            # features/labels saved below are in the sorted order produced by
            # preprocess_features. The CSV's index column holds the original row
            # positions -- confirm downstream code realigns on it.
            assign_axis_to_metadata(P).to_csv('METADATA_PARTIAL.csv')
            already_generated_metadata += 1
        elif P is None:
            ## metadata was written by an earlier run: load it instead of rebuilding.
            # index_col=0 restores the row index that to_csv wrote as the first column.
            P = pd.read_csv('METADATA_PARTIAL.csv', index_col=0)

        print('Preprocessing features...')
        if Features.shape[1] > subset_size:
            # fixed seed so the same feature columns are chosen on every run
            subset_inds = np.random.RandomState(0).permutation(Features.shape[1])[:subset_size]
            Features = Features[:,subset_inds]
        _Features, _Labels, _Chunks, _Subjs, _Trials, _P = preprocess_features(Features, Labels, Chunks, Subjs, Trials, P)
        _P = assign_axis_to_metadata(_P)
        print('Saving features out...')
        layer_num = save_partial_features(_Features, _Labels, _Chunks, _Subjs, _Trials,  _P, layer_ind)


Extracting Layer P1
0
Batch 1
Batch 2
Batch 3
Batch 4
Batch 5
Batch 6
Batch 7
Batch 8
Batch 9
Batch 10
Batch 11
Batch 12
Batch 13
Batch 14
Batch 15
Batch 16
Batch 17
Batch 18
Batch 19
Batch 20
Batch 21
Batch 22
Batch 23
Batch 24
Batch 25
Batch 26
Batch 27
Batch 28
Batch 29
Batch 30
Batch 31
Batch 32
Batch 33
Batch 34
Batch 35
Batch 36
Batch 37
Batch 38
Batch 39
Batch 40
Batch 41
Batch 42
Batch 43
Batch 44
Batch 45
Batch 46
Batch 47
Batch 48
Batch 49
Batch 50
Batch 51
Batch 52
Batch 53
Batch 54
Batch 55
Batch 56
Batch 57
Batch 58
Batch 59
Batch 60
Batch 61
Batch 62
Batch 63
Batch 64
Batch 65
Batch 66
Batch 67
Batch 68
Batch 69
Batch 70
Batch 71
Batch 72
Batch 180
Batch 181
Batch 182
Batch 183
Batch 184
Batch 185
Batch 186
Batch 187
Batch 188
Batch 189
Batch 190
Batch 191
Batch 192
Batch 193
Batch 194
Batch 195
Batch 196
Batch 197
Batch 198
Batch 199
Batch 200
Batch 201
Batch 202
Batch 203
Batch 204
Batch 205
Batch 206
Batch 207
Batch 208
Batch 209
Batch 210
Batch 211
Batch 212
Batch 213

In [29]:
## look up competitor for each trial
# For every row of P, select the rows of Y with the same trial, target and
# worker ID, and keep the competitor of the first match.
competitor = []
for row_id, sketch in P.iterrows():
    same_trial = Y['trial'] == sketch['trial']
    same_target = Y['target'] == sketch['label']
    same_subj = Y['wID'] == sketch['subj']
    hits = Y.loc[same_trial & same_target & same_subj, 'competitor']
    competitor.append(hits.values[0])
## assign competitor to P
P = P.assign(competitor=pd.Series(competitor).values)



In [35]:
## sort + normalize first, then assign axis to the sorted metadata
# preprocess_features returns the sorted copy of P as its last output, so the
# axis column has to be added afterwards; assigning it beforehand to _P would
# be discarded when _P is rebound by the unpacking below.
_Features, _Labels, _Chunks, _Subjs, _Trials, _P = preprocess_features(Features, Labels, Chunks, Subjs, Trials, P)
_P = assign_axis_to_metadata(_P)
layer_num = save_partial_features(_Features, _Labels, _Chunks, _Subjs, _Trials,  _P, layer_ind)