In [1]:
from __future__ import division

import numpy as np
import os
from glob import glob

from PIL import Image
from copy import deepcopy

from sklearn import linear_model, datasets, neighbors
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm

%matplotlib inline
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import utils

import torch
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Variable

import pandas as pd

from scipy.stats import norm
import sklearn

import pickle

In [2]:
# downloading sketch info (SF takes a while) 
SF = pickle.load(open('/tigress/rslee/SF.p', 'rb'))
S = pickle.load(open('/tigress/rslee/S.p', 'rb'))

In [3]:
def list_files(path, ext='png'):
    result = [y for x in os.walk(path)
              for y in glob(os.path.join(x[0], '*.%s' % ext))]
    return result

path_to_sketches = '/home/rslee/sketch_data'

def get_trial_ID_from_metadata(path):
    metadata = pd.read_csv(path)[['wID', 'trial','target']]
    trialID = ["%s_trial_%s_%s" % (metadata['wID'].values[row], metadata['trial'].values[row], metadata['target'].values[row])
               for row in xrange(0,len(metadata))]
    return trialID

def get_viewpoint_from_metadata(path):
    return pd.read_csv(path).viewpoint.values.tolist()

def get_competitor_from_metadata(path):
    return pd.read_csv(path).competitor.values.tolist()



metadata_paths = list_files(path_to_sketches, ext='csv') 
trialID_metadata = sum(map(get_trial_ID_from_metadata, metadata_paths), [])
viewpoint_sketches = np.asarray(sum(map(get_viewpoint_from_metadata, metadata_paths),[]))
competitor_sketches = np.asarray(sum(map(get_competitor_from_metadata, metadata_paths),[]))

print 'Number of metadata files: ' + str(len(metadata_paths))
print 'Number of metadata trials for models: ' + str(len(viewpoint_sketches))


Number of metadata files: 35
Number of metadata trials for models: 1400


In [4]:
# getting the metadata to be sorted correctly. 

inds_metadata = np.argsort(trialID_metadata)
_trialID_metadata = np.asarray(trialID_metadata)[inds_metadata]
_SVP_bt = np.asarray(viewpoint_sketches)[inds_metadata]
_comp_bt = np.asarray(competitor_sketches)[inds_metadata]

In [5]:
SL = np.array(S.label)
# normalize and get means of feature vectors

def normalize(X):
    X = X - X.mean(0)
    X = X / np.maximum(X.std(0), 1e-5)
    return X

def get_class_means(X, labels):
    # load in and normalize features 
    X = normalize(X)
    _mu = np.zeros((len(np.unique(np.asarray(labels))), X.shape[1]), float)
    for vi, v in enumerate(np.unique(np.asarray(labels))):
        Xv = X[labels == v]
        nv = float(Xv.shape[0])
        if nv > 0:

            _mu[vi] = Xv.mean(0)
    return _mu


def get_means_across_views(X, labels, viewpoints): 
    _mu = np.zeros((len(np.unique(viewpoints)) * len(FURNITURES), X.shape[1]),float) # saving the averaged SF per laer here 
    for obj_i, obj in enumerate(FURNITURES):
        inds_object = np.where(labels == obj)
        X_obj = X[inds_object] # taking all the views per object here 
        
        means = get_class_means(X_obj, viewpoints[inds_object])
        
        _mu[obj_i * len(np.unique(viewpoints)): (obj_i + 1) * len(np.unique(viewpoints)), :] = means
    return _mu
        
_SF_bt = []
for layer in xrange(0,7):
    inds = np.argsort(S.trialID.values)
    SF_i = SF[layer]
    _SF_bt.append(SF_i[inds])
_SL_bt = SL[inds]
S_subj = S.subj.values
_S_subj_bt = S_subj[inds]
_S_trial_bt = S.trial.values[inds]



In [14]:
# getting pixel level data 
VGG_SIZE_X, VGG_SIZE_Y = 224, 224
VGG_SIZE_Z = 3

SP = np.empty((num_sketches, VGG_SIZE_X * VGG_SIZE_Y* VGG_SIZE_Z), float)
SP_i = 0 # used for the batch index 
for path in S.path: 
    img = utils.load_image(path)
    # take out the fourth dimension, alpha, which controls transparency
    img = img[:,:,:3]

    img = np.asarray(img.flatten()).reshape(1, VGG_SIZE_X * VGG_SIZE_Y* VGG_SIZE_Z)
          
    SP[SP_i, :] = img
    SP_i += 1 

inds = np.argsort(S.trialID.values)
_SP_bt = SP[inds]


AttributeError: 'module' object has no attribute 'load_image'

In [6]:
# setting the index right for the sketches


inds_final = np.lexsort((np.asarray(_S_subj_bt), _SVP_bt.astype(int), np.asarray(_SL_bt)))
_SVP = _SVP_bt[inds_final]
_SL = _SL_bt[inds_final]
_S_subj = _S_subj_bt[inds_final]
_S_trial = _S_trial_bt[inds_final]
_comp = _comp_bt[inds_final]
# _SP = _SP_bt[inds_final]
_SF = []
for layer in xrange(0,7):
    _SF_i = _SF_bt[layer]
    _SF.append(_SF_i[inds_final])

check1 = _trialID_metadata[inds_final]
check2 = S.trialID.values[np.argsort(S.trialID.values)][inds_final]

print _S_subj[70:180]
print _SVP[70:180]
print _SL[70:180]
print ("Checking that the indexing is correct: " + str(np.array_equal(check1, check2)))

['0119172_neurosketch' '0119173_neurosketch' '0120173_neurosketch'
 '0123171_neurosketch' '0125172_neurosketch' '1121161_neurosketch'
 '1202161_neurosketch' '1206161_neurosketch' '1206162_neurosketch'
 '1206163_neurosketch' '0110171_neurosketch' '0110172_neurosketch'
 '0111171_neurosketch' '0113171_neurosketch' '0115172_neurosketch'
 '0119171_neurosketch' '0119172_neurosketch' '0119173_neurosketch'
 '0120173_neurosketch' '0123171_neurosketch' '0125172_neurosketch'
 '1121161_neurosketch' '1202161_neurosketch' '1206161_neurosketch'
 '1206162_neurosketch' '1206163_neurosketch' '0110171_neurosketch'
 '0110172_neurosketch' '0111171_neurosketch' '0113171_neurosketch'
 '0115172_neurosketch' '0119171_neurosketch' '0119172_neurosketch'
 '0119173_neurosketch' '0120173_neurosketch' '0123171_neurosketch'
 '0125172_neurosketch' '1121161_neurosketch' '1202161_neurosketch'
 '1206161_neurosketch' '1206162_neurosketch' '1206163_neurosketch'
 '0110171_neurosketch' '0110172_neurosketch' '0111171_neuroske

In [7]:
unique_axes = ['bed_bench','bed_chair','bed_table','bench_chair','bench_table','chair_table']

def assign_axis_to_metadata(labels, competitor):
    _combos = [i+'_'+j for (i,j) in zip(list(np.asarray(labels)),list(np.asarray(competitor)))]
    lookup = {'bed_bench':'bed_bench',
              'bench_bed':'bed_bench',
              'bed_chair':'bed_chair',
              'chair_bed':'bed_chair',
              'bed_table':'bed_table',
              'table_bed':'bed_table',
              'bench_chair':'bench_chair',
              'chair_bench':'bench_chair',
              'bench_table':'bench_table',
              'table_bench':'bench_table',
              'chair_table':'chair_table',
              'table_chair':'chair_table',          
             }
    axis = [lookup[c] for c in _combos]

    return axis

_axis = assign_axis_to_metadata(_SL, _comp)

# Within_run Analysis

In [10]:

import scipy.stats as stats
LAYERS_NM = ['pixel', 'pool1', 'pool2', 'pool3', 'pool4', 'pool5', 'fc6', 'fc7']

  
for i in reversed(xrange(0,7)):
    which_feat = LAYERS_NM[i]
    FEATMAT = _SF[i]
    # width of moving window in # of trials    
    window_size = 4 
    first_trial = int(min(_S_trial.astype(int)))
    last_trial = int(max(_S_trial.astype(int)))
    num_windows = last_trial-first_trial-window_size+2 ##     

    SIM = []
    subs = np.unique(_S_subj)
    for sub in subs:
        sim = []
        for n in np.arange(num_windows):
            start = first_trial + n
            end = first_trial + n + window_size
            span = np.arange(start,end)
            axis = np.unique(np.asarray(_axis)[_S_subj==sub])[0]
            o1 = axis.split('_')[0]
            o2 = axis.split('_')[1]
            o1_inds = (_SL==o1) & (_S_subj==sub) & (pd.to_numeric(_S_trial)>=start) & (pd.to_numeric(_S_trial)<=end)
            o2_inds = (_SL==o2) & (_S_subj==sub) & (pd.to_numeric(_S_trial)>=start) & (pd.to_numeric(_S_trial)<=end)
            FEAT_o1 = FEATMAT[o1_inds]
            FEAT_o2 = FEATMAT[o2_inds]
            MEANFEAT_o1 = FEATMAT[o1_inds].mean(0)
            MEANFEAT_o2 = FEATMAT[o2_inds].mean(0)
            sim.append(stats.pearsonr(MEANFEAT_o1,MEANFEAT_o2)[0])
        SIM.append(sim)
    SIM = np.array(SIM)


    ## plot time series across runs of training
    sns.set_context('poster')
    sns.set_style('dark')
    fig = plt.figure(figsize=(8,6))
    plt.xlabel('repetitions')
    plt.ylabel('correlation distance')
    plt.xlim(0,36)
    for i,j in zip([0,10,20,30],[7,17,27,37]):
        x_inds = list(np.arange(i,j))
        y_vals = 1-SIM.mean(0)[x_inds]
        plt.plot(x_inds,y_vals,'k',linewidth=4)
    # for i,j in zip([0,10,20,30],[6,16,26,36]):  
    #     plt.axvspan(i,j, alpha=0.4, color='red')    
    for i,j in zip([6,16,26],[10,20,30]):
        plt.axvspan(i,j, alpha=0.75, color='white') 
    run_labels = ['run 1','run 2','run 3', 'run 4']    
    tick = plt.xticks(np.arange(3.5, 36,10.0),run_labels) 
    plt.tight_layout()
    if not os.path.exists('./plots'):
        os.makedirs('./plots')
    plt.savefig(os.path.join('./plots','similarity_sketch_timecourse_allruns_{}.pdf'.format(which_feat)))
    
   

> <ipython-input-10-83ffa129b5a2>(40)<module>()
-> assert s.shape==(SIM.shape[2],)
(Pdb) SIM.shape
(35, 37)
(Pdb) q


BdbQuit: 

In [9]:
import scipy.stats as stats
LAYERS_NM = ['pixel', 'pool1', 'pool2', 'pool3', 'pool4', 'pool5', 'fc6', 'fc7']
for i in reversed(xrange(0,7)):
    which_feat = LAYERS_NM[i]
    FEATMAT = _SF[i]
    num_runs = 4
    run_length = 10
    first_trial = int(min(_S_trial.astype(int)))
    last_trial = int(max(_S_trial.astype(int))) 

    subs = np.unique(_S_subj)
    SIM = []
    for sub in subs:
        Sim = []
        for n in np.arange(num_runs):
            sim = []
            start = first_trial + run_length*n
            end = first_trial + run_length*(n+1) - 1
            window_size = 4 # width of moving window in # of trials
            num_windows = end-start-window_size+2 ## 
            for _n in np.arange(num_windows):
                _start = start + _n
                _end = start + _n + window_size                
                axis = np.unique(np.asarray(_axis)[_S_subj==sub])[0]
                o1 = axis.split('_')[0]
                o2 = axis.split('_')[1]
                o1_inds = (_SL==o1) & (_S_subj==sub) & (pd.to_numeric(_S_trial)>=_start) & (pd.to_numeric(_S_trial)<=_end)
                o2_inds = (_SL==o2) & (_S_subj==sub) & (pd.to_numeric(_S_trial)>=_start) & (pd.to_numeric(_S_trial)<=_end)
                FEAT_o1 = FEATMAT[o1_inds]
                FEAT_o2 = FEATMAT[o2_inds]
                MEANFEAT_o1 = FEAT_o1.mean(0)
                MEANFEAT_o2 = FEAT_o2.mean(0)
                sim.append(stats.pearsonr(MEANFEAT_o1,MEANFEAT_o2)[0])
            Sim.append(sim)
        SIM.append(Sim)
    SIM = np.array(SIM)
    
         ## spearman correlation over the mean for each timewindow in each run
    sub_spearman2 = []
    for i, s in enumerate(SIM.mean(1)):
        assert s.shape==(SIM.shape[2],)
        sub_spearman2.append(1 - stats.spearmanr(np.arange(SIM.shape[2]),s)[0])    

    W = pd.DataFrame([subs,sub_spearman2])
    W = W.transpose()
    W.columns = ['subj','runwise_similarity_change']
    W.to_csv('sketch_similarity_timecourse_within_run_{}.csv'.format(which_feat))
    
    