This notebook's analysis focuses on:
1. Splitting subjects into high/medium/low differentiation groups based on neural data pre/post sketching (data from Jordan)


# Setup

In [1]:
from __future__ import division

import numpy as np
import os
from glob import glob
import scipy

from PIL import Image
from copy import deepcopy

from sklearn import linear_model, datasets, neighbors
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm

%matplotlib inline
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
sns.set_context('notebook', font_scale=1.5)
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText


# import torch
# import torchvision.models as models
# import torch.nn as nn
# import torchvision.transforms as transforms
# import torch.nn.functional as F
# from torch.autograd import Variable
# import sys
# sys.path.insert(0, '/home/rslee/sketchfeat/sketches')


# import embedding as emb
# reload(emb)
# from embedding import *


import pandas as pd
import pickle

# Image-size constants named for VGG (presumably width, height, channels -- TODO confirm).
VGG_SIZE_X, VGG_SIZE_Y, VGG_SIZE_Z = 224, 224, 3

# ROI names; each one is combined with a condition prefix to form
# `{cond}_{roi}` column names when the neural data is indexed below.
ROI_all = [
    'V1', 'V2', 'LOC', 'IT',
    'fusiform', 'parahippo', 'PRC', 'ento',
    'hipp', 'mOFC', 'IFG', 'rostMFG',
    'caudMFG', 'precentral', 'SMG', 'STG',
]




In [2]:
# Load the partial-sketch table and derive three helper columns.
# NOTE(review): the path below is absolute and machine-specific; point it at a
# local copy of the CSV (ideally via a configurable data directory).
# partial = pd.read_csv('/home/rslee/sketchfeat/0110171_partial.csv')
partial = pd.read_csv('/Users/sasha/Desktop/partial_sketch_full.csv')

# Name of the object column holding the largest value on each row.
partial['curr_winner'] = partial[['bed', 'bench', 'chair', 'table']].idxmax(axis = 1)

# "<target>/<competitor>" label, e.g. 'bench/chair'.
# Built with vectorized string concatenation instead of a row-wise apply that
# indexed each row Series with integer keys (x[0], x[1]); that positional
# fallback on a label-indexed Series is deprecated in recent pandas.
partial['tc_pair'] = partial['target'] + '/' + partial['competitor']

# "<wID>_<trial>" key, e.g. '0119174_neurosketch_320'.
partial['trialID'] = partial['wID'] + '_' + partial['trial'].astype(str)

In [19]:
# Per-subject neural change measures: one row per subject, four columns per ROI.
#   trained_{roi} / control_{roi} : anchored measures of representational change
#                                   for the trained and control conditions
#   tradiff_{roi} / condiff_{roi} : the unanchored trained / control counterparts
# The `IDs` column holds the subject ids, e.g. 0110171_neurosketch.
neural_diff_raw = pd.read_csv('/Users/sasha/Documents/sketchfeat_data/neural_changes_by_surfroi_and_subject.csv')

# Until Jordan updates the neural_diff data, only 31 subjects (instead of 35)
# can be used: drop every sketch row whose subject has no neural data.
missing_subj = np.setdiff1d(partial['wID'].unique(), neural_diff_raw['IDs'])
mask = partial['wID'].isin(missing_subj)
partial = partial.loc[~mask]





In [42]:
# Preview the first five rows of `partial`.
partial.head(n=5)

Unnamed: 0.1,Unnamed: 0,index,wID,viewpoint,trial,trialDuration,target,competitor,numSketch,bed,bench,chair,table,curr_winner,tc_pair,trialID
0,0,0,0119174_neurosketch,20,320,39.00144,bench,chair,0,0.026822,0.055895,0.916243,0.00104,chair,bench/chair,0119174_neurosketch_320
1,1,0,0119174_neurosketch,20,320,39.00144,bench,chair,1,0.934225,0.026225,0.039085,0.000465,bed,bench/chair,0119174_neurosketch_320
2,2,0,0119174_neurosketch,20,320,39.00144,bench,chair,2,0.006035,0.97077,0.002714,0.020481,bench,bench/chair,0119174_neurosketch_320
3,3,0,0119174_neurosketch,20,320,39.00144,bench,chair,3,0.009842,0.978927,0.003784,0.007447,bench,bench/chair,0119174_neurosketch_320
4,4,0,0119174_neurosketch,20,320,39.00144,bench,chair,4,0.019298,0.967674,0.001463,0.011564,bench,bench/chair,0119174_neurosketch_320


In [27]:
# Preview the first five rows of the raw neural change table.
neural_diff_raw.head(n=5)

Unnamed: 0.1,Unnamed: 0,IDs,trained_V1,trained_V2,trained_LOC,trained_IT,trained_fusiform,trained_parahippo,trained_PRC,trained_ento,...,condiff_PRC,condiff_ento,condiff_hipp,condiff_mOFC,condiff_IFG,condiff_rostMFG,condiff_caudMFG,condiff_precentral,condiff_SMG,condiff_STG
0,0,0110171_neurosketch,0.013969,0.026494,0.024232,0.002145,0.017853,-0.066191,-0.038201,0.026076,...,-0.0347,2.7e-05,-0.064084,-0.086041,-0.087247,-0.050124,-0.094722,-0.050321,-0.127886,-0.021643
1,1,0110172_neurosketch,-0.010256,-0.00122,0.024393,-0.045568,-0.019246,-0.008816,0.007859,0.026258,...,-0.030602,-0.0176,-0.115821,0.012603,0.003377,-0.029311,-0.01786,0.038456,0.041973,-0.032208
2,2,0111171_neurosketch,0.067659,0.037721,0.044253,0.078619,0.021371,-0.093432,-0.043942,-0.033467,...,-0.005794,-0.050809,0.009089,0.059522,0.052912,0.100313,-0.011249,0.139831,0.036634,0.018697
3,3,0112171_neurosketch,-0.000842,0.002255,-0.034087,-0.052698,-0.055887,-0.012348,0.064003,0.103867,...,-0.015484,0.054129,-0.056987,-0.003193,0.016371,0.066079,0.065308,0.025893,-0.015257,-0.003792
4,4,0112172_neurosketch,0.015042,0.002949,0.002315,-0.000101,0.00723,0.003432,-0.033731,-0.010261,...,-0.029092,0.007124,0.039762,0.03822,-0.014906,-0.004139,-0.060099,0.036545,0.048539,0.034781


In [28]:
# Build `neural_diff`: one row per subject, and for every (condition, ROI) pair
# the raw measure column plus a companion `<name>_order` column.
# NOTE(review): the condition list pairs 'trained' with 'condiff'; confirm this
# mix (rather than trained/control or tradiff/condiff) is intended.
measure_names = [cond + '_' + roi
                 for cond in ['trained', 'condiff']
                 for roi in ROI_all]

neural_diff = pd.DataFrame(neural_diff_raw['IDs'])

for layer_name in measure_names:
    layer_name_order = layer_name + '_order'

    # Copy the measure, then store its argsort next to it.
    # NOTE(review): argsort gives the positions that would sort the column, not
    # each subject's rank -- verify this is the "order" that is wanted.
    neural_diff[layer_name] = neural_diff_raw[layer_name]
    neural_diff[layer_name_order] = np.argsort(neural_diff_raw[layer_name])


# Plotting collapsed max/avg/location of max from neural_diff 

In [25]:
# Preview the first five rows of the per-subject measure/order table.
neural_diff.head(n=5)

Unnamed: 0,IDs,trained_V1,trained_V1_order,trained_V2,trained_V2_order,trained_LOC,trained_LOC_order,trained_IT,trained_IT_order,trained_fusiform,...,trained_rostMFG,trained_rostMFG_order,trained_caudMFG,trained_caudMFG_order,trained_precentral,trained_precentral_order,trained_SMG,trained_SMG_order,trained_STG,trained_STG_order
0,0110171_neurosketch,0.013969,17,0.026494,17,0.024232,10,0.002145,17,0.017853,...,-0.100491,25,-0.067678,25,-0.042459,10,-0.047764,10,0.023193,10
1,0110172_neurosketch,-0.010256,29,-0.00122,25,0.024393,8,-0.045568,10,-0.019246,...,-0.019949,28,-0.089583,5,-0.044435,8,-0.002139,8,-0.00091,25
2,0111171_neurosketch,0.067659,25,0.037721,29,0.044253,25,0.078619,9,0.021371,...,0.056324,0,0.05289,1,-0.070534,2,0.026195,9,-0.020822,8
3,0112171_neurosketch,-0.000842,10,0.002255,10,-0.034087,3,-0.052698,3,-0.055887,...,0.008033,17,0.059755,10,-0.022817,25,0.006325,17,-0.01867,19
4,0112172_neurosketch,0.015042,13,0.002949,8,0.002315,19,-0.000101,1,0.00723,...,-0.000506,10,0.05204,11,0.082712,9,0.03211,21,0.082915,17


In [43]:
# Median split of subjects on one neural change measure (first pass: V1 only),
# then a walk over trials selecting each group's sketch rows.
trials = np.unique(partial['trial'])

df = []


# def split_by_media(p_df, neural_df, roi, cond):
if True:
    roi = 'V1'
    p_df = partial
    neural_df = neural_diff
    cond = 'condiff'

    # get the top/bottom avg subjects for this ROI
    layer_name = cond + '_' + roi

    # Median computed once and reused for both halves of the split.
    roi_median = np.median(neural_df[layer_name])
    above_med_subj = neural_df['IDs'][neural_df[layer_name] >= roi_median]
    below_med_subj = neural_df['IDs'][neural_df[layer_name] < roi_median]

    for t_i, t in enumerate(trials):

        # Fixed: the zip(...) call was missing its closing parenthesis, and the
        # filter referenced an undefined `group` instead of `subj_group`.
        for subj_group, name in zip([above_med_subj, below_med_subj], ['top_max', 'bottom_max']):
            # Rows of this trial that belong to the current subject group.
            # Overwritten on every pass, so only the last group's rows survive the loop.
            partial_i = p_df[(p_df['trial'] == t) & (p_df['wID'].isin(subj_group))]

        # FIXME: top_max, bottom_max, top_max_i, bottom_max_i, top_avg and
        # bottom_avg are not defined anywhere in the visible notebook, so this
        # line raises NameError. They still have to be computed per group from
        # `partial_i` inside the loop above.
        df.append([roi, t, top_max, bottom_max, top_max_i, bottom_max_i, top_avg, bottom_avg])

    


> <ipython-input-43-5f9c6d1b9f76>(23)<module>()
-> for t_i, t in enumerate(trials):
(Pdb) partial_i.head()
      Unnamed: 0  index                  wID  viewpoint  trial  trialDuration  \
1840        1840      0  0125171_neurosketch         26    320       39.00267   
1841        1841      0  0125171_neurosketch         26    320       39.00267   
1842        1842      0  0125171_neurosketch         26    320       39.00267   
1843        1843      0  0125171_neurosketch         26    320       39.00267   
1844        1844      0  0125171_neurosketch         26    320       39.00267   

     target competitor  numSketch       bed     bench     chair     table  \
1840  bench      chair          0  0.542702  0.453491  0.003676  0.000130   
1841  bench      chair          1  0.023311  0.963700  0.012762  0.000227   
1842  bench      chair          2  0.489245  0.416566  0.094088  0.000101   
1843  bench      chair          3  0.002681  0.996557  0.000740  0.000022   
1844  bench      chai

BdbQuit: 

In [24]:
# Scratch check of list.append: the argument is always added as ONE element.
foo = list(range(1, 4))
foo.append(4)            # a scalar becomes a single new element
foo.append([2, 3, 4])    # a list is also added as a single, nested element
foo

[1, 2, 3, 4, [2, 3, 4]]

In [None]:
# Figure height scales with the number of ROIs (7 units per ROI).
# Fixed: `ROI` is not defined in any cell above; the ROI list is `ROI_all`,
# which is also what the loop below iterates over.
figure = plt.figure(figsize = (20, 7 *len(ROI_all)))

for roi in ROI_all:
    