In [None]:
import keras
import keras.backend as K
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.applications.imagenet_utils import decode_predictions
from keras.models import Model
import tensorflow as tf
import os
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['image.cmap'] = 'Blues'
import numpy as np
np.random.seed(10)
import pandas as pd
import seaborn as sns
import scipy
import statsmodels
import json
import math
import imageio
import itertools
from PIL import Image, ImageDraw
from IPython.display import clear_output
from functools import reduce, partial
from scipy.stats import linregress, t, norm

<a id='TOC'></a>
# Running Synthesis Validation

1. [Arrangement Task](#arrangement)
    * [Helper functions](#helper-functions)
    * [Building stimulus lists](#stim-lists)
    * [Preprocessing data](#preprocessing)
    * [Turn to dataframe](#turn-to-df)
2. [Arrangement Analysis](#arrange-analysis)
    * [Distance computations](#distance-comp) 
    * [Plotting model features](#model-plot)
3. [Feature Relationship Computations](#feature-comp) 
    * [Initial inception setup](#initial-incept) 
    * [Inception feature extraction](#incept-extract) 
    * [Initial VGG19 setup](#initial-vgg)
    * [VGG19 feature extraction](#vgg-extract) 
    * [Plotting layer heatmaps](#layer-heatmaps)
    * [Plotting relevant layer scatterplots](#layer-scatters)
    * [Printing feature correlation values](#feature-corrs)
4. [fMRI Data Analysis](#fmri-analysis) 
    * [Pre-learning Correlation](#prelearning-corr)
    * [Comparing to Noise](#prelearning-noise)
5. [Visualize Arrangement Trials](#arrangement-visualize) 
    * [Helper functions](#helpers-visualize)
    * [Example visualizations](#example-visualize)

<a id='arrangement'></a>
# Arrangement Task

The first chunk of this notebook analyzes behavioral data from the arrangement task that participants completed. Their task was to drag-and-drop subsets of the images until the images placed closest together were the most similar. Each participant completed at least 10 trials, and across these trials, we get distance measures for our critical image pairs. The cells that follow parse this data, perform distance computations, and plot them against our model similarity levels. 

[(back to top)](#TOC)

<a id='helper-functions'></a>
## Helper functions

The functions below read, place, and populate distance matrices for the 128 synthesized stimuli.

[(back to top)](#TOC)

In [None]:
# Given an item and a given trials dictionary, retrieve the x and y coordinates

def retrieve_coords(coord_dict,item):
    """Look up ``item`` in ``coord_dict`` and parse its position as integers.

    The stored value is converted to a string, single quotes are removed and
    ``", "`` separators are collapsed to ``","``. The text is then split on
    commas and the first two fields are returned as ``(x, y)``; any further
    fields are ignored.
    """
    raw = str(coord_dict[item])
    cleaned = raw.replace("'", "").replace(", ", ",")
    fields = cleaned.split(',')
    x = int(fields[0])
    y = int(fields[1])
    return x, y

# Given a dictionary, return the keys (all items present in the dict)

def retrieve_keys(coord_dict):
    """Return every key of ``coord_dict`` as a new, sorted list."""
    return sorted(coord_dict.keys())

# Given a dictionary and two critical items, compute the Euclidean distance

def compute_dist(coord_dict, item1, item2):
    """Return the straight-line distance between two items of one trial.

    Both positions are read with ``retrieve_coords``; the result is
    ``sqrt(dx**2 + dy**2)``, expressed in the units of the stored coordinates.
    """
    first = retrieve_coords(coord_dict, item1)
    second = retrieve_coords(coord_dict, item2)
    dx = first[0] - second[0]
    dy = first[1] - second[1]
    return np.sqrt(np.square(dx) + np.square(dy))

# Create an n by n pandas dataframe, where n is the total number of keys. If full=None, it is blank;
# if full=df, where df is a given dataframe (or array), it will be populated with those values.
# This is useful for switching to np.array and back

def create_mat(full_keys, full=None):
    """Build a square DataFrame labelled by ``full_keys`` on both axes.

    ``full`` is passed to ``pandas.DataFrame`` as the data argument. With the
    default ``None`` every cell of the returned frame is missing (NaN).
    """
    return pd.DataFrame(full, index=full_keys, columns=full_keys)

# Given a pandas dataframe, two items, and a distance between them, this function places the distance appropriately

def place_dist(fillable_mat, item1, item2, dist):
    """Store ``dist`` in both mirrored cells of ``fillable_mat``.

    The frame is modified in place (``[item1, item2]`` first, then
    ``[item2, item1]``) and nothing is returned.
    """
    for row, col in ((item1, item2), (item2, item1)):
        fillable_mat.loc[row, col] = dist

# Pull a particular distance given a matrix and a pair of items    

def pull_dist(fillable_mat, item1, item2):
    """Return the value stored at row ``item1``, column ``item2``."""
    return fillable_mat.loc[item1, item2]

# Once pd dataframes have been turned into arrays, this compiles them and computes the mean across the 3rd dimension
# This is because we will have multiple trials, where some have distances for a particular pair and others don't
# If standard, it will standardize the subjects' distances with respect to their own judgments

def std_array(in_array):
    """Z-score ``in_array`` against its own NaN-ignoring mean and std.

    The input is first converted to a float64 array. NaN cells stay NaN and
    do not contribute to the mean or the standard deviation.
    """
    values = np.array(in_array, np.float64)
    centre = np.nanmean(values)
    spread = np.nanstd(values)
    return (values - centre) / spread

def nan_mean(arrays, standard=False):
    """Average a collection of arrays cell by cell, skipping NaNs.

    The arrays are stacked with ``np.dstack`` and reduced along axis 2.
    When ``standard`` is true the whole stack is first z-scored against its
    global NaN-ignoring mean and standard deviation.
    """
    stacked = np.dstack(arrays)
    if standard:
        grand_mean = np.nanmean(stacked)
        grand_std = np.nanstd(stacked)
        stacked = (stacked - grand_mean) / grand_std
    return np.nanmean(stacked, axis=2)

def nan_median(arrays, standard=False):
    """Take the cell-by-cell median of a collection of arrays, skipping NaNs.

    The arrays are stacked with ``np.dstack`` and reduced along axis 2.
    When ``standard`` is true the whole stack is first z-scored against its
    global NaN-ignoring mean and standard deviation.
    """
    stacked = np.dstack(arrays)
    if standard:
        grand_mean = np.nanmean(stacked)
        grand_std = np.nanstd(stacked)
        stacked = (stacked - grand_mean) / grand_std
    return np.nanmedian(stacked, axis=2)

def subset(fullarray, run, AIX, BIX):
    """Pull the paired entries ``(AIX[k], BIX[k])`` out of one run.

    ``fullarray[:, run]`` is selected, its first and last axes are swapped,
    the result is indexed pairwise with ``[AIX, BIX]``, and the axes are
    swapped back so the leading axis of the input comes first again.

    NOTE(review): presumably ``fullarray`` is (subjects, runs, images,
    images) -- confirm. For such a 4-D input the value returned at ``[s, k]``
    is ``fullarray[s, run, BIX[k], AIX[k]]``, which equals the ``[AIX, BIX]``
    entry only when each image-by-image matrix is symmetric.
    """
    run_slice = fullarray[:, run]
    flipped = np.swapaxes(run_slice, 0, -1)
    picked = flipped[AIX, BIX]
    return np.swapaxes(picked, 0, -1)

def shufPrepost(fullarray):
    """Draw a random A/B pairing and return its entries for runs 0 and 7.

    Eight indices are drawn without replacement from the even positions
    0..14 and eight from the odd positions 1..15, i.e. each set is a random
    permutation. The same pairing is applied to both runs through ``subset``
    and the two results are returned as ``(pre, post)``.
    """
    even_ix = np.arange(0, 15, 2)
    odd_ix = np.arange(1, 16, 2)
    # Draw order is kept (A first, then B) so seeded runs stay reproducible.
    AIX = np.random.choice(even_ix, 8, replace=False)
    BIX = np.random.choice(odd_ix, 8, replace=False)
    return subset(fullarray, 0, AIX, BIX), subset(fullarray, 7, AIX, BIX)

def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Return a new colormap covering only part of ``cmap``.

    Parameters
    ----------
    cmap : colormap object
        Must be callable on an array of positions and expose ``.name``.
    minval, maxval : float
        Start and end positions (within the 0-1 range of ``cmap``) to keep.
    n : int
        Number of evenly spaced samples taken between the two positions.

    Returns
    -------
    matplotlib.colors.LinearSegmentedColormap
        Named ``'trunc(<name>,<minval>,<maxval>)'``.
    """
    # Imported here because the notebook never binds ``colors`` to
    # ``matplotlib.colors``, and a plotting cell rebinds ``colors`` to a list.
    from matplotlib.colors import LinearSegmentedColormap

    new_cmap = LinearSegmentedColormap.from_list(
        'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval),
        cmap(np.linspace(minval, maxval, n)))
    return new_cmap

def spot_color(cmap, point=0.5):
    """Return the RGBA colour found at position ``point`` of a colormap.

    Parameters
    ----------
    cmap : str or colormap object
        Anything accepted by ``matplotlib.pyplot.get_cmap``.
    point : float
        Position along the colormap at which to sample.
    """
    # ``matplotlib.cm.get_cmap`` was deprecated in Matplotlib 3.7 and removed
    # in 3.9; the pyplot accessor takes the same argument and still exists.
    cmap = plt.get_cmap(cmap)
    rgba = cmap(point)
    return rgba

def corr_compare(xy, xz, yz, n, twotailed=True, conf_level=0.95):
    """Compare two dependent correlations that share the variable ``x``.

    Implements the t statistic from Steiger (1980),
    https://psycnet.apa.org/fulltext/1980-08757-001.pdf, referred to a
    t distribution with ``n - 3`` degrees of freedom.

    Parameters
    ----------
    xy, xz : float
        The two correlations being compared.
    yz : float
        Correlation between the two non-shared variables.
    n : int
        Number of observations behind each correlation.
    twotailed : bool
        If true, the tail probability is doubled.
    conf_level : float
        Not used by the computation; kept so existing calls keep working.

    Returns
    -------
    (t2, pval)
        The test statistic and its p-value. The p-value is always based on
        ``abs(t2)``, including in the one-tailed case.
    """
    d = xy - xz
    determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
    av = (xy + xz)/2
    cube = (1 - yz) * (1 - yz) * (1 - yz)

    t2 = d * np.sqrt((n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + av * av * cube)))
    # Use the survival function instead of ``1 - cdf``: the subtraction
    # rounds every p-value below roughly 1e-16 to exactly 0.
    pval = t.sf(abs(t2), n - 3)

    if twotailed:
        pval = pval * 2

    return t2, pval


<a id='stim-lists'></a>
## Building stimulus lists

This cell uses the channel axes, similarity levels and A B tags to build a compiled list of all of the possible experimental stimuli.

[(back to top)](#TOC)

In [None]:
chanPairs = ['11-5', '47-34', '56-68', '40-20', '17-85', '79-97', '102-109', '83-101']
simLevels = [0, 14, 29, 43, 57, 71, 86, 100]

# One name per (channel pair, similarity level, pair mate). The pair mate
# varies fastest, so the A and B images of a pair sit next to each other.
all_keys = [
    'c{}_{}{}'.format(chanPair, simLevel, pairMate)
    for chanPair in chanPairs
    for simLevel in simLevels
    for pairMate in ['A', 'B']
]

print('compiled {} image file names'.format(len(all_keys)))

<a id='preprocessing'></a>
## Preprocessing group data

1. Reads in the data files, then loops through each line (which should be a single subject)
2. Each line is an entire subject's data, which is then parsed
3. Finds the string index where the data from each trial number starts (crit_ind)
4. Uses these onsets to loop through and parse the reaction time, build a dictionary of images and their locations
5. Runs a bunch of ugly code that makes the dictionary legible and useable for analyses
6. Compiles a numpy array containing all the subject IDs, trial numbers, RTs, and dictionaries

[(back to top)](#TOC)

In [None]:
# Parse the raw batch files into parallel per-trial lists: qualification code,
# subject code, trial number, trial time and the image -> location dictionary.
datafiles = [open('drag/Batch{}.txt'.format(i)) for i in [1,2,3]]

sub_array = []
qual_array = []
trial_nums = []
times = []
dicts = []

try:
    for datafile in datafiles:
        # each line is a participant
        for line in datafile:
            # split each line once into the subject identifiers, then the actual data
            fields = line.split(',')
            qual_code = fields[0]
            turk_code = fields[1]
            trials = np.array(fields[3:])
            # count the number of trials (one '{' per trial dictionary)
            ntrials = line.count('{')
            crit_inds=[]
            for i in range(1,ntrials+1):
                # for each trial, find the first field that is exactly the trial number
                crit = np.in1d(trials, str(i))
                crit_ind = [ind for ind, hit in enumerate(crit) if hit][0]
                crit_inds.append(crit_ind)
            for i in range(len(crit_inds)):
                # using these indices, slice out one trial at a time
                ind = crit_inds[i]
                # trial numbers are stored zero-based
                trial_num = i
                # the field right after the trial number is the trial time
                time = trials[ind+1]
                # Peel away all the nonsense punctuation for each trial: the
                # fields are stringified and only the text after the last '{' is kept
                if i < len(crit_inds)-1:
                    next_ind = crit_inds[i+1]
                    loc_dict = str(trials[ind+2:next_ind]).split('{')[-1]
                else:
                    loc_dict = str(trials[ind+2:]).split('{')[-1]

                # keep what precedes the first '}', restore the braces and
                # undo the array formatting so the text parses as JSON
                loc_dict = str(loc_dict).split('}')[0]
                loc_dict = '{' + loc_dict + '}'
                loc_dict = loc_dict.replace("\n", "")
                loc_dict = loc_dict.replace("' '", "', '")
                loc_dict = loc_dict.replace("'", "")
                loc_dict = json.loads(loc_dict)

                # Appends relevant data to lists.
                sub_array.append(turk_code)
                qual_array.append(qual_code)
                trial_nums.append(trial_num)
                times.append(time)
                dicts.append(loc_dict)
finally:
    # The handles were previously left open for the rest of the session.
    for datafile in datafiles:
        datafile.close()

sub_array = np.array(sub_array)
qual_array = np.array(qual_array)
trial_nums = np.array(trial_nums)
times = np.array(times)
dicts = np.array(dicts)

print('found {} total trials from {} participants'.format(sub_array.shape[0], np.unique(sub_array).shape[0]))

<a id='turn-to-df'></a>
## Turn to dataframe

This cell turns the data into something more human readable in a dataframe, then displays part of it.

[(back to top)](#TOC)

In [None]:
# One row per trial: stack the five parallel arrays and flip them so that each
# array becomes a column.
stacked = np.vstack((qual_array, sub_array, trial_nums, times, dicts))
data_full = pd.DataFrame(stacked.T, columns=['qual','sub','trial','time','coords'])

# Trial times are divided by 1000 and then by 60 and reported as minutes.
rts = np.array(data_full['time'], dtype=np.int64) / 1000 / 60
print('average trial time = {} minutes'.format(np.around(np.mean(rts), 3)))
# Row positions of the trials that took longer than three minutes.
long_rts = [position for position, minutes in enumerate(rts) if minutes > 3]

data_full.head(10)


<a id='arrange-analysis'></a>
# Arrangement Analysis

The first chunk of this notebook analyzes behavioral data from the arrangement task that participants completed. Their task was to drag-and-drop subsets of the images until the images placed closest together were the most similar. Each participant completed at least 10 trials, and across these trials, we get distance measures for our critical image pairs. The cells that follow parse this data, perform distance computations, and plot them against our model similarity levels.

[(back to top)](#TOC)

<a id='distance-comp'></a>
## Distance computations

For each subject:
1. We read in their trialwise data and for each trial build an empty 128 x 128 dataframe.
2. Cycle through the possible pairs present in this trial.
3. Calculate the distance between each image in each pair (either standardized within trial or not).
4. Compile dataframes in third dimension across trials.
5. Find average for each participant, and count the number of ratings for each pair.

[(back to top)](#TOC)

In [None]:
# standardize within trial? When True, the distance cell z-scores each trial's
# matrix with std_array before averaging; when False, raw distances are kept.
standard = False

In [None]:
# Build one distance matrix per trial, average within subject, then across subjects.
allSubs = []
subjects = np.unique(data_full['sub'])
# Set lookup instead of scanning the all_keys list for every candidate pair.
known_keys = set(all_keys)
for si, subject in enumerate(subjects):
    arrays = []
    sub_data = data_full[data_full['sub']==subject]
    sub_trials = np.arange(sub_data.shape[0])
    for trial in sub_trials:
        print("subject {}, {}/{}, trial {}/{}".format(subject, si+1, len(subjects), trial + 1, sub_trials.shape[0]))
        clear_output(wait=True)
        # blank (all-NaN) matrix over every stimulus; only pairs seen in this trial get filled
        thisTrial = create_mat(all_keys, full=None)
        coord_dict = sub_data.loc[sub_data['trial'] == trial, 'coords'].iloc[0]
        key = retrieve_keys(coord_dict)
        # Filter once, then visit each unordered pair (including an item with
        # itself) a single time: place_dist already writes both mirrored
        # cells, so the filled matrix is the same as with the full i-by-j loop.
        present = [k for k in key if k in known_keys]
        for i, j in itertools.combinations_with_replacement(present, 2):
            dist_measure = compute_dist(coord_dict, i, j)
            place_dist(thisTrial, i, j, dist_measure)
        # standardize the distance within trial
        thisTrial = std_array(thisTrial) if standard else thisTrial
        arrays.append(np.array(thisTrial))
    # this contains this subject's trial set
    arrays = np.array(arrays, np.float64)
    # quantifying how many ratings have been given per pair
    numTrials = np.count_nonzero(~np.isnan(arrays), axis=0)
    numTrials = create_mat(all_keys, full=numTrials)
    # Getting the average for this subject (NaN where a pair was never shown)
    subjMean = nan_mean(arrays, standard=False)
    subjMeanDF = create_mat(all_keys, full=subjMean)
    allSubs.append(np.array(subjMean))
    
# compiled data from all subjects
allSubs = np.array(allSubs, np.float64)
# number of subjects contributing a value for each pair
allCount = np.count_nonzero(~np.isnan(allSubs), axis=0)
# mean across subjects
allSubsMean = nan_mean(allSubs, standard=False)
# piling into dataframes
allCountDF = create_mat(all_keys, full=allCount)
allSubsMeanDF = create_mat(all_keys, full=allSubsMean)
print('done')

This cell reduces the distance values to the values that were set when producing the stimulus set.

In [None]:
# all_keys alternates the A and B image of each pair, so walking it two at a
# time yields the 64 critical pairs in channel-pair-major order.
pairDists = []
for A, B in zip(all_keys[0::2], all_keys[1::2]):
    # both names must be identical apart from the trailing A/B tag
    assert A[:-1] == B[:-1]
    pairDists.append(pull_dist(allSubsMeanDF, A, B))
# rows: channel pairs, columns: similarity levels
sortOutput = np.reshape(np.array(pairDists), (8,8))

<a id='model-plot'></a>
## Plotting model features

Plot each set of the eight selected endpoints, and all of the eight similarity levels in a heatmap, where each cell is the relative distance between those images in participant behavioral ratings

[(back to top)](#TOC)

In [None]:
# Heatmap of the mean behavioural distance: one row per set of endpoints,
# one column per similarity level.
narrowFont = {'fontname':'Arial Narrow'}

fig, ax = plt.subplots(1,1,figsize=(10,10))
im = ax.matshow(sortOutput)
cbar = fig.colorbar(im)

# x axis: similarity levels, labelled 1-8 and moved below the image
ax.set_xticks(np.arange(8))
ax.set_xticklabels(np.arange(1,9), fontsize=20, **narrowFont)
ax.set_xlabel('Similarity Level', fontsize=28, **narrowFont)
ax.xaxis.set_ticks_position('bottom')

# y axis: one row per channel pair
ax.set_yticks(np.arange(8))
ax.set_yticklabels(chanPairs, fontsize=20, **narrowFont)
ax.set_ylabel('Endpoints', fontsize=28, **narrowFont)

cbar.ax.set_ylabel('Pixel Distance', fontsize=28, **narrowFont)
cbar.ax.tick_params(labelsize=20)
for l in cbar.ax.yaxis.get_ticklabels():
    l.set_family("Arial Narrow")
    
    
plt.savefig('normDist', dpi=1200)
plt.show()

# The legend text now matches the figure: the level ticks sit at the bottom
# and are labelled 1-8 (it previously said "across the top" and "0 ... 7").
print('ALONG THE BOTTOM: 1 is the most dissimilar (by the model), and 8 is the most similar')
print('DOWN THE SIDE: The numbers are meaningless, its simply the different sets of endpoints')

In [None]:
# Three panels: group-mean distances with a linear fit, one fitted line per
# participant, and each participant's level ~ distance correlation as a bar.

# ``matplotlib.cm.get_cmap`` was removed in Matplotlib 3.9; the pyplot
# accessor takes the same argument.
cmap = plt.get_cmap('cool')

# Similarity level index (0-7) of each of the 64 critical pairs, in all_keys order.
x = np.tile(np.arange(8), 8)
allSubsCorrs = []
fig, axes = plt.subplots(1,3,figsize=(16,7), gridspec_kw={'width_ratios': [3, 3, 1]})
for sub in range(allSubs.shape[0]):
    thisSub = allSubs[sub]
    thisSubDF = create_mat(all_keys, full=thisSub)
    thisOutput = []

    # pull this participant's distance for every A/B pair
    for impair in range(0, len(all_keys), 2):
        A = all_keys[impair]
        B = all_keys[impair + 1]
        assert A[:len(A)-1] == B[:len(B)-1]
        thisDist = pull_dist(thisSubDF, A, B)
        thisOutput.append(thisDist)
    thisOutput = np.array(thisOutput)
    thissubCorr = np.corrcoef(np.vstack((x, thisOutput)))[0,1]
    allSubsCorrs.append(thissubCorr)
    allOutputs = thisOutput if sub == 0 else np.vstack((allOutputs, thisOutput))

# participants ordered from most negative to most positive correlation
sortIX = np.argsort(allSubsCorrs)
colors = []

for si, sub in enumerate(sortIX):
    r,g,b,a = cmap(si/allSubs.shape[0])
    thisOutput = allOutputs[sub]
    z = np.polyfit(x, thisOutput, 1)
    p = np.poly1d(z)
    axes[1].plot(np.arange(-1,9),p(np.arange(-1,9)),color=(r,g,b,0.4), linewidth=15)
    colors.append((r,g,b,0.4))

meanOutputs = np.mean(allOutputs, 0)

z = np.polyfit(x, meanOutputs, 1)
p = np.poly1d(z)
axes[0].plot(np.arange(-1,9),p(np.arange(-1,9)),color=(0.5,0.5,0.5,0.4), linewidth=15,zorder=-5)
# opaque white discs underneath keep the fit line from showing through the points
axes[0].scatter(x, meanOutputs, 350, color=(1,1,1,1))
axes[0].scatter(x, meanOutputs, 350, color=(0.5,0.5,1,0.7))
axes[0].set_ylim(0,550)
axes[0].set_xlim(-0.5,7.5)
axes[0].set_yticks(np.arange(0,551,100))
axes[0].set_yticklabels(np.arange(0,551,100), fontsize=20, **{'fontname':'Arial Narrow'})
axes[0].set_xticks(np.arange(8))
axes[0].set_xticklabels(np.arange(1,9), fontsize=20, **{'fontname':'Arial Narrow'})
axes[0].set_ylabel('Distance (Pixels)', fontsize=28, **{'fontname':'Arial Narrow'})
axes[0].set_xlabel('Similarity Level', fontsize=28, **{'fontname':'Arial Narrow'})
Corr = np.corrcoef(np.vstack((x, meanOutputs)))[0,1]
axes[0].text(0, 20, 'r = {}'.format(np.around(Corr, 2)), color=(0.5,0.5,1,0.7), 
             fontsize=64, **{'fontname':'DIN Condensed'})

print('Averaged distances -- r = {}'.format(np.around(Corr, 3)))

allSubsCorrs = np.array(allSubsCorrs)
# Sort a copy: sorting through an alias also reordered allSubsCorrs itself,
# losing the participant order of the per-subject correlations.
sortCorrs = np.sort(allSubsCorrs)
axes[1].set_ylim(0,550)
axes[1].set_xlim(-0.5,7.5)
axes[1].set_yticks([])
axes[1].set_xticks(np.arange(8))
axes[1].set_xticklabels(np.arange(1,9), fontsize=20, **{'fontname':'Arial Narrow'})
axes[1].set_xlabel('Similarity Level', fontsize=28, **{'fontname':'Arial Narrow'})
axes[1].text(0, 20, 'r = {}'.format(np.around(np.mean(sortCorrs), 2)), color=(0.5, 0.5, 0.5, 0.5), 
             fontsize=64, **{'fontname':'DIN Condensed'})
axes[1].plot([0,0.35], [75,75], color=(0.5, 0.5, 0.5, 0.5), linewidth=7)

# bootstrap the mean per-participant correlation (resampling participants)
boots = []
for i in range(10000):
    IX = np.random.choice(sortCorrs.shape[0], sortCorrs.shape[0])
    this = sortCorrs[IX]
    boots.append(np.mean(this))
print('Across participants -- M = {}, CI95 = [{} {}]'.format(np.around(np.mean(boots), 3), 
                                                             np.around(np.percentile(boots, 2.5), 3),
                                                             np.around(np.percentile(boots, 97.5), 3)))

# One bar per participant; the limit follows the number of participants
# instead of assuming 30.
axes[2].set_ylim(-0.5, allSubs.shape[0] - 0.5)
axes[2].barh(np.arange(allSubs.shape[0]), sortCorrs, color=colors)
# No y ticks on the bar panel. The earlier calls created a single tick at
# -0.8 (np.arange with its default step of 1) and passed six labels for it,
# which recent Matplotlib versions reject as a tick/label count mismatch.
axes[2].set_yticks([])
axes[2].spines['top'].set_visible(False)
axes[2].spines['left'].set_visible(False)
axes[2].set_xticks(np.arange(-0.8,0.01,0.2))
axes[2].set_xticklabels(np.around(np.arange(-0.8,0.01,0.2),2), fontsize=20, **{'fontname':'Arial Narrow'})
axes[2].set_xlabel('Correlation', fontsize=28, **{'fontname':'Arial Narrow'})
axes[2].set_ylabel('Individual Participants', fontsize=28, labelpad=30, rotation=270, **{'fontname':'Arial Narrow'})
axes[2].yaxis.set_label_position("right")

plt.tight_layout()
plt.show()

<a id='feature-comp'></a>
# Feature Correlation Computations

[(back to top)](#TOC)

In [None]:
# Directory from which the frozen Inception graph (tensorflow_inception_graph.pb) is read.
modelLoc = '../synthesis'
# Directory prefix used when building the stimulus image file names.
imageLoc = 'stim'

In [None]:
chanPairs = ['11-5', '47-34', '56-68', '40-20', '17-85', '79-97', '102-109', '83-101']
simLevels = [0, 14, 29, 43, 57, 71, 86, 100]

# File name of every stimulus image, ordered by channel pair, then similarity
# level, then pair mate (A before B).
Labels = [
    '{}/c{}_{}{}.png'.format(imageLoc, chanPair, simLevel, pairMate)
    for chanPair in chanPairs
    for simLevel in simLevels
    for pairMate in ['A', 'B']
]

print('compiled {} image file names'.format(len(Labels)))

In [None]:
def get_tensor(layers):
    """Return the output tensor ``import/<layer>:0`` for each name in ``layers``.

    Tensors are looked up in the module-level ``graph`` and returned in the
    same order as the names were given.
    """
    return [graph.get_tensor_by_name("import/{}:0".format(layer)) for layer in layers]

<a id='initial-incept'></a>
## Initial Inception setup

[(back to top)](#TOC)

In [None]:
# Path of the frozen Inception graph inside modelLoc.
model_fn = '{}/tensorflow_inception_graph.pb'.format(modelLoc)

# Create a fresh graph, parse the serialized GraphDef from disk and import it
# behind a placeholder. Imported nodes get the default "import/" name prefix,
# which get_tensor relies on.
graph = tf.Graph()
with graph.as_default(), graph.device('/cpu:0'), tf.Session() as sess:
    with tf.gfile.FastGFile(model_fn, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    t_input = tf.placeholder(np.float32, name='input') # placeholder that receives one image at a time
    # constant subtracted from every pixel before the image enters the network
    imagenet_mean = 117.0
    # add a leading batch axis of size one
    t_preprocessed = tf.expand_dims(t_input-imagenet_mean, 0)
    # wire the preprocessed placeholder to the graph's own 'input' node
    tf.import_graph_def(graph_def, {'input':t_preprocessed})

In [None]:
# Names of every imported Conv2D operation in the Inception graph.
conv_ops = [op for op in graph.get_operations() if op.type=='Conv2D']
layers = [op.name for op in conv_ops if 'import/' in op.name]

# Keep the component after the first '/' of each name for two groups:
# layers1 -- names whose text before the first '2' is exactly 'conv'
# layers2 -- operations whose full name contains 'pool_reduce'
layers1 = []
layers2 = []
for full_name in layers:
    short_name = full_name.split('/')[1]
    if short_name.split('2')[0] == 'conv':
        layers1.append(short_name)
    if 'pool_reduce' in full_name:
        layers2.append(short_name)

incept_layers = layers1 + layers2
print(len(incept_layers))

In [None]:
# Target correlation per (layer, similarity level): a flat 0.25 for the first
# eight layers, and eight evenly spaced steps from 0 to 1 for every later layer.
GOALS = np.full((len(incept_layers), len(simLevels)), 0.25)
GOALS[8:, :] = np.arange(0, 1.01, 1/7)

<a id='incept-extract'></a>
## Inception feature extraction

[(back to top)](#TOC)

In [None]:
# CORRS[layer, similarity level, channel pair] holds the correlation between
# the A and B image activations of each pair in each Inception layer.
CORRS = np.zeros((len(incept_layers),len(simLevels),len(chanPairs)))
with graph.as_default(), graph.device('/cpu:0'), tf.Session() as sess:
    # acts[image][layer]: activations of every selected layer for every image
    acts = []
    for i, imfile in enumerate(Labels):
        print('compiling image {}/{} for inception'.format(i + 1, len(Labels)))
        clear_output(wait=True)
        img0 = np.float32(Image.open(imfile).convert("RGB"))
        act = sess.run(get_tensor(incept_layers), {t_input:img0})
        acts.append(act)
    for lnum, layer in enumerate(incept_layers):
        # Labels holds 16 consecutive images per channel pair (8 levels x A/B)
        for anum, axis in enumerate(np.arange(0, 128, 16)):
            print('computing correlation for layer {}/{}, axis {}/{}'.format(lnum + 1, len(incept_layers), 
                                                                            anum + 1, len(chanPairs)))
            clear_output(wait=True)
            # stack this channel pair's 16 activations along the first axis
            for imnum in range(axis, axis+16):
                thisact = acts[imnum][lnum]
                thislayer = thisact if imnum  == axis else np.concatenate((thislayer, thisact))
        
            # one flattened feature vector per image, then all 16 x 16 correlations
            thislayer = thislayer.reshape(16, -1)
            corrs = np.corrcoef(thislayer)
            # entries (0,1), (2,3), ... (14,15): each A image with its own B image
            relcorrs = corrs[np.arange(0,16,2), np.arange(1,17,2)]
            CORRS[lnum, :, anum] = relcorrs  

<a id='initial-vgg'></a>
## Initial VGG19 setup

[(back to top)](#TOC)

In [None]:
base_model = keras.applications.vgg19.VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)

# Layers whose outputs are read out, in the order model.predict returns them.
tapped_layers = ['block1_pool', 'block2_pool', 'block3_pool', 'block4_pool',
                 'block5_pool', 'fc1', 'fc2', 'predictions']

# Same network, but every tapped layer is exposed as an output.
model = Model(inputs=base_model.input,
              outputs=[base_model.get_layer(layer_name).output for layer_name in tapped_layers])

<a id='vgg-extract'></a>
## VGG feature extraction

[(back to top)](#TOC)

In [None]:
# Start from an empty float batch and append one preprocessed 224 x 224 image at a time.
batch = np.empty((0, 224, 224, 3), float)
for i, imfile in enumerate(Labels):
    print('compiling image {}/{} for VGG19'.format(i + 1, len(Labels)))
    clear_output(wait=True)
    img = image.load_img(imfile, target_size=(224, 224))
    # image -> array -> leading batch axis -> network preprocessing
    x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
    batch = np.concatenate((batch, x))
print('making predictions')   
# one output array per tapped layer, in the order the model was built with
layer_outputs = model.predict(batch)
b1p, b2p, b3p, b4p, b5p, fc1, fc2, pred = layer_outputs

In [None]:
# VGG19[layer, similarity level, channel pair]: correlation between the A and
# B image activations of each pair. The 'predictions' output is left out.
layer_acts = [b1p, b2p, b3p, b4p, b5p, fc1, fc2]
VGG19 = np.zeros((7,len(simLevels),len(chanPairs)))
for lnum, layer in enumerate(layer_acts):
    print(layer.shape)
    # the batch holds 16 consecutive images per channel pair
    for anum, axis in enumerate(np.arange(0, 128, 16)):
        print('computing correlation for layer {}/{}, axis {}/{}'.format(lnum + 1, 7, 
                                                                        anum + 1, len(chanPairs)))
        clear_output(wait=True)
        # one flattened feature vector per image of this channel pair
        flat_acts = layer[axis: axis+16].reshape(16, -1)
        corrs = np.corrcoef(flat_acts)
        # entries (0,1), (2,3), ... (14,15): each A image with its own B image
        VGG19[lnum, :, anum] = corrs[np.arange(0,16,2), np.arange(1,17,2)]

<a id='layer-heatmaps'></a>
## Plotting layer heatmaps

[(back to top)](#TOC)

In [None]:
# Short y-axis labels for the Inception rows and the VGG19 rows of the heatmaps.
inceptNames = ['2D0', '2D1', '2D2', '3A', '3B', '4A', '4B', '4C', '4D', '4E', '5A', '5B']
VGGNames = ['B1P', 'B2P', 'B3P', 'B4P', 'B5P', 'FC1', 'FC2']
# One label list per heatmap panel, in the order the panels are drawn (GOALS, CORRS, VGG19).
layerNames = [inceptNames, inceptNames, VGGNames] 

In [None]:
# Three heatmaps on a shared 0-1 colour scale: the correlation goals, the
# Inception outcome and the VGG19 outcome (the last two averaged over channel pairs).
fig, axes = plt.subplots(1,3, figsize=(24,8))
for ax, source, panelNames in zip(axes, [GOALS, CORRS, VGG19], layerNames):
    # 3-D sources are (layer, level, channel pair); average the channel pairs away
    toPlot = source if len(source.shape) ==2 else np.mean(source, 2)
    im = ax.imshow(toPlot, vmin=0, vmax=1, cmap='cool')
    ax.set_xticks(np.arange(8))
    ax.set_xticklabels(np.arange(1,9), fontsize=28, **{'fontname':'Arial Narrow'})
    # Every panel gets its layer labels. The former ``i == 5`` branch could
    # never run with only three panels and has been dropped.
    ax.set_yticks(np.arange(len(panelNames)))
    ax.set_yticklabels(panelNames, fontsize=28, **{'fontname':'Arial Narrow'})
    
fig.subplots_adjust(bottom=0.1, top=0.9, left=0.1, right=0.8,
                    wspace=0.2, hspace=0.2)

# a single colour bar in its own axes; all panels share vmin/vmax, so the
# last image is representative
cb_ax = fig.add_axes([0.83, 0.1, 0.02, 0.8])
cbar = fig.colorbar(im, cax=cb_ax)

cbar.ax.tick_params(labelsize=28)
for l in cbar.ax.yaxis.get_ticklabels():
    l.set_family("Arial Narrow")
cbar.set_ticks(np.arange(0, 1.1, 0.25))
cbar.set_ticklabels(np.arange(0, 1.1, 0.25))

plt.savefig('figures/modelVal.pdf', dpi=600)
plt.show()

<a id='layer-scatters'></a>
## Plotting relevant layer scatterplots

[(back to top)](#TOC)

In [None]:
# Map layer indices 8-11 to four 0-255 RGB colours sampled from the 'cool'
# colormap at 0.35, 0.45, 0.55 and 0.65 (the alpha channel is dropped).
blankcols = [
    tuple(channel * 255 for channel in spot_color('cool', position)[:3])
    for position in [0.35, 0.45, 0.55, 0.65]
]
colorDict = dict(zip([8,9,10,11], blankcols))

In [None]:
# One scatter panel per Inception layer 8-11: correlation goal (x) against the
# correlation actually obtained (y), one point per similarity level and channel pair.
fig, axes = plt.subplots(1, 4, figsize=(16,5), sharex=True, sharey=True)
for ax, layerIX in zip(axes, range(8,12)):
    # colorDict stores 0-255 values; matplotlib expects 0-1
    rr,gg,bb = colorDict[layerIX]
    rr, gg, bb = rr/255, gg/255, bb/255
    for axis in range(CORRS.shape[2]):
        ax.scatter(GOALS[layerIX], CORRS[layerIX,:,axis], 150, alpha=0.4, color=(rr,gg,bb))
    # only the first panel carries the y label and y tick labels
    if layerIX == 8:
        ax.set_ylabel('Inception Outcome', fontsize=28, **{'fontname':'Arial Narrow'})
        ax.set_yticks(np.arange(0,1.01, 0.25))
        ax.set_yticklabels(np.arange(0,1.01, 0.25), fontsize=20, **{'fontname':'Arial Narrow'})
    ax.set_xticks(np.arange(0,1.01, 0.25))
    ax.set_xticklabels(np.arange(0,1.01, 0.25), fontsize=20, **{'fontname':'Arial Narrow'})
    # layer name drawn inside the panel in the panel's own colour
    ax.text(0.8, 0, inceptNames[layerIX], color=(rr,gg,bb,1), fontsize=64, **{'fontname':'DIN Condensed'})

# shared x label, placed slightly below the figure
fig.text(0.5, -0.05, 'Correlation Goal', ha='center', fontsize=28, **{'fontname':'Arial Narrow'})
plt.xlim(-0.02,1.02)
plt.ylim(-0.02,1.02)
plt.tight_layout()
plt.savefig('figures/modelCorr.pdf', dpi=600, bbox_inches = "tight")
plt.show()

<a id='feature-corrs'></a>
## Printing feature correlation values

[(back to top)](#TOC)

In [None]:
from scipy.stats import linregress, t, norm

In [None]:
def _pairwise_abs_diffs(values):
    """Return |values[j] - values[k]| for every pair j < k of a 1-D array."""
    values = np.asarray(values)
    first, second = np.triu_indices(values.shape[0], k=1)
    return np.absolute(values[first] - values[second])


# Inception layers 1-8: report how little the obtained correlations vary.
# GOALS[9] (the graded 0-1 goal row) serves as the reference for the r value.
print()
print('INCEPTION')
for layerIX in range(8):
    print()
    print('------------ layer {} ------------'.format(layerIX+1))
    allX = np.repeat(GOALS[9][:, np.newaxis], 8, axis=1).flatten()
    allY = CORRS[layerIX].flatten()
    forCorr = np.vstack((allX, allY))
    # absolute difference between every pair of points in this layer
    diffs = _pairwise_abs_diffs(allY)
    print('all points r = {}, SD = {}, all points d = {}, CI95 = [{} {}]'.format(np.around(np.corrcoef(forCorr)[0,1],3),
                                                                                 np.around(np.std(allY), 3),
                                                                                 np.around(np.mean(diffs),3),
                                                                                 np.around(np.percentile(diffs, 2.5),3),
                                                                                 np.around(np.percentile(diffs, 97.5),3)))
    
    for axis in range(CORRS.shape[2]):
        thisY = CORRS[layerIX,:,axis]
        # Pairwise differences within this channel pair only. The loop used
        # to compare thisY[j] with entries of the flattened all-axes array.
        axdiffs = _pairwise_abs_diffs(thisY)
        print('-----axis {}, d = {}, CI95 = [{} {}]'.format(axis+1,np.around(np.mean(axdiffs),3),
                                                                    np.around(np.percentile(axdiffs, 2.5),3),
                                                                    np.around(np.percentile(axdiffs, 97.5),3)))


# Inception layers 9-12: compare the obtained correlations with each layer's own goals.
for layerIX in range(8,12):
    print()
    print('------------ layer {} ------------'.format(layerIX+1))
    allX = np.repeat(GOALS[layerIX][:, np.newaxis], 8, axis=1).flatten()
    allY = CORRS[layerIX].flatten()
    forCorr = np.vstack((allX, allY))
    avgX = GOALS[layerIX]
    avgY = np.mean(CORRS[layerIX], 1)
    forCorr2 = np.vstack((avgX, avgY))
    # Computed afresh for every layer. The list used to keep growing from
    # the previous loop and from earlier layers, contaminating the mean.
    diffs = _pairwise_abs_diffs(allY)
    print('all points r = {}, avg = {}, SD = {}, all points d = {}'.format(np.around(np.corrcoef(forCorr)[0,1],3),
                                                                           np.around(np.corrcoef(forCorr2)[0,1],3),
                                                                           np.around(np.std(allY), 3), 
                                                                           np.around(np.mean(diffs),3)))
    
    for axis in range(CORRS.shape[2]):
        thisX = GOALS[layerIX]
        thisY = CORRS[layerIX,:,axis]
        forCorr = np.vstack((thisX, thisY))
        print('-----axis {}, r = {}'.format(axis+1,np.around(np.corrcoef(forCorr)[0,1],3)))

        
# VGG19 layers: correlation of each layer's outcome with the graded goal row.
print()
print('VGG19')
for layerIX in range(7):
    print()
    print('------------ layer {} ------------'.format(layerIX+1))
    allX = np.repeat(GOALS[9][:, np.newaxis], 8, axis=1).flatten()
    allY = VGG19[layerIX].flatten()
    forCorr = np.vstack((allX, allY))
    avgX = GOALS[9]
    avgY = np.mean(VGG19[layerIX], 1)
    forCorr2 = np.vstack((avgX, avgY))
    print('all points r = {}, avg = {}, SD = {}'.format(np.around(np.corrcoef(forCorr)[0,1],3), 
                                                        np.around(np.corrcoef(forCorr2)[0,1],3),
                                                        np.around(np.std(allY), 3)))
    for axis in range(CORRS.shape[2]):
        thisX = GOALS[9]
        thisY = VGG19[layerIX,:,axis]
        forCorr = np.vstack((thisX, thisY))
        print('-----axis {}, r = {}'.format(axis+1,np.around(np.corrcoef(forCorr)[0,1],3)))
    
# Test each of VGG19 layers 1-4 against each of layers 5-7: do their
# correlations with the goal row differ (dependent-correlation test, n = 64 points)?
for layerIX in range(4):
    for layerIX2 in range(4, 7):
        allX = np.repeat(GOALS[9][:, np.newaxis], 8, axis=1).flatten()
        allY = VGG19[layerIX].flatten()
        allZ = VGG19[layerIX2].flatten()
        forCorr = np.corrcoef(np.vstack((allX, allY, allZ)))
        tstat, pstat = corr_compare(forCorr[0,1], forCorr[0,2], forCorr[1,2], 64, twotailed=True, conf_level=0.95)
        # The trailing 'p =' had no placeholder, so the p-value was never shown.
        print('difference between layers {} ({}) and {} ({}): r val btw = {}: t = {}, p = {}'.format(layerIX + 1, 
                                                                                                     np.around(forCorr[0,1],5),
                                                                                                     layerIX2 + 1, 
                                                                                                     np.around(forCorr[0,2],5),
                                                                                                     np.around(forCorr[1,2],5),
                                                                                                     np.around(tstat,3), 
                                                                                                     pstat))

<a id='fmri-analysis'></a>
# fMRI Analysis

[(back to top)](#TOC)

<a id='prelearning-corr'></a>
## Pre-learning correlation
The following analyses find the representational change at each similarity level, quantifying the true effect in the sample, then shuffle AB pairings as many times as is set by `shuffles` below and store these values.

This step reads in one of two numpy arrays, the first contains only the correlations for the relevant image pairings, while the other contains the entire 16 x 16 imagewise correlation matrix for every run.

[(back to top)](#TOC)

In [None]:
# List the ROIs to conduct the analysis in
roiList = ['V1', 'V2', 'LOC', 'Fus', 'PHC', 'IT', 'EC', 'perirhinal']  
resultDict36 = {}
noiseDict36 = {}
resultDict41 = {}
noiseDict41 = {}

In [None]:
# Base file names for the two types of numpy arrays - these are filled later in the code
critBase = '{}/allsubs_allruns_{}.npy'
allBase = '{}/allims_runs_{}.npy'

In [None]:
# For every ROI, keep index 0 along the second axis of the critical-pair
# array and write it to ./csvout/<roi>_surf.csv.
for roi in roiList:
    # this file holds only the relevant (critical-pair) correlations
    CritPairs = np.load(critBase.format('surf', roi))
    print('Running {}'.format(roi))

    # index 0 on the second axis is the slice used as the pre-learning data
    all_pre = CritPairs[:,0]
    print(all_pre.shape)

    # written with pandas so the bootstrap cell can read it back
    outFile = pd.DataFrame(all_pre)
    outFile.to_csv('./csvout/{}_surf.csv'.format(roi))
print('Finished')

In [None]:
# Number of resampling iterations used by the bootstrap and the shuffled-pairing (noise) loops.
shuffles = 50000

In [None]:
# For every ROI: correlate each subject's eight values with the similarity
# level index, then bootstrap the mean of those correlations across subjects.
for rn, roi in enumerate(roiList):
    print('Running {}'.format(roi))
    clear_output(wait=True)

    # read the csv written by the export cell; the first column is the saved row index
    dat = pd.read_csv('./csvout/{}_surf.csv'.format(roi))
    raw = np.array(dat.iloc[:,1:])

    # long-format copy of the same data (not used further in this cell)
    melted = pd.melt(dat, id_vars='Unnamed: 0', var_name='simLevel', value_name='Change')
    melted.rename(columns={'Unnamed: 0':'subID'}, inplace=True)
    rvals = []

    for subject in range(raw.shape[0]):
        # correlation between the level index 0-7 and this subject's eight values
        corr = np.corrcoef(np.vstack((np.arange(8), raw[subject])))[0,1]
        # Compile subjects in list
        rvals.append(corr)
    rvals = np.array(rvals)
    # stack the per-subject correlations of every ROI (one row per ROI)
    RVALS = rvals if rn == 0 else np.vstack((RVALS, rvals))
    print(RVALS.shape)
    us = np.zeros((shuffles))
    # Bootstrap resample subjects with replacement, keeping the mean r of each resample
    for shuf in range(shuffles):
        print('Running {} -- {}/{}'.format(roi, shuf+1, shuffles))
        clear_output(wait=True)
        test = np.random.choice(rvals.shape[0], rvals.shape[0])
        u = np.mean(rvals[test])
        us[shuf] = u
    # Fisher transform (arctanh) of the bootstrap means, then mean and 95% percentile interval
    us = np.arctanh(us)
    LB = np.percentile(us, 2.5)
    U = np.mean(us)
    UB = np.percentile(us, 97.5)
    resultDict41['{}'.format(roi)] = [LB, U, UB]
#np.save("VisSimR.npy", RVALS)

In [None]:
# Report the bootstrapped mean and 95% interval stored in resultDict41 for
# each ROI, rounded to three decimals.
# The backslash is spelled '\\' so the printed text is unchanged ("95\% CI")
# while avoiding the invalid '\%' escape sequence, which newer Python versions
# warn about.
for roi in roiList:
    LB, U, UB = resultDict41['{}'.format(roi)]
    print('For {} -- M = {}, 95\\% CI = [{} {}]'.format(roi,
                                                     np.around(U, 3),
                                                     np.around(LB, 3),
                                                     np.around(UB, 3)))

<a id='prelearning-noise'></a>
## Comparing to noise

[(back to top)](#TOC)

In [None]:
# Null distribution per ROI: repeatedly pass the full intercorrelation array
# through shufPrepost (defined elsewhere in this notebook), recompute the mean
# per-subject correlation with similarity level, and compare the observed
# effect from resultDict41 against the resulting distribution.
simLevels = np.arange(8)
for i, roi in enumerate(roiList[:]):
    # This is every intercorrelation (16 x 16)
    AllIms = np.load(allBase.format('surf', roi))
    us = np.zeros((shuffles))
    for shuf in range(shuffles):
        print('Running {} -- {}/{}'.format(roi, shuf+1, shuffles))
        clear_output(wait=True)
        # only the first of the two outputs is used here
        all_pre, _ = shufPrepost(AllIms)
        # one correlation per subject (row of all_pre)
        rvals = [np.corrcoef(np.vstack((simLevels, all_pre[subject])))[0,1]
                 for subject in range(all_pre.shape[0])]
        us[shuf] = np.mean(np.array(rvals))
    # same Fisher transform as applied to the observed bootstrap means
    us = np.arctanh(us)
    # observed mean is the middle entry of [LB, U, UB]
    _, trueU, _ = resultDict41['{}'.format(roi)]
    # fraction of shuffled means that exceed the observed mean
    percentile = (us > trueU).sum() / shuffles
    LB, UB = np.percentile(us, 2.5), np.percentile(us, 97.5)
    U = np.mean(us)
    noiseDict41['{}'.format(roi)] = [LB, U, UB, percentile]
    # histogram of the null distribution with its 95% bounds marked
    plt.hist(us,100)
    for bound in (LB, UB):
        plt.axvline(bound)
    #plt.savefig('figures/noise_{}.pdf'.format(roi), dpi=600)
    plt.show()
print('Finished')

In [None]:
# Print the null-distribution summary stored in noiseDict41 for every ROI,
# rounded to four decimals.
noiseReport = 'For {} -- M = {}, CI95 = [{} {}], percentile = {}'
for i, roi in enumerate(roiList):
    LB, U, UB, percentile = noiseDict41['{}'.format(roi)]
    # order in the message: mean, lower bound, upper bound, percentile
    rounded = [np.around(value, 4) for value in (U, LB, UB, percentile)]
    print(noiseReport.format(roi, *rounded))

In [None]:
# Filled by the bootstrap cell below with per-ROI [LB, U, UB] entries
plottingDict = dict()

# One row per ROI: (ROI identifier, RGB colour on a 0-255 scale, display name)
_roiTable = (
    ('V1',         (29, 157, 119),  'V1'),
    ('V2',         (217, 95, 2),    'V2'),
    ('LOC',        (117, 112, 179), 'LO'),
    ('Fus',        (231, 41, 138),  'FG'),
    ('PHC',        (101, 166, 30),  'PHC'),
    ('IT',         (230, 171, 0),   'IT'),
    ('EC',         (165, 118, 28),  'EC'),
    ('perirhinal', (102, 102, 102), 'PRC'),
)
masterROIs = [row[0] for row in _roiTable]
masterCOLs = [row[1] for row in _roiTable]
masterNOM = [row[2] for row in _roiTable]

# Lookups keyed by ROI identifier
nameDict = {roi: label for roi, _, label in _roiTable}
colorDict = {roi: rgb for roi, rgb, _ in _roiTable}

In [None]:
# For every ROI, bootstrap the across-subject mean at each of the eight
# similarity levels and keep the mean plus the 95% percentile band for plotting.
for i, roi in enumerate(roiList):
    # first csv column is the saved row index -> drop it
    dat = np.array(pd.read_csv('./csvout/{}_surf.csv'.format(roi)))[:, 1:]
    nRows = dat.shape[0]
    us = np.zeros((shuffles, 8))
    for shuf in range(shuffles):
        # resample rows with replacement, then average within each column
        IX = np.random.choice(nRows, nRows)
        us[shuf] = dat[IX, :].mean(axis=0)
    # column-wise summary over all resamples
    LB, UB = np.percentile(us, 2.5, axis=0), np.percentile(us, 97.5, axis=0)
    U = us.mean(axis=0)
    plottingDict['{}'.format(roi)] = [LB, U, UB]

In [None]:
# One figure per ROI: bootstrapped mean at each similarity level with its
# 95% band, drawn in the ROI's colour from colorDict.
for i, roi in enumerate(roiList):
    # colorDict stores 0-255 RGB; matplotlib wants 0-1
    r, g, b = (channel/255 for channel in colorDict[roi])
    LB, U, UB = plottingDict['{}'.format(roi)]
    xs = np.arange(8)
    narrow = {'fontname':'Arial Narrow'}
    fig, ax = plt.subplots(figsize=(6,8))
    # band is drawn twice: a white underlay, then a faint tint on top of it
    for bandColor in ((1, 1, 1, 0.7), (r, g, b, 0.1)):
        plt.fill_between(xs, LB, UB, color = bandColor, zorder=-2, lw=0)
    plt.plot(U, lw=4, c=[r,g,b,1])
    plt.scatter(xs, U, s=150, lw=1.5, edgecolor=[[r,g,b,1]], facecolor=[[r,g,b,0.6]])
    # tick positions are 0-7 but are labelled 1-8
    plt.xticks(xs, np.arange(1,9), fontsize=18, **narrow)
    plt.ylabel('Representational Change', fontsize=24, **narrow)
    plt.xlabel('Similarity Level', fontsize=24, **narrow)
    #plt.savefig('figures/simPattern_{}.pdf'.format(roi), dpi=600)
    plt.show()

In [None]:
def get_plot_params(bottom, top):
    """Return rounded axis limits and a text position for a plot.

    Args:
        bottom: lower axis limit.
        top: upper axis limit.

    Returns:
        (roundBot, roundTop, textLoc): `bottom` and `top` each rounded to the
        nearest multiple of 0.05, and a position 3% of the unrounded
        (top - bottom) range above the rounded bottom.
    """
    def to_nearest_twentieth(value):
        # 1/20 == 0.05
        return np.around(value * 20)/20

    span = top - bottom
    lower = to_nearest_twentieth(bottom)
    upper = to_nearest_twentieth(top)
    return lower, upper, lower + (0.03 * span)

In [None]:
# ROIs shown in the manuscript figure, each paired with the marker appended to
# its panel label ('*' or nothing).
_panelSpec = [('V1', ''), ('V2', ''), ('LOC', '*'), ('IT', ''), ('Fus', ''), ('PHC', '*')]
roiList = [roi for roi, _ in _panelSpec]
sig = [marker for _, marker in _panelSpec]
sigdict = {roi: marker for roi, marker in _panelSpec}
# Evenly spaced values starting at 0 with a step of 1/len(roiList); the figure
# cell indexes this array by panel number.
colorVals = np.arange(0, 1.01, 1/len(roiList))
print(colorVals)

In [None]:
# Manuscript figure: one panel per ROI in roiList showing the bootstrapped mean
# at each similarity level with its 95% band, labelled with the ROI's display
# name plus its marker from sigdict.
fig, axes = plt.subplots(1, len(roiList), figsize=(int(len(roiList)*3.5), 8), sharey=False)

for i, (ax, roi) in enumerate(zip(axes, roiList)):
    # spot_color is defined elsewhere in this notebook
    # NOTE(review): presumably returns an RGBA tuple sampled from the named
    # colormap at the given position -- confirm against its definition.
    r,g,b,a = spot_color('cool', colorVals[i])
    # scale the colour channels down by 1.1
    r, g, b = r/1.1, g/1.1, b/1.1
    LB, U, UB = plottingDict['{}'.format(roi)]
    # band is drawn twice: a white underlay, then a faint tint on top of it
    ax.fill_between(np.arange(8), LB, UB, color = (1, 1, 1, 0.7), zorder=-2, lw=0)
    ax.fill_between(np.arange(8), LB, UB, color = (r, g, b, 0.1), zorder=-2, lw=0)
    ax.plot(U, lw=4, c=[r,g,b,1])
    ax.scatter(np.arange(8), U, s=150, lw=1.5, edgecolor=[[r,g,b,1]], facecolor=[[r,g,b,0.6]])
    ax.set_xticks(np.arange(8))
    ax.set_xticklabels(np.arange(1,9), fontsize=20, **{'fontname':'Arial Narrow'})
    # fully transparent per-panel label; the visible shared label is added with
    # fig.text after the loop
    ax.set_xlabel('Model Similarity Level', color=(0, 0, 0, 0), fontsize=28, **{'fontname':'Arial Narrow'})
    bottom, top = ax.get_ylim()
    newbot, newtop, textloc = get_plot_params(bottom, top)
    ax.text(0, textloc, '{}{}'.format(nameDict[roi], sigdict[roi]), color=(r,g,b,1), fontsize=64, **{'fontname':'DIN Condensed'})
    if i == 0:
        ax.set_ylabel('Neural Similarity', fontsize=28, **{'fontname':'Arial Narrow'})
    # ticks every 0.05 between the rounded limits; computed once and reused
    # for both the positions and the labels
    yticks = np.around(np.arange(newbot, newtop+.01, 0.05),2)
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticks, fontsize=20, **{'fontname':'Arial Narrow'})
    ax.set_ylim(newbot, top)

fig.text(0.5, 0.02, 'Model Similarity Level', ha='center', fontsize=28, **{'fontname':'Arial Narrow'})
plt.tight_layout()
# savefig does not create missing folders, so make sure the target exists
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/manuscript_vis.pdf', dpi=600)
plt.show()

<a id='arrangement-visualize'></a>
# Visualize Arrangement Trials

[(back to top)](#TOC)

What follows are some functions and code to produce visualizations of a given arrangement trial or trials, for a given participant.

<a id='helpers-visualize'></a>
## Helper functions

[(back to top)](#TOC)

In [None]:
def reproduce_trial(subject, trial, save=False, show=False):
    """Re-draw one arrangement trial of one participant as an image.

    The trial's row is looked up in the notebook-level `data_full` frame
    (columns 'sub', 'trial', 'coords', 'time'). Every key that
    `retrieve_keys` yields for the trial's 'coords' entry is loaded from
    'stim/<key>.png', shrunk to fit 120x120 and pasted onto a 1000x1000
    canvas, which is then upscaled to 2000x2000.

    Args:
        subject: value matched against data_full['sub'].
        trial: value matched against the 'trial' column of that subject.
        save: if True, write the rendering to '<subject>_<trial>.pdf'.
        show: if True, display the rendering with matplotlib.

    Returns:
        (rt, arrangement): the trial's 'time' entry and the rendered canvas
        as a numpy array.

    Raises:
        IndexError: if the subject has no row for `trial`.
    """
    master = Image.new("RGBA", (1000,1000), (255,255,255,0))
    draw = ImageDraw.Draw(master)
    # Three concentric discs (black, white, black) drawn on the canvas
    draw.ellipse((88, 88, 912, 912), fill=(0,0,0))
    draw.ellipse((96, 96, 904, 904), fill=(255,255,255))
    draw.ellipse((104, 104, 896, 896), fill=(0,0,0))
    sub_data = data_full[data_full['sub']==subject]
    is_trial = sub_data['trial'] == trial
    coord_dict = sub_data.loc[is_trial, 'coords'].iloc[0]
    rt = sub_data.loc[is_trial, 'time'].iloc[0]
    for key in retrieve_keys(coord_dict):
        x, y = retrieve_coords(coord_dict,key)
        # shift the stored coordinates by 100 px on both axes
        x -= 100
        y -= 100
        # Close the file promptly, and force RGBA so the image can act as its
        # own transparency mask in paste() whatever mode the PNG was saved in.
        with Image.open('stim/{}.png'.format(key)) as stim:
            currIm = stim.convert('RGBA')
        currIm.thumbnail((120,120))
        master.paste(currIm, (x,y), currIm)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    master = master.resize((2000, 2000), Image.LANCZOS)
    arrangement = np.asarray(master)
    # Build the figure whenever it is needed, so that save=True on its own
    # writes this arrangement rather than whatever figure happened to be current.
    if show or save:
        fig = plt.figure(figsize=(20,20))
        plt.imshow(arrangement)
        plt.axis('off')
        if save:
            fig.savefig(str(subject)+'_'+str(trial)+'.pdf', dpi=1000)
        if show:
            plt.show()
        else:
            # nothing to display: release the figure instead of leaving it open
            plt.close(fig)
    return rt, arrangement
    
def giffify_trials(subject, time_per_trial='match', total_time=20, verbose=False):
    """Render every trial of one participant and save them as '<subject>_all.gif'.

    Trials are rendered with `reproduce_trial` for trial numbers
    0 .. (number of rows for the subject in `data_full`) - 1.

    Args:
        subject: value matched against data_full['sub'].
        time_per_trial: 'match' to give each frame a share of `total_time`
            proportional to that trial's (integer-truncated) time value;
            anything else is passed to imageio unchanged as `duration`.
        total_time: summed frame duration used when time_per_trial is 'match'.
        verbose: if True, show a progress line that is refreshed per trial.
    """
    sub_data = data_full[data_full['sub']==subject]
    sub_trials = sub_data.shape[0]
    images=[]
    rts=[]
    for trial in range(sub_trials):
        rt, arrangement = reproduce_trial(subject, trial)
        images.append(arrangement)
        rts.append(int(rt))
        if verbose:
            clear_output(wait=True)
            print('subject: {}, trial {}/{} done'.format(subject, trial+1, sub_trials))
    # isinstance guard: comparing a list/array of durations with a string
    # should simply select the pass-through branch
    if isinstance(time_per_trial, str) and time_per_trial == 'match':
        whole_time = np.sum(rts)
        if whole_time > 0:
            # honour total_time (previously a hard-coded 20 ignored the argument)
            times = list((np.array(rts) / whole_time) * total_time)
        else:
            # every truncated time is 0: split the total evenly instead of
            # dividing by zero and handing NaN durations to imageio
            times = [total_time / max(sub_trials, 1)] * sub_trials
    else:
        times = time_per_trial
    imageio.mimsave(str(subject)+'_all.gif', images, duration=times)
        
    

<a id='example-visualize'></a>
## Example visualizations

This cell will produce a gif of all trials for one participant, then a single trial visualization for another.

[(back to top)](#TOC)

In [None]:
# Animated gif of every trial for one participant (with progress output) ...
giffify_trials(subject='26HoFBT4', verbose=True)
# ... then render, save and display trial 1 of a second participant
rt, arrangement = reproduce_trial(subject='5kwO4weo', trial=1, save=True, show=True)