In [1]:
import numpy as np
import string 
import scipy.io
from sklearn.metrics import mean_squared_error
from scipy.spatial import distance
import itertools
from sklearn import svm
import statsmodels.api as sm # import statsmodels 
from scipy import signal


class Scan(object):
    """Plain record describing one brain scan and the text context stored with it.

    Every constructor argument is kept unchanged on the instance under the
    same name. Elsewhere in this notebook ``activations[0]`` is read as the
    scan's activation vector, ``step`` is used as a key into the embedding
    dictionaries, and ``all_pos`` is summed over axis 0.

    ``brain3d`` is always initialised to ``None``; nothing in the constructor
    fills it in.
    """

    def __init__(self,activations,timestamp, step,prev_words=None,next_words=None,all_words=None,all_pos=None):
        # The measurement and where it sits in the recording.
        self.activations, self.timestamp, self.step = activations, timestamp, step
        # Optional word context supplied by the caller.
        self.prev_words, self.next_words = prev_words, next_words
        self.all_words, self.all_pos = all_words, all_pos
        # Placeholder attribute; not populated here.
        self.brain3d = None
        
def eval(dists,e_dists):
    """Pairwise ("binary") matching accuracy for two distance matrices.

    Entry ``[i, j]`` is read as the distance between prediction ``i`` and
    target ``j``, so the diagonal holds the correct pairings. For every
    unordered pair of rows ``(i, j)`` the pair counts as correct when the two
    matching distances together are strictly smaller than the two swapped
    ones: ``d[i, i] + d[j, j] < d[i, j] + d[j, i]``.

    Parameters
    ----------
    dists : 2-D array
        First distance matrix. Its number of rows decides which pairs are
        evaluated (for both matrices).
    e_dists : 2-D array
        Second distance matrix, indexed with the same ``(i, j)`` pairs.

    Returns
    -------
    tuple
        ``(mean of b_acc, mean of e_b_acc, b_acc, e_b_acc)`` where ``b_acc``
        and ``e_b_acc`` are the per-pair boolean outcomes for ``dists`` and
        ``e_dists``. With fewer than two rows there are no pairs and both
        means are ``nan``.

    Notes
    -----
    The name shadows the builtin ``eval``; it is kept because callers in this
    notebook use it. An unused nearest-neighbour accuracy computation was
    dropped: it did not affect the result.
    """
    b_acc = []
    e_b_acc = []
    for i, j in itertools.combinations(range(dists.shape[0]), 2):
        # Correct pairing (diagonal) versus swapped pairing (off-diagonal).
        b_acc.append(dists[i, i] + dists[j, j] < dists[i, j] + dists[j, i])
        e_b_acc.append(e_dists[i, i] + e_dists[j, j] < e_dists[i, j] + e_dists[j, i])

    return np.mean(b_acc),np.mean(e_b_acc),b_acc,e_b_acc

  from pandas.core import datetools


In [2]:
# Embedding objects for subject 1, one file each for `lstm_0` and `lstm_1`.
# Both are read back with `.item()` later in this notebook, i.e. each file
# stores a pickled Python object inside a 0-d array. NumPy >= 1.16.3 refuses to
# load such files unless `allow_pickle=True` is passed explicitly.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
embeddings_0 = np.load("../data/subject_"+str(1)+"_lstm_"+str(0)+"_emb_objects.npy", allow_pickle=True)
embeddings_1 = np.load("../data/subject_"+str(1)+"_lstm_"+str(1)+"_emb_objects.npy", allow_pickle=True)

In [3]:
subject_id = 1

# Per-block "pos" arrays, keyed by block id (1..4).
# NOTE(review): these files are read from the working directory, while the
# embedding files are read from ../data -- confirm both locations are intended.
block_pos = {}

for block_id in [1,2,3,4]:
    block_pos[block_id] = np.load("subject_"+str(subject_id)+"_block_"+str(block_id)+"_pos.npy")

# The scan objects are read back with `.item()` later in this notebook, so the
# file holds a pickled Python object. NumPy >= 1.16.3 needs `allow_pickle=True`
# for that. Unpickling can execute arbitrary code: only load trusted files.
block_scans = np.load("subject_"+str(subject_id)+"_scan_objects.npy", allow_pickle=True)

In [4]:
# Build, per block, the list of raw activation vectors (`activations[0]` of
# every scan) and a mean-removed copy of it.
scans_by_block = block_scans.item()

raw_block_scans = {}
detrended_block_scans = {}

for block_id in [1,2,3,4]:
    raw_block_scans[block_id] = [scan.activations[0] for scan in scans_by_block[block_id]]

    # type="constant" only subtracts the mean. No `axis` is given, so
    # scipy's default (the last axis) is used.
    # NOTE(review): if the rows are scans and the columns are voxels, this
    # removes each scan's mean across voxels rather than each voxel's mean
    # over time -- confirm that is the intended axis.
    detrended_block_scans[block_id] = signal.detrend(raw_block_scans[block_id],type="constant")

In [19]:
def _gather_block_samples(block_ids, activation_for, prev_step_offset):
    """Collect features and brain activations for every usable scan in ``block_ids``.

    A scan is usable when ``scan.step - prev_step_offset`` is a key of the
    ``embeddings_1`` dictionary of its block. ``activation_for(block_id, i, scan)``
    supplies the activation stored for the ``i``-th scan of ``block_id``.
    """
    features = {'position':[],'pos_tag':[],'lstm_1':[],'lstm_0':[],'lstm_prev_1':[],'lstm_prev_0':[]}
    brain_activations = []

    # `.item()` unwraps the dictionaries stored in the loaded arrays; do it once.
    scans_by_block = block_scans.item()
    emb_0_by_block = embeddings_0.item()
    emb_1_by_block = embeddings_1.item()

    for block_id in block_ids:
        for i, scan in enumerate(scans_by_block[block_id]):
            emb_1 = emb_1_by_block[block_id]
            prev_step = scan.step - prev_step_offset
            if prev_step not in emb_1:
                continue
            emb_0 = emb_0_by_block[block_id]

            features['position'].append(scan.step)
            features['pos_tag'].append(np.sum(scan.all_pos,axis=0))
            # Each embedding entry is averaged over its first axis.
            features['lstm_1'].append(np.mean(emb_1[scan.step],axis=0))
            features['lstm_0'].append(np.mean(emb_0[scan.step],axis=0))
            features['lstm_prev_1'].append(np.mean(emb_1[prev_step],axis=0))
            features['lstm_prev_0'].append(np.mean(emb_0[prev_step],axis=0))
            brain_activations.append(activation_for(block_id, i, scan))

    return features, brain_activations


def prepare_LSTM_Diagnostic_data(train_block_ids,test_block_ids,prev_step_offset=4):
    """Build train/test feature dictionaries and brain activations per block split.

    Reads the notebook-level ``block_scans``, ``embeddings_0``, ``embeddings_1``
    and ``detrended_block_scans``.

    Parameters
    ----------
    train_block_ids, test_block_ids : iterable of block ids
        Blocks whose scans go into the training and test sets respectively.
    prev_step_offset : int, default 4
        Distance, in steps, between a scan's own step and the step used for the
        ``lstm_prev_*`` features. Scans whose earlier step has no entry in
        ``embeddings_1`` are skipped.
        (Presumably the number of steps between consecutive scans -- TODO confirm.)

    Returns
    -------
    tuple
        ``(train_features, train_brain_activations, test_features,
        test_brain_activations)``. Each features dict has the keys ``position``,
        ``pos_tag``, ``lstm_1``, ``lstm_0``, ``lstm_prev_1`` and ``lstm_prev_0``,
        each a list with one entry per kept scan, aligned with the activation list.

    Raises
    ------
    KeyError
        If a kept scan's own ``step`` is missing from the embeddings of its block.
    """
    # Training activations come from the detrended data of the scan's *own*
    # block. (Earlier this was indexed with the global `block_id` left over from
    # another cell, so every training block read the same block's data.)
    train_features, train_brain_activations = _gather_block_samples(
        train_block_ids,
        lambda block_id, i, scan: detrended_block_scans[block_id][i],
        prev_step_offset)

    # NOTE(review): test activations are the raw `scan.activations[0]`, not the
    # detrended values used for training -- confirm this asymmetry is intended.
    test_features, test_brain_activations = _gather_block_samples(
        test_block_ids,
        lambda block_id, i, scan: scan.activations[0],
        prev_step_offset)

    return train_features,train_brain_activations,test_features,test_brain_activations


def train_model(X,y):
    """Fit an ordinary-least-squares model of ``y`` on ``X`` and return the fit.

    ``X`` is used as given: no constant column is added, so the model has no
    intercept unless ``X`` already contains one.

    Parameters
    ----------
    X : array-like
        Input (independent) variables, one row per sample.
    y : array-like
        Output (dependent) variable(s), one entry per sample.

    Returns
    -------
    The object returned by ``sm.OLS(...).fit()``.
    """
    # statsmodels takes the dependent variable first: OLS(output, input).
    ols = sm.OLS(y, X)
    return ols.fit()


def MRR(distances):
    """Rank of the correct match per row, plus the mean reciprocal rank.

    Row ``i`` of ``distances`` is sorted in ascending order; the correct match
    for row ``i`` is column ``i``, and its 1-based position in that ordering is
    the row's rank. Prints the mean reciprocal rank followed by the mean rank.

    Parameters
    ----------
    distances : 2-D array
        Distance matrix, one row per query.

    Returns
    -------
    tuple
        ``(mean rank, mean reciprocal rank, ranks, reciprocal ranks)``. The two
        lists hold one small integer/float array per row.
    """
    order = np.argsort(distances,axis=1)

    # 1-based position of column `row` within the sorted row.
    ranks = [np.flatnonzero(order[row] == row) + 1 for row in np.arange(len(distances))]
    prec_at_corrects = [1.0/rank for rank in ranks]

    mean_reciprocal_rank = np.mean(prec_at_corrects)
    mean_rank = np.mean(ranks)
    print("MRR: ",mean_reciprocal_rank," ",mean_rank)
    return mean_rank, mean_reciprocal_rank, ranks,prec_at_corrects

def test_model(model,X_t,y_t):
    #X_t = sm.add_constant(X_t) ## let's add an intercept (beta_0) to our model
    pred_t = model.predict(X_t)
    
    if len(pred_t.shape) == 1:
        pred_t = np.reshape(pred_t,(len(pred_t),1))
        y_t = np.reshape(y_t,(len(y_t),1))

    cosine_dists = distance.cdist(pred_t,y_t,'cosine')
    euc_dists =  distance.cdist(pred_t,y_t,'euclidean')
    
    print("cosine dist >>")
    mean_ranks_c = MRR(cosine_dists)
    
    print("euc_dists dist >>")
    mean_ranks_e = MRR(euc_dists)
    
    print("binary accuracy >>")
    c_acc, e_acc, _,_ = eval(cosine_dists,euc_dists)
    print(c_acc,e_acc)
    
    return c_acc, e_acc

    

In [20]:

# Leave-one-block-out diagnostic: predict each scan's `position` (its step)
# from the concatenated `lstm_0` and `lstm_1` features, holding out blocks
# 4, 3, 2 and 1 in turn.
#
# NOTE: `position` is a scalar target, so test_model compares single-element
# vectors. The cosine distance between such vectors depends only on their
# signs, so the cosine-based scores of this cell are uninformative.
position_fold_scores = {}

for held_out_block in [4,3,2,1]:
    test_block_ids = [held_out_block]
    train_block_ids = [b for b in [1,2,3,4] if b != held_out_block]
    train_features,train_brain_activations,test_features,test_brain_activations = \
                                                    prepare_LSTM_Diagnostic_data(train_block_ids,test_block_ids)

    # X: input (independent) variables; y: output (dependent) variable.
    X = np.concatenate([train_features['lstm_0'],train_features['lstm_1']],axis=1)
    y = train_features['position']
    X_t = np.concatenate([test_features['lstm_0'],test_features['lstm_1']],axis=1)
    y_t = test_features['position']

    model = train_model(X,y)
    # (cosine, euclidean) pairwise accuracy for this fold.
    position_fold_scores[held_out_block] = test_model(model,X_t,y_t)

# Cell output: scores of the last fold (block 1 held out).
position_fold_scores[held_out_block]



cosine dist >>
MRR:  0.017749266454767033   183.0
euc_dists dist >>
MRR:  0.017800852927025462   182.44931506849315
binary accuracy >>
0.0 0.010913743790456119
cosine dist >>
MRR:  0.023314611787025357   132.5
euc_dists dist >>
MRR:  0.031855638771678504   96.61742424242425
binary accuracy >>
0.0 0.4815646963935937
cosine dist >>
MRR:  0.0189874823175414   169.0
euc_dists dist >>
MRR:  0.020238638095098377   172.95548961424333
binary accuracy >>
0.0 0.2905009184682775
cosine dist >>
MRR:  0.01957716467635644   163.0
euc_dists dist >>
MRR:  0.01985333379303804   160.9046153846154
binary accuracy >>
0.0 0.043076923076923075


(0.0, 0.043076923076923075)

In [21]:

# Leave-one-block-out diagnostic: predict each scan's `pos_tag` vector (the
# sum of `all_pos` over axis 0) from the concatenated `lstm_0` and `lstm_1`
# features, holding out blocks 4, 3, 2 and 1 in turn.
pos_tag_fold_scores = {}

for held_out_block in [4,3,2,1]:
    test_block_ids = [held_out_block]
    train_block_ids = [b for b in [1,2,3,4] if b != held_out_block]
    train_features,train_brain_activations,test_features,test_brain_activations = \
                                                    prepare_LSTM_Diagnostic_data(train_block_ids,test_block_ids)

    # X: input (independent) variables; y: output (dependent) variable.
    X = np.concatenate([train_features['lstm_0'],train_features['lstm_1']],axis=1)
    y = train_features['pos_tag']
    X_t = np.concatenate([test_features['lstm_0'],test_features['lstm_1']],axis=1)
    y_t = test_features['pos_tag']

    model = train_model(X,y)
    # (cosine, euclidean) pairwise accuracy for this fold.
    pos_tag_fold_scores[held_out_block] = test_model(model,X_t,y_t)

# Cell output: scores of the last fold (block 1 held out).
pos_tag_fold_scores[held_out_block]



cosine dist >>
MRR:  0.4403195210638416   11.547945205479452
euc_dists dist >>
MRR:  0.43552268886366513   17.616438356164384
binary accuracy >>
0.993602288122836 0.9934517537257264
cosine dist >>
MRR:  0.4810815942966132   8.693181818181818
euc_dists dist >>
MRR:  0.47322600983211116   10.43939393939394
binary accuracy >>
0.9927987095287476 0.9932307869570227
cosine dist >>
MRR:  0.47775870864261805   13.492581602373887
euc_dists dist >>
MRR:  0.467783871392112   15.216617210682493
binary accuracy >>
0.9922813338985446 0.9925639395223965
cosine dist >>
MRR:  0.47880161968712137   11.458461538461538
euc_dists dist >>
MRR:  0.48979442120770394   13.612307692307692
binary accuracy >>
0.9932003798670466 0.9935422602089269


(0.9932003798670466, 0.9935422602089269)