In [10]:
import numpy as np
import string 
import scipy.io
from sklearn.metrics import mean_squared_error
from scipy.spatial import distance
import itertools
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.neural_network import MLPRegressor
from sklearn.decomposition import *



class Scan(object):
    def __init__(self,activations,timestamp, step,prev_words=None,next_words=None,all_words=None,all_pos=None,all_speak_features=None,current_translated_words=None):
        self.activations = activations
        self.timestamp = timestamp
        self.prev_words = prev_words
        self.next_words = next_words
        self.step = step
        self.all_words = all_words
        self.current_translated_words = current_translated_words
        self.all_pos = all_pos
        self.all_speak_features = all_speak_features
        self.brain3d = None
        
        
        
subject_id = 1


In [11]:
def fold_brain_features(block_features, block_scans, block_ids):
    brains = []
    features = []
    for block_id in block_ids:
        for scan in block_scans.item()[block_id]:
            block_step = scan.step  - block_scans.item()[block_id][0].step
            brains.append(scan.activations[0])

            related_features = []
            i = 0
            while i<4 and (block_step + i) < len(block_features[block_id]):
                related_features.append(block_features[block_id][block_step + i])
                i += 1

            features.append(max(related_features))  
    
    print(len(features),len(brains))
    brains = np.asarray(brains)
    brains = (brains - np.min(brains,axis=0))/(np.max(brains,axis=0) - np.min(brains,axis=0) + 0.000001)
    return brains, features     


def evaluate_fold_brain_features(feature_model, block_scans,test_blocks,train_blocks):
    brains_train, features_train = fold_brain_features(feature_model,block_scans,block_ids=train_blocks)
    brains_test, features_test = fold_brain_features(feature_model,block_scans,block_ids=test_blocks)


    clf = SVC(kernel='linear',class_weight='balanced', C=0.001)
    clf.fit(brains_train, features_train)

    prediction_train = clf.predict(brains_train)
    prediction_test = clf.predict(brains_test)

    train_accuracy = np.mean(features_train == prediction_train)
    test_accuracy = np.mean(features_test == prediction_test)

    print("train accuracy: ",train_accuracy, " test accuracy: ",test_accuracy)
    cnf_matrix = confusion_matrix(features_test, prediction_test)
    print(cnf_matrix)

In [12]:
reduced_brain_vectors = np.load("../processed_data/512reducted_brain_scans.npy")
not_reduced_brain_vectors = np.load("../processed_data/reducted_brain_scans.npy")

reduced_brain_vectors_words = np.load("../processed_data/reducted_words.npy")



In [13]:
len(reduced_brain_vectors)

1295

In [14]:
len(reduced_brain_vectors_words)

1295

In [15]:
story_features = scipy.io.loadmat('../data/story_features.mat') 
subject_file = scipy.io.loadmat('../data/'+'subject_'+str(subject_id)+'.mat') 
part_of_speaches_feature_id = 8
speach_feature_id = 1
part_of_speaches = story_features['features'][0][part_of_speaches_feature_id][1][0]
part_of_speaches_features = story_features['features'][0][part_of_speaches_feature_id][2]
speaches = story_features['features'][0][speach_feature_id][1][0]
speach_features = story_features['features'][0][speach_feature_id][2]


speach_feature_id = 1
motion_feature_id = 2
emotion_feature_id = 3
verbs_feature_id = 4
characters_feature_id = 5
visual_wordlength_feature_id = 6
Word_Num_feature_id = 7
part_of_speaches_feature_id = 8
Dependency_role_feature_id = 9

word_index = 0
time_index = 1

In [16]:
actual_words = []
word_times = []
word_pos = []
speach_states = []
for i in np.arange(subject_file['words'].shape[1]):
    actual_words.append(subject_file['words'][0][i][word_index][0][0][0].strip().replace("@",""))
    word_times.append(subject_file['words'][0][i][time_index][0][0])
    word_pos.append(part_of_speaches_features[i])
    speach_states.append(speach_features[i])
    
    
blocks = subject_file['time'][:,1]
scan_times = subject_file['time'][:,0]

block_ends = {}
block_ends_indexes = {}


for block_id in np.arange(1,5):
    block_ends_indexes[block_id] = np.where(scan_times == np.max(scan_times[np.where(blocks == block_id )]))[0]
    block_ends[block_id] = scan_times[block_ends_indexes[block_id]]+2


print(block_ends)
print(block_ends_indexes)

block_texts = {1:[],2:[],3:[],4:[]}
block_pos = {1:[],2:[],3:[],4:[]}
block_speach_state = {1:[],2:[],3:[],4:[]}
block_steps = {1:[],2:[],3:[],4:[]}
character_feature = {1:[],2:[],3:[],4:[]}
block_id = 1
for index in np.arange(len(actual_words)):
    if word_times[index] > block_ends[block_id]:
        block_id += 1
    block_texts[block_id].append(str(actual_words[index].encode("ascii",'ignore').decode()))
    block_pos[block_id].append(word_pos[index])
    block_speach_state[block_id].append(speach_states[index][0])
    block_steps[block_id].append(index)
    character_feature[block_id].append(story_features['features'][0][characters_feature_id][2][index])

{1: array([680], dtype=uint16), 2: array([1384], dtype=uint16), 3: array([1942], dtype=uint16), 4: array([2702], dtype=uint16)}
{1: array([339]), 2: array([691]), 3: array([970]), 4: array([1350])}


In [17]:
import re
import string

end_of_sentence_indexes = {1:[],2:[],3:[],4:[]}
start_of_sentence_indexes = {1:[],2:[],3:[],4:[]}
qout_indexes = {1:[],2:[],3:[],4:[]}
inside_qout_indexes = {1:[],2:[],3:[],4:[]}

for block_id in [1,2,3,4]:
    start_of_sentence_indexes[block_id].append(0)
    inside_qout = False
    for i in np.arange(len(block_texts[block_id])):
        
        already_in = False   
        if "\"" in block_texts[block_id][i]:
            qout_indexes[block_id].append(i)
            
            checked = False
            if block_texts[block_id][i].strip().startswith("\""):
                if inside_qout is False :
                    if already_in == False:
                        inside_qout_indexes[block_id].append(i)
                        already_in = True
                inside_qout = not inside_qout
                checked = True
                
            if block_texts[block_id][i].strip().endswith("\""):
                if inside_qout is True :
                    if already_in == False:
                        inside_qout_indexes[block_id].append(i) 
                        already_in = True
                inside_qout = not inside_qout
                checked = True
                
            if checked == False:
                print(block_texts[block_id][i])
                
        if inside_qout:
            if already_in == False:
                inside_qout_indexes[block_id].append(i)
        #end/start of sentence
        truth_table = [punc in block_texts[block_id][i] for punc in ["!",".","?",":"]]
        if True in truth_table:
            end_of_sentence_indexes[block_id].append(i)
            if i+1 < len(block_texts[block_id]):
                start_of_sentence_indexes[block_id].append(i+1)
        

In [18]:
inside_qout_feature = {1:[],2:[],3:[],4:[]}
start_sentence_feature = {1:[],2:[],3:[],4:[]}
end_sentence_feature = {1:[],2:[],3:[],4:[]}
for block_id in [1,2,3,4]:
    inside_qout_feature[block_id] = np.zeros((len(block_texts[block_id])))
    inside_qout_feature[block_id][inside_qout_indexes[block_id]] = 1
    
    start_sentence_feature[block_id] = np.zeros((len(block_texts[block_id])))
    start_sentence_feature[block_id][start_of_sentence_indexes[block_id]] = 1

    end_sentence_feature[block_id] = np.zeros((len(block_texts[block_id])))
    end_sentence_feature[block_id][end_of_sentence_indexes[block_id]] = 1
    

In [19]:
block_feature = character_feature
brain_mapped_features = []
for block_id in [1,2,3,4]:
    for j in np.arange(0,len(block_feature[block_id]),4):
        i = 0
        related_features = []
        while i < 4 and (i+j) < len(block_feature[block_id]):
            if max(block_feature[block_id][i+j]) == 0:
                related_features.append(0)
            else:
                related_features.append(1+ np.argmax(block_feature[block_id][i+j]))
            i += 1
        
        brain_mapped_features.append(np.max(related_features))   
    

In [20]:
print(np.asarray(brain_mapped_features).shape)
print(np.asarray(reduced_brain_vectors).shape)
print(np.asarray(not_reduced_brain_vectors).shape)

(1295,)
(1295, 512)
(1295, 37913)


In [23]:
clf = SVC(kernel='linear', C=1,class_weight='balanced',probability=True)
clf.fit(reduced_brain_vectors[200+1:], brain_mapped_features[200:len(brain_mapped_features)-1])

prediction_test = clf.predict(reduced_brain_vectors[1:200+1])
probability_prediction_test = clf.predict_proba(reduced_brain_vectors[1:200+1])


train_accuracy = np.mean(brain_mapped_features[:200] == prediction_test)

print("accuracy: ",train_accuracy)
cnf_matrix = confusion_matrix(brain_mapped_features[:200], prediction_test)
print(cnf_matrix)

result = []
for i in np.arange(len(brain_mapped_features[:200])):
    if brain_mapped_features[:200][i] > 0:
        j = np.where (np.argsort(probability_prediction_test[i]) == np.asarray(brain_mapped_features[:200][i]))
        result.append(1/(11 - j[0][0]))
    
print("MRR: ",np.mean(result))

accuracy:  0.485
[[94  4 28  2  0  0  3  1  2  2]
 [13  0  3  0  0  0  0  0  0  0]
 [ 8  1  2  2  0  1  1  0  0  0]
 [ 2  1  1  1  0  0  0  0  0  1]
 [ 1  0  0  0  0  0  0  0  0  0]
 [ 1  0  0  0  0  0  0  0  0  0]
 [10  0  3  0  0  1  0  0  1  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 8  0  2  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]]
MRR:  0.2827262581168831


In [24]:
clf = SVC(kernel='linear', C=1,class_weight='balanced',probability=True)
clf.fit(reduced_brain_vectors[200+2:], brain_mapped_features[200:len(brain_mapped_features)-2])

prediction_test = clf.predict(reduced_brain_vectors[2:200+2])
probability_prediction_test = clf.predict_proba(reduced_brain_vectors[2:200+2])


train_accuracy = np.mean(brain_mapped_features[:200] == prediction_test)

print("accuracy: ",train_accuracy)
cnf_matrix = confusion_matrix(brain_mapped_features[:200], prediction_test)
print(cnf_matrix)

result = []
for i in np.arange(len(brain_mapped_features[:200])):
    if brain_mapped_features[:200][i] > 0:
        j = np.where (np.argsort(probability_prediction_test[i]) == np.asarray(brain_mapped_features[:200][i]))
        result.append(1/(11 - j[0][0]))
    
print("MRR: ",np.mean(result))

accuracy:  0.51
[[98  7  1 19  6  0  1  2  2  0]
 [10  1  0  1  3  0  0  1  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 9  0  0  2  2  0  1  0  1  0]
 [ 4  0  0  2  0  0  0  0  0  0]
 [ 0  0  0  1  0  0  0  0  0  0]
 [ 1  0  0  0  0  0  0  0  0  0]
 [11  0  0  2  0  0  1  1  0  0]
 [ 6  1  0  2  0  0  0  0  0  1]
 [ 0  0  0  0  0  0  0  0  0  0]]
MRR:  0.2753782242063492


In [25]:
clf = SVC(kernel='linear', C=1,class_weight='balanced',probability=True)
clf.fit(reduced_brain_vectors[200+3:], brain_mapped_features[200:len(brain_mapped_features)-3])

prediction_test = clf.predict(reduced_brain_vectors[3:200+3])
probability_prediction_test = clf.predict_proba(reduced_brain_vectors[3:200+3])


train_accuracy = np.mean(brain_mapped_features[:200] == prediction_test)

print("accuracy: ",train_accuracy)
cnf_matrix = confusion_matrix(brain_mapped_features[:200], prediction_test)
print(cnf_matrix)

result = []
for i in np.arange(len(brain_mapped_features[:200])):
    if brain_mapped_features[:200][i] > 0:
        j = np.where (np.argsort(probability_prediction_test[i]) == np.asarray(brain_mapped_features[:200][i]))
        result.append(1/(11 - j[0][0]))
    
print("MRR: ",np.mean(result))

accuracy:  0.515
[[97  2 25  1  0  2  3  2  2  2]
 [10  1  4  1  0  0  0  0  0  0]
 [ 9  0  5  0  0  0  0  0  1  0]
 [ 0  0  4  0  0  0  1  1  0  0]
 [ 1  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  1  0]
 [11  0  2  2  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 7  1  0  0  0  0  1  0  0  1]
 [ 0  0  0  0  0  0  0  0  0  0]]
MRR:  0.2878229843073593


In [26]:
clf = SVC(kernel='linear', C=1,class_weight='balanced',probability=True)
clf.fit(reduced_brain_vectors[200+4:], brain_mapped_features[200:len(brain_mapped_features)-4])

prediction_test = clf.predict(reduced_brain_vectors[4:200+4])
probability_prediction_test = clf.predict_proba(reduced_brain_vectors[4:200+4])


train_accuracy = np.mean(brain_mapped_features[:200] == prediction_test)

print("accuracy: ",train_accuracy)
cnf_matrix = confusion_matrix(brain_mapped_features[:200], prediction_test)
print(cnf_matrix)

result = []
for i in np.arange(len(brain_mapped_features[:200])):
    if brain_mapped_features[:200][i] > 0:
        j = np.where (np.argsort(probability_prediction_test[i]) == np.asarray(brain_mapped_features[:200][i]))
        result.append(1/(11 - j[0][0]))
    
print("MRR: ",np.mean(result))

accuracy:  0.46
[[87  2 24  7  1  1  8  3  3]
 [12  1  2  0  0  0  1  0  0]
 [10  0  4  0  0  1  0  0  0]
 [ 2  1  3  0  0  0  0  0  0]
 [ 0  0  0  1  0  0  0  0  0]
 [ 1  0  0  0  0  0  0  0  0]
 [11  0  3  1  0  0  0  0  0]
 [ 8  0  2  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0]]
MRR:  0.2843073593073593


In [99]:
brain_mapped_features[:200]

[3,
 3,
 3,
 0,
 3,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 3,
 0,
 3,
 0,
 3,
 0,
 0,
 0,
 3,
 3,
 9,
 9,
 1,
 1,
 0,
 9,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 9,
 0,
 9,
 0,
 0,
 0,
 0,
 9,
 0,
 0,
 0,
 9,
 0,
 0,
 0,
 0,
 3,
 9,
 0,
 0,
 0,
 7,
 0,
 7,
 0,
 0,
 3,
 7,
 0,
 0,
 0,
 0,
 4,
 0,
 7,
 0,
 0,
 0,
 4,
 0,
 4,
 0,
 4,
 0,
 0,
 7,
 4,
 0,
 0,
 7,
 0,
 0,
 4,
 0,
 0,
 3,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 7,
 7,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 7,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 0,
 0,
 0,
 7,
 7,
 1,
 0,
 0,
 7,
 9,
 0,
 0,
 0,
 1,
 6,
 0,
 0,
 0,
 0,
 1,
 7,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 3,
 9,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 5,
 0,
 0,
 0,
 0]

In [123]:
clf = SVC(kernel='linear', C=1,class_weight='balanced',probability=True)
clf.fit(not_reduced_brain_vectors[200:], brain_mapped_features[200:])

prediction_test = clf.predict(not_reduced_brain_vectors[:200])
probability_prediction_test = clf.predict_proba(not_reduced_brain_vectors[:200])


train_accuracy = np.mean(brain_mapped_features[:200] == prediction_test)

print("accuracy: ",train_accuracy)
cnf_matrix = confusion_matrix(brain_mapped_features[:200], prediction_test)
print(cnf_matrix)

result = []
for i in np.arange(len(brain_mapped_features[:200])):
    if brain_mapped_features[:200][i] > 0:
        j = np.where (np.argsort(probability_prediction_test[i]) == np.asarray(brain_mapped_features[:200][i]))
        result.append(1/(11 - j[0][0]))
    
print("MRR: ",np.mean(result))

accuracy:  0.68
[[133   0   3   0   0   0   0   0]
 [ 16   0   0   0   0   0   0   0]
 [ 12   0   3   0   0   0   0   0]
 [  6   0   0   0   0   0   0   0]
 [  1   0   0   0   0   0   0   0]
 [  1   0   0   0   0   0   0   0]
 [ 15   0   0   0   0   0   0   0]
 [ 10   0   0   0   0   0   0   0]]
MRR:  0.2567775974025974
