In [None]:
import os
import pickle
import warnings

import numpy as np
from scipy import spatial
from scipy.io import loadmat
from scipy.stats import pearsonr
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

# Silence every warning category for the rest of the session. Note that this
# also hides numerical warnings emitted while the metrics below are computed.
warnings.filterwarnings("ignore")


In [None]:
# Load the pre-computed ensemble predictions from disk. `evaluate` later reads
# them as ensemble[subject][roi].
# NOTE(review): "wt_avg" presumably stands for "weighted average" - confirm.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only point ENSEMBLE_PATH at a file from a trusted source.
ENSEMBLE_PATH = "ensemble_wt_avg.pkl"
with open(ENSEMBLE_PATH, "rb") as f:
    ensemble = pickle.load(f)


In [None]:
def pairwise_accuracy(actual, predicted):
    """Compute the pairwise (2v2) matching accuracy of a set of predictions.

    For every unordered pair of samples ``(i, j)`` the summed cosine distance
    of the matched pairing (``actual[i]`` with ``predicted[i]`` and
    ``actual[j]`` with ``predicted[j]``) is compared with the summed cosine
    distance of the swapped pairing (``actual[i]`` with ``predicted[j]`` and
    ``actual[j]`` with ``predicted[i]``).  A pair counts as correct when the
    matched sum is strictly smaller than the swapped sum.

    Args:
        actual: Indexable sequence of 1-D vectors (the reference values).
        predicted: Indexable sequence of 1-D vectors, one per entry of
            ``actual``.

    Returns:
        The fraction of sample pairs that were matched correctly.

    Raises:
        ValueError: If ``actual`` and ``predicted`` differ in length, or if
            fewer than two samples are given (no pair can be formed).
    """
    n_samples = len(actual)
    if len(predicted) != n_samples:
        raise ValueError(
            "actual and predicted must contain the same number of samples "
            f"(got {n_samples} and {len(predicted)})"
        )
    if n_samples < 2:
        raise ValueError(
            f"pairwise accuracy needs at least two samples (got {n_samples})"
        )

    cosine = spatial.distance.cosine

    # The matched distance of a sample does not depend on its partner, so it
    # is computed once per sample instead of once per pair.
    matched = [cosine(actual[i], predicted[i]) for i in range(n_samples)]

    true = 0
    total = 0
    for i in range(n_samples):
        for j in range(i + 1, n_samples):
            total += 1
            swapped = cosine(actual[i], predicted[j]) + cosine(actual[j], predicted[i])
            if matched[i] + matched[j] < swapped:
                true += 1

    return true / total


In [None]:
def pearcorr(actual, predicted):
    """Return the mean row-wise Pearson correlation between two sample sets.

    Row ``r`` of ``actual`` is correlated with row ``r`` of ``predicted``
    using ``np.corrcoef``; the per-row coefficients are then averaged.

    Args:
        actual: Indexable sequence of 1-D vectors.
        predicted: Indexable sequence of 1-D vectors, indexed with the same
            positions as ``actual``.

    Returns:
        The arithmetic mean of the per-row correlation coefficients.
    """
    row_correlations = [
        # Off-diagonal entry of the 2x2 correlation matrix.
        np.corrcoef(actual[row], predicted[row])[0][1]
        for row in range(len(actual))
    ]
    return np.mean(row_correlations)


In [None]:
def _flatten_roi_field(roi_fields, position):
    """Flatten the nested container at ``roi_fields[position]`` into a list of ints."""
    return [int(k) for j in roi_fields[position] for k in j[0]]


def generate_indices(data):
    """Extract the per-ROI index lists stored in ``data["meta"]``.

    All lists are read from ``data["meta"][0][0][11][0]``; each ROI lives at a
    fixed position inside that container.  Every entry is converted with
    ``int``.
    NOTE(review): the ROI each position holds is inferred from the variable
    names of the original code - confirm against the dataset documentation.

    Args:
        data: Mapping with a ``"meta"`` entry laid out as described above
            (in this notebook, built from the output of ``scipy.io.loadmat``).

    Returns:
        A 9-tuple of lists of ints, in this order: task, DMN, visual body,
        visual face, visual object, visual scene, visual (all), language
        left hemisphere, language right hemisphere.
    """
    roi_fields = data["meta"][0][0][11][0]
    return (
        _flatten_roi_field(roi_fields, 5),   # Taskindices
        _flatten_roi_field(roi_fields, 6),   # DMNindices
        _flatten_roi_field(roi_fields, 9),   # Visualindices_body
        _flatten_roi_field(roi_fields, 10),  # Visualindices_face
        _flatten_roi_field(roi_fields, 11),  # Visualindices_object
        _flatten_roi_field(roi_fields, 12),  # Visualindices_scene
        _flatten_roi_field(roi_fields, 13),  # Visualindices
        _flatten_roi_field(roi_fields, 7),   # Languageindices_lh
        _flatten_roi_field(roi_fields, 8),   # Languageindices_rh
    )


In [None]:
# ROI names iterated by the evaluation loop. They are used as keys of the
# dict returned by get_subject_data and of ensemble[subject].
ROIS = [
    "language_lh", "language_rh",
    "vision_body", "vision_face", "vision_object", "vision_scene", "vision",
    "dmn",
    "task",
]

# Subject identifiers; each names a sub-directory of the dataset folder.
subjects = ["P01", "M02", "M04", "M07", "M15"]

# Layer names: "block1" through "block12" followed by "fc". These two lists
# are not referenced by the cells shown in this notebook.
layers_bert = [f"block{number}" for number in range(1, 13)] + ["fc"]
layers_bart = ["fc"]


In [None]:
def get_subject_data(subject, data_dir="./pereira_dataset"):
    """Load one subject's recordings and split them into per-ROI matrices.

    The ``examples_passagesentences`` arrays of ``data_384sentences.mat`` and
    ``data_243sentences.mat`` are stacked along axis 0 (384-sentence file
    first), then the columns belonging to each ROI are selected.

    Args:
        subject: Name of the subject's sub-directory inside ``data_dir``.
        data_dir: Root directory of the dataset.  The default reproduces the
            previously hard-coded location.

    Returns:
        Dict mapping each ROI name to the array of all stacked rows restricted
        to that ROI's columns.
    """
    subject_dir = f"{data_dir}/{subject}"
    data_384 = loadmat(f"{subject_dir}/data_384sentences.mat")
    data_243 = loadmat(f"{subject_dir}/data_243sentences.mat")

    examples = np.concatenate(
        (
            data_384["examples_passagesentences"],
            data_243["examples_passagesentences"],
        ),
        axis=0,
    )
    # generate_indices only reads meta[0][0], i.e. the entry contributed by the
    # 384-sentence file (assuming loadmat's default 2-D struct arrays), so the
    # same ROI columns are applied to the rows of both files.
    # NOTE(review): this presumes both files share one column layout - confirm.
    meta = np.concatenate((data_384["meta"], data_243["meta"]), axis=0)

    (
        Taskindices,
        DMNindices,
        Visualindices_body,
        Visualindices_face,
        Visualindices_object,
        Visualindices_scene,
        Visualindices,
        Languageindices_lh,
        Languageindices_rh,
    ) = generate_indices({"meta": meta})

    roi_indices = {
        "language_lh": Languageindices_lh,
        "language_rh": Languageindices_rh,
        "vision_body": Visualindices_body,
        "vision_face": Visualindices_face,
        "vision_object": Visualindices_object,
        "vision_scene": Visualindices_scene,
        "vision": Visualindices,
        "dmn": DMNindices,
        "task": Taskindices,
    }

    # The stored indices are shifted down by one before use (presumably they
    # are 1-based MATLAB indices - confirm).  dtype=int keeps an empty ROI
    # usable as an index array instead of decaying to a float array.
    return {
        roi: examples[:, np.asarray(indices, dtype=int) - 1]
        for roi, indices in roi_indices.items()
    }


In [None]:
def evaluate(Y, roi, subject):
    """Score the ensemble predictions for one subject and ROI on a held-out split.

    ``Y`` and the module-level ``ensemble[subject][roi]`` are split together
    with ``train_test_split`` (20% test share, ``random_state=42``); only the
    two test portions are scored.
    NOTE(review): the split settings presumably mirror the ones used when the
    ensemble predictions were produced - confirm.

    Args:
        Y: Measured responses; must provide ``.copy()``.
        roi: Key selecting the ROI inside ``ensemble[subject]``.
        subject: Key selecting the subject inside ``ensemble``.

    Returns:
        Tuple ``(pairwise accuracy, mean Pearson correlation)`` computed on
        the test portion.
    """
    measured = np.array(Y.copy())
    predictions = ensemble[subject][roi]

    # train_test_split yields (train, test) for each input in turn, so slots
    # 1 and 3 hold the test parts of the measurements and of the predictions.
    parts = train_test_split(
        measured, predictions, random_state=42, test_size=0.2
    )
    held_out_true = parts[1]
    held_out_pred = parts[3]

    accuracy = pairwise_accuracy(held_out_true, held_out_pred)
    correlation = pearcorr(held_out_true, held_out_pred)
    return accuracy, correlation


In [None]:
# Results are stored as output[metric][roi][subject], with metric being
# "2v2" (pairwise accuracy) or "pear" (mean Pearson correlation).
output = {}
output["2v2"] = {}
output["pear"] = {}

# get_subject_data returns every ROI at once, so each subject's files are read
# a single time and reused for all ROIs instead of being reloaded per ROI.
fmri_by_subject = {}

for roi in ROIS:
    print(roi)
    output["2v2"][roi] = {}
    output["pear"][roi] = {}

    for subject in subjects:
        print(subject)
        if subject not in fmri_by_subject:
            fmri_by_subject[subject] = get_subject_data(subject)
        fmri = fmri_by_subject[subject]
        # np.array copies, so the cached arrays are never modified downstream.
        voxels = np.array(fmri[roi])
        acc, corr = evaluate(voxels, roi, subject)
        output["2v2"][roi][subject] = acc
        output["pear"][roi][subject] = corr


In [None]:
# Persist the collected metrics. The target directory is created first so a
# missing "results" folder cannot discard the finished evaluation.
results_path = "results/results_ensemble_wt_avg.pkl"
os.makedirs(os.path.dirname(results_path), exist_ok=True)
with open(results_path, "wb") as f:
    pickle.dump(output, f)
