In [1]:
%load_ext autoreload
%autoreload 2
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
import sys
from typing import List
import numpy as np
import joblib
import imodelsx.util
import sasc.viz
from copy import deepcopy
from numpy.linalg import norm
from sasc.config import CACHE_DIR, RESULTS_DIR


num_top_test_ngrams = 75
from sasc.modules.fmri_module import convert_module_num_to_voxel_num, add_stability_score
tqdm.pandas()

r_opt = pd.read_pickle(join(RESULTS_DIR, 'results_fmri_full_1500_opt.pkl'))
r_llama = pd.read_pickle(join(RESULTS_DIR, 'results_fmri_full_1500_llama.pkl'))

### Add stability scores

In [None]:
# ngram_scores_dict_opt = {}
# ngram_scores_dict_llama = {}
# stability_scores = {}
# for subject in ["UTS01", "UTS02", "UTS03"]:
#     ngram_scores_dict_opt[subject] = joblib.load(
#         join(CACHE_DIR, "cache_ngrams", f"fmri_{subject}.pkl")
#     )
#     ngram_scores_dict_llama[subject] = joblib.load(
#         join(CACHE_DIR, "cache_ngrams", f"fmri_{subject}_llama.pkl")
#     )

#     mat1 = ngram_scores_dict_opt[subject].T
#     mat2 = ngram_scores_dict_llama[subject].T

#     # calculate correlations between rows of the mats
#     corrs = np.zeros(500)
#     for i in tqdm(range(mat1.shape[0])):
#         corrs[i] = np.corrcoef(mat1[i], mat2[i])[0, 1]
#     stability_scores[subject] = deepcopy(corrs)
# joblib.dump(stability_scores, join(RESULTS_DIR, "fmri_stability_scores.jbl"))

In [None]:
plt.figure(dpi=300)
STABILITY_SCORES_DICT = joblib.load(join(RESULTS_DIR, "fmri_stability_scores.jbl"))
for i, subject in enumerate(["UTS01", "UTS02", "UTS03"]):
    plt.hist(STABILITY_SCORES_DICT[subject], label=subject, alpha=0.7)
    plt.axvline(np.mean(STABILITY_SCORES_DICT[subject]), linewidth=3, color=f"C{i}")
plt.legend()
plt.xlabel("Stability score (correlation of ngram scores)")
plt.ylabel("Number of voxels")
plt.show()

### Export gsheet

In [2]:
def clean_df(df):
    # df = df.sort_values(by=['top_score_synthetic'], ascending=False)
    # df['pid'] = df['subject'] + ' ' + df['module_num'].astype(str)
    # df = df.set_index('pid')
    df["voxel_num"] = df.progress_apply(
        lambda row: convert_module_num_to_voxel_num(row["module_num"], row["subject"]),
        axis=1,
    )
    df["stability_score"] = df.progress_apply(
        lambda row: add_stability_score(row["module_num"], row["subject"]),
        axis=1,
    )
    return df


r = clean_df(r_opt)
for k in [
    "top_score_synthetic",
    "fmri_test_corr",
    "top_ngrams_module_correct",
    "top_score_normalized",
    'top_explanation_init_strs',
    'explanation_init_strs',
]:
    r[k + "_llama"] = r_llama[k]

# overlapping ngrams
# r.apply(
#     lambda row: len(
#         set(row["top_ngrams_module_correct"].tolist()).intersection(
#             row["top_ngrams_module_correct_llama"].tolist()
#         )
#     ),
#     axis=1,
# )

100%|██████████| 1500/1500 [00:13<00:00, 110.78it/s]
100%|██████████| 1500/1500 [00:00<00:00, 154977.24it/s]


In [3]:
columns = {
    # fmri stuff
    'subject': 'Subject',
    'module_num': 'Voxel',
    'roi_func': 'ROI (functional)',
    'roi_anat': 'ROI (anatomical)',
    'fmri_test_corr': 'Enc. correlation',
    'fmri_test_corr_llama': 'Enc. correlation (llama)',
    
    # scores
    'top_score_normalized': 'Explanation score',
    'top_score_normalized_llama': 'Explanation score (llama)',
    'stability_score': 'Stability score',
    'frac_top_ngrams_module_correct': 'Fraction of matching ngrams (module, top-75)',
    'rankcorr_expl_test': 'Correlation (test) when predicting with only explanation',
    # 'frac_top_ngrams_test_correct': 'Fraction of matching ngrams (test, top-75)',
            
    # explanation
    'top_explanation_init_strs': 'Explanation',
    'top_explanation_init_strs_llama': 'Explanation (llama)',
        
    # ngrams matching the explanation (used 75 ngrams)
    'top_ngrams_module_correct': 'Matching top ngrams (out of top-75)',
    'top_ngrams_module_correct_llama': 'Matching top ngrams (llama, out of top-75)',
    # 'top_ngrams_test_correct': 'Matching ngrams (test, top-75)',

    # all ngrams
    'top_ngrams_module_25': 'All top ngrams (top-25)',
    # 'top_ngrams_test_25': 'Top ngrams (test, top-25)',

    # alternative explanations
    'explanation_init_strs': 'Explanation candidates',
    'explanation_init_strs_llama': 'Explanation candidates (llama)',
}

tab = (
    r
    .sort_values(by=['top_score_synthetic'], ascending=False)
    # .sort_values(by=['rankcorr_expl_test'], ascending=False)
    .filter(columns.keys())
    .rename(columns=columns)
    .round(3)
)
with pd.option_context('display.max_colwidth', None, 'display.max_rows', 200):
    # display(
        # tab.head(3)
    # )
    tab_join_lists = tab.applymap(lambda x: ' __ '.join(x) if isinstance(x, np.ndarray) or isinstance(x, list) else x)
    tab_join_lists.to_csv('../results/results_fmri.csv', index=False, float_format='%.3f')
r[columns.keys()].to_pickle('../results/fmri_results_merged.pkl')

### Look at relationships between things

In [2]:
cols = [
    "Corr (test)",
    "Expl score",
    "Frac matching ngrams",
    "Expl corr (test)",
]
# sns.pairplot(
#     r_opt[cols],
#     kind="reg",
#     diag_kind="kde",
#     plot_kws={"scatter_kws": {"alpha": 0.1}},
#     markers=".",
#     height=2,
#     aspect=1.5,
#     corner=True,
# )
# plt.show()

In [3]:
left = r_opt
right = r_llama
suffixes = ("", "_llama")

# merge the two dataframes
df = left.merge(right, left_index=True, right_index=True, suffixes=suffixes)

In [4]:
# sns.heatmap(df.corr())
corrs = df.corr()
# hide the upper triangle
mask = np.zeros_like(corrs, dtype=bool)
mask[np.triu_indices_from(mask)] = True
corrs[mask] = 0
plt.figure(figsize=(8, 8), dpi=300)
sasc.viz.imshow_diverging(corrs)
plt.yticks(ticks=range(len(corrs.columns)), labels=corrs.columns, rotation=0)
plt.xticks(ticks=range(len(corrs.columns)), labels=corrs.columns, rotation=90)
plt.show()

ValueError: could not convert string to float: 'text-davinci-003'

### Overarching stats

In [None]:
rt = r[['top_score_normalized', 'rankcorr_expl_test']]
# rt = rt.sort_values('fmri_test_corr', ascending=False)
print(rt.mean())
print(rt.std() / np.sqrt(rt.shape[0]))

In [None]:
dsets = joblib.load(join(SAVE_DIR_FMRI, 'stories', 'running_words.jbl'))

In [None]:
spearmans = []
pearsons = []
cvs = []
# test different prediction mappings
for i in tqdm(range(r.shape[0])):
    row = r.iloc[i]
    resp = dsets[row["subject"]]["resp"][:, row["module_num"]]
    neg_dists = np.array(row["neg_dists_expl_test"])
    neg_dists[np.isnan(neg_dists)] = np.nanmean(neg_dists)

    # stack neg_dists with delays
    neg_dists_arr = np.stack(
        [
            np.concatenate((neg_dists[i:], np.ones(i) * np.nanmean(neg_dists)))
            for i in range(3)
        ]
    ).T
    # print(neg_dists_arr.shape, resp.shape)
    neg_dists = np.mean(neg_dists_arr, axis=1)
    
    spearmans.append(
        scipy.stats.spearmanr(
            neg_dists, resp, nan_policy="omit", alternative="greater"
        ).statistic
    )
    # pearson correlation
    pearsons.append(scipy.stats.pearsonr(neg_dists, resp)[0])

    # m = RidgeCV(alphas=10, scoring='r2')
    # m.fit(neg_dists_arr, resp)
    # neg_dists = m.predict(neg_dists_arr)
    # cvs.append(m.best_score_)
    cvs.append(0)
    # cvs.append(scipy.stats.pearsonr(neg_dists, resp)[0])
    # cvs = 
print("means", np.mean(spearmans), np.mean(pearsons), np.mean(cvs))
r['rankcorr_expl_test'] = spearmans