In [38]:
%load_ext autoreload
%autoreload 2
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
import sys
from typing import List
import numpy as np
import joblib
from pprint import pprint
import imodelsx.util
import sasc.viz
import pickle as pkl
import json
from copy import deepcopy
from numpy.linalg import norm
from sasc.config import CACHE_DIR, RESULTS_DIR, cache_ngrams_dir, regions_idxs_dir
import sasc.modules.fmri_module
# Cached inputs for subject S02: the ngram list (used below as the DataFrame
# index) and the mapping from ROI name to the indexes belonging to that ROI.
ngrams_path = join(cache_ngrams_dir, 'fmri_UTS02_ngrams.pkl')
rois_path = join(regions_idxs_dir, 'rois_S02.jbl')
ngrams_list = joblib.load(ngrams_path)
rois_dict = joblib.load(rois_path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Get predictions from embeddings

In [None]:
# One-off cache generation, intentionally left commented out.
# It runs the fMRI module on precomputed ngram embeddings, averages the
# predictions over the indexes listed for each ROI in `rois_dict`, and dumps the
# resulting {roi: per-ngram mean} dict into `cache_ngrams_dir`.
# As written it targets the LLaMA-30B checkpoint and output file; the
# doubly-commented lines are the OPT-30B alternatives (embeddings file,
# checkpoint, output file name). The two output file names are the ones loaded
# by the following cell.
# # embs = joblib.load(join(cache_ngrams_dir, 'fmri_embs.pkl'))
# embs = joblib.load(join(cache_ngrams_dir, 'fmri_embs_llama.pkl'))
# mod = sasc.modules.fmri_module.fMRIModule(
#     subject="UTS02",
#     # checkpoint="facebook/opt-30b",
#     checkpoint="huggyllama/llama-30b",
#     init_model=False,
#     restrict_weights=False,
# )
# voxel_preds = mod(embs=embs, return_all=True)
# outputs_dict = {
#     k: voxel_preds[:, np.array(rois_dict[k])].mean(axis=1)
#     for k in rois_dict
# }
# joblib.dump(outputs_dict, join(
#     # cache_ngrams_dir, 'rois_ngram_outputs_dict.pkl'))
#     cache_ngrams_dir, 'rois_ngram_outputs_dict_llama.pkl'))

In [None]:
# Load the cached per-ROI outputs of both models into DataFrames that share the
# ngram index (rows = ngrams, columns = ROIs).
roi_frames = {}
for model_tag, cache_name in (
    ('opt', 'rois_ngram_outputs_dict.pkl'),
    ('llama', 'rois_ngram_outputs_dict_llama.pkl'),
):
    outputs_dict = joblib.load(join(cache_ngrams_dir, cache_name))
    roi_frames[model_tag] = pd.DataFrame(outputs_dict, index=ngrams_list)

df_opt = roi_frames['opt']
df_llama = roi_frames['llama']

# Combined score: element-wise sum of the two models' outputs.
df = df_opt + df_llama

In [None]:
# ROIs that additionally get a contrastive "<roi>_only" score.
ROIS_LOC = ['RSC', 'OPA', 'PPA']
# Descending sort puts the highest-scoring (driving) ngrams first.
ascending = False

top_ngrams_dict = {}
# Snapshot the column names up front: "<roi>_only" columns are added to `df`
# inside the loop and must not be iterated as part of this pass.
for roi_name in list(df.columns):
    ranked = df.sort_values(roi_name, ascending=ascending)
    top_ngrams_dict[roi_name] = ranked.index[:100].tolist()

    if roi_name not in ROIS_LOC:
        continue

    # Contrast score: this ROI minus the mean of the other ROIs in ROIS_LOC.
    only_col = roi_name + '_only'
    other_rois = [c for c in ROIS_LOC if c != roi_name]
    df[only_col] = df[roi_name] - df[other_rois].mean(axis=1)
    ranked_only = df.sort_values(only_col, ascending=ascending)
    top_ngrams_dict[only_col] = ranked_only.index[:100].tolist()

# One column per ROI (and per "<roi>_only" contrast), 100 ranked ngrams each.
top_ngrams_df = pd.DataFrame(top_ngrams_dict)
top_ngrams_df.to_csv('top_ngrams_by_roi.csv')

# Show every row for the ROIs of interest.
rois = ['RSC', 'OPA', 'PPA', 'IPS', 'pSTS', 'sPMv', 'EBA', 'OFA']
rois = rois + [f'{loc_roi}_only' for loc_roi in ROIS_LOC]
with pd.option_context('display.max_rows', None):
    display(top_ngrams_df[rois])

In [None]:
# LLM handle used to summarise each column's top ngrams into a short theme.
gpt4 = imodelsx.llm.get_llm('gpt-4-turbo-0125-spot')

# Maps every column of top_ngrams_df to the LLM's answer for that column.
explanations = {}
for roi_col, ranked_ngrams in top_ngrams_df.items():
    # Bullet list of the 60 highest-ranked ngrams for this column.
    s = '- ' + '\n- '.join(ranked_ngrams.head(60))
    # NOTE: the indentation inside the literal below is part of the prompt text
    # sent to the model; keep it unchanged to reproduce earlier answers.
    prompt = f'''Here is a list of phrases:
    {s}

    What is a common theme among these phrases? Return only a concise phrase.'''
    explanations[roi_col] = gpt4(prompt)

In [None]:
# Persist the raw LLM explanations. The context manager guarantees the file is
# flushed and closed even if serialisation fails part-way through.
with open('explanations_by_roi.json', 'w') as explanations_file:
    json.dump(explanations, explanations_file, indent=4)

### Export selected ROIs to a pickle file

In [40]:
# ROIs to export: eight base ROIs followed by the three "<roi>_only" contrasts.
rois = [
    'RSC', 'OPA', 'PPA', 'IPS', 'pSTS', 'sPMv', 'EBA', 'OFA',
    'RSC_only', 'OPA_only', 'PPA_only',
]

# Show the raw LLM explanations for reference while curating the ones below.
pprint({roi_name: explanations[roi_name] for roi_name in rois})

# Manually curated explanations, keyed by ROI; these are the values exported.
explanations_clean = {
    'EBA': 'Body parts',
    'IPS': 'Descriptive elements of scenes or objects',
    'OFA': 'Personal growth and reflection',
    'OPA': 'Direction and location descriptions',
    'OPA_only': 'Spatial positioning and direction.',
    'PPA': 'Scenes and settings',
    'PPA_only': 'Negative or critical expressions',
    'RSC': 'Travel and location names',
    'RSC_only': 'Travel and location names',
    'pSTS': 'Verbal interactions',
    'sPMv': 'Time and numbers',
}

# Print the 30 top-ranked ngrams per ROI as `"ROI": [...], ` lines so they can
# be pasted into a dict literal and edited by hand.
for roi in rois:
    preview = top_ngrams_df[roi].iloc[:30].values.tolist()
    print(f'"{roi}":', f'{preview}, ')
# Hand-maintained ngram lists per ROI; exported below as the
# `top_ngrams_module_correct` column. The "RSC" entries match the first 20
# ngrams of the preview printed above; the other lists are presumably copied
# from that printed output in the same way (TODO confirm).
# Entries are kept verbatim, including odd spellings such as 'spanding the length'
# and 'ithe school motto', since they are data rather than prose.
top_ngrams_clean = {
    "RSC": ['drove from vermont', 'moved to vermont', 'drove to washington', 'here in manhattan', 'here in boston', 'was in boston', 'off into vancouver', 'moved to chicago', 'back in manhattan', 'went to boston', 'was in mexico', 'back in boston', 'sitting in indianapolis', 'arrived in indianapolis', 'came to florida', 'i left vermont', 'here in houston', 'was in pennsylvania', 'moved to brooklyn', 'arrived in tokyo'],
    "OPA": ['onto the railing', 'against the railing', 'on the railing', 'towards the river', 'onto the sidewalk', 'towards the doors', 'towards the door', 'outside the windows', 'towards the ceiling', 'long hallway toward', 'to the horizon', 'see the horizon', 'and high rafters', 'towards the street', 'over the gulf', 'to my left', 'path that jutted', 'beautiful moonlit mountains', 'on the ceiling', 'on the windowsill'],
    "PPA": ['on the railing', 'on a dock', 'on the windowsill', 'mile of cornfields', 'the windowsill', 'onto the railing', 'outside the windows', 'across the parking', 'contain strip malls', 'against the railing', 'on a rainy', 'the rolling hills', 'beautiful moonlit mountains', 'of the sidewalk', 'giant stone cliffs', 'a strip mall', "'s sprawling green", 'lobster pots piled', 'on the sidewalk', 'nondescript office buildings'],
    "IPS": ['there were slats', 'onto the railing', 'on the railing', 'against the railing', 'the back hatch', 'four connected squares', 'in long rows', 'path that jutted', 'the double doors', 'on the sides', 'a long narrow', 'that forms horizontal', 'long rows of', 'sixty foot wide', 'spanding the length', 'put a board', 'sky with clouds', 'between buttered slices', 'were slats', 'nineteen sixty'],
    "pSTS": ['said excuse me', 'says excuse me', 'i stopped midstride', 'room went silent', 'scissors someone shouted', 'i provoked gasps', 'somebody then yelled', 'she started laughing', 'hook excuse me', 'i whirled around', 'asked i laughed', 'exhalation someone shouted', 'retorted rather loudly', 'stopped midstride', 'turned and said', 'hurry she exclaimed', 'behind me grabbed', 'i started yelling', 'script excuse me', 'looks over at'],
    "sPMv": ['one mississippi two', 'said excuse me', 'mississippi two mississippi', 'said guess what', 'am turning forty', 'april nineteen forty', 'says excuse me', 'say one mississippi', 'two mississippi three', 'october nineteen forty', 'was sixteen seventeen', 'and three mississippi', 'five only twenty', 'april of nineteen', 'mississippi three mississippi', 'hook excuse me', 'july nineteen forty', 'up three down', 'march twentieth nineteen', 'more time passed'],
    "EBA": ['wraps his arms', 'lifted her dress', 'arms flailing', 'arms around her', 'arms tighten around', 'flying arms flailing', 'hands gripped the', 'grabbed her legs', 'the chopsticks flipped', 'his hands folded', 'grab his arms', 'a hand poking', 'my feet kicking', 'his hand curling', 'grabbed their hands', 'grabbed her hand', 'blanket and yanked', 'her hands gripped', 'his arm around', 'covered my hand'],
    "OFA": ['of my childhood', 'newfound self esteem', 'so my shrink', 'hurtful first dates', 'recall many instances', 'it felt magical', 'answered many questions', 'my school days', 'no satisfying fantasies', 'my mom often', 'from our childhood', 'growing up we', 'good friends often', 'shaped their mind', 'everything my parents', 'hurt and confused', 'for something spiritual', 'my whirlwind dating', 'of her thoughts', 'changes your life'],
    "RSC_only": ['came to florida', 'back in israel', 'moved to london', 'traveled to marrakesh', 'sitting in indianapolis', 'went to boston', 'was in boston', 'moved to vermont', 'was in mexico', 'moved to chicago', 'were in paris', 'was in pennsylvania', 'arrived in tokyo', 'drove from vermont', 'here in boston', 'living in chicago', 'off into vancouver', 'here in houston', 'back in boston', 'back in marrakesh'],
    "OPA_only": ['towards the ceiling', 'onto the railing', 'on the ceiling', 'against the railing', 'feet hanging over', 'on the railing', 'towards the doors', 'seats behind', 'towards the door', 'lights peeking over', 'to my left', 'situated herself behind', 'you sit backward', 'to the horizon', 'maybe twelve feet', 'at the ceiling', 'towards the street', 'of seats behind', 'twenty feet above', 'his back turned'],
    "PPA_only": ['kind of corny', 'of bready puns', 'like burnt steak', 'pulled a muscle', 'his painting sucked', 'like your shirt', "'s painting sucked", 'important like pudding', 'a snake oil', 'had some scarring', 'ithe school motto', 'bready puns', 'liar fake', 'sometimes disrespected me', 'fake name', 'okay snake oil', 'for bad puns', 'of torn fishnet', 'richard had autism', 'tasted pretty bad'],
}

{'EBA': 'Physical actions',
 'IPS': 'Descriptive elements of scenes or objects',
 'OFA': 'Personal growth and reflection',
 'OPA': 'Direction and location descriptions',
 'OPA_only': 'Spatial positioning and direction.',
 'PPA': 'Scenes and settings',
 'PPA_only': 'Negative or critical expressions',
 'RSC': 'Travel and locations',
 'RSC_only': 'Travel and location transitions',
 'pSTS': 'Verbal interactions',
 'sPMv': 'Time and numbers'}
"RSC": ['drove from vermont', 'moved to vermont', 'drove to washington', 'here in manhattan', 'here in boston', 'was in boston', 'off into vancouver', 'moved to chicago', 'back in manhattan', 'went to boston', 'was in mexico', 'back in boston', 'sitting in indianapolis', 'arrived in indianapolis', 'came to florida', 'i left vermont', 'here in houston', 'was in pennsylvania', 'moved to brooklyn', 'arrived in tokyo', 'moved to london', 'off in vancouver', 'traveled to marrakesh', 'moved to washington', "'m in michigan", 'back in brooklyn', 'i drove to', 

In [None]:
# Rich display of the ranked ngrams, restricted to the exported ROI columns.
top_ngrams_df.loc[:, rois]

In [None]:
# Column-wise data for the export table: one entry per ROI in `rois`.
roi_explanations = [explanations_clean[roi_name] for roi_name in rois]
roi_top_ngrams = [top_ngrams_clean[roi_name] for roi_name in rois]
# "<roi>_only" entries reuse the index list of their base ROI.
roi_voxel_nums = [rois_dict[roi_name.replace('_only', '')] for roi_name in rois]

rows = {
    'roi': rois,
    'expl': roi_explanations,
    'top_ngrams_module_correct': roi_top_ngrams,
    # 'question': questions,
    'subject': ['UTS02' for _ in rois],
    'voxel_nums': roi_voxel_nums,
}

In [None]:
# Materialise the export table (one row per ROI) and write it to disk.
rows_df = pd.DataFrame(rows)
rows_df.to_pickle('rows_roi_uts02_may31.pkl')