In [None]:
%load_ext autoreload
%autoreload 2
import os
import pickle as pkl
import sys
from copy import deepcopy
from os.path import join
from typing import List

import imodelsx.llm
import imodelsx.util
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from numpy.linalg import norm
from tqdm import tqdm

import sasc.modules.fmri_module
import sasc.viz
from sasc.config import CACHE_DIR, RESULTS_DIR, cache_ngrams_dir, regions_idxs_dir

In [None]:
# Load the cached inputs for this notebook: the n-gram list, the ROI index
# file for S02, and the per-ROI outputs computed for those n-grams.
ngrams_list = joblib.load(
    os.path.join(cache_ngrams_dir, 'fmri_UTS02_ngrams.pkl')
)
rois_dict = joblib.load(
    os.path.join(regions_idxs_dir, 'rois_S02.jbl')
)
outputs_dict = joblib.load(
    os.path.join(cache_ngrams_dir, 'rois_ngram_outputs_dict.pkl')
)

In [None]:
# One-off generation of 'rois_ngram_outputs_dict.pkl' (kept commented out for provenance):
# embs = joblib.load(join(cache_ngrams_dir, 'fmri_embs.pkl'))
# mod = sasc.modules.fmri_module.fMRIModule(
#     subject="UTS02",
#     checkpoint="facebook/opt-30b",
#     init_model=False,
#     restrict_weights=False,
# )
# voxel_preds = mod(embs=embs, return_all=True)
# outputs_dict = {
#     k: voxel_preds[:, np.array(rois_dict[k])].mean(axis=1)
#     for k in rois_dict
# }
# joblib.dump(outputs_dict, join(cache_ngrams_dir, 'rois_ngram_outputs_dict.pkl'))

In [None]:
# Build a table of the top-ranked n-grams for every column of `outputs_dict`.
# Rows of `df` are n-grams (index = `ngrams_list`); columns are the keys of
# `outputs_dict`.
df = pd.DataFrame(outputs_dict, index=ngrams_list)
ROIS_LOC = ['RSC', 'OPA', 'PPA']
N_TOP_NGRAMS = 100  # how many top-ranked n-grams to keep per column
ascending = False  # should be false to get driving ngrams
top_ngrams_dict = {}
# Snapshot the column labels up front: the loop below adds '<roi>_only'
# columns to `df`, and those must not be visited as base columns.
for k in list(df.columns):
    # Sort only the column of interest rather than the whole frame; the
    # resulting index order is all that is needed.
    top_ngrams_dict[k] = df[k].sort_values(
        ascending=ascending).index[:N_TOP_NGRAMS].tolist()
    if k in ROIS_LOC:
        # Contrast score: this ROI minus the mean of the other ROIS_LOC columns.
        other_rois = [c for c in ROIS_LOC if c != k]
        df[k + '_only'] = df[k] - df[other_rois].mean(axis=1)
        top_ngrams_dict[k + '_only'] = df[k + '_only'].sort_values(
            ascending=ascending).index[:N_TOP_NGRAMS].tolist()
top_ngrams_df = pd.DataFrame(top_ngrams_dict)
# display all rows
with pd.option_context('display.max_rows', None):
    display(top_ngrams_df)

In [None]:
# Write the top-n-gram table to a CSV file in the current working directory.
top_ngrams_df.to_csv(path_or_buf='top_ngrams_by_roi.csv')

In [None]:
# Ask the LLM for a short theme describing the top n-grams of each column of
# `top_ngrams_df`; results are collected in `explanations` keyed by column name.
N_NGRAMS_IN_PROMPT = 60  # only the first rows of each column are shown to the LLM
gpt4 = imodelsx.llm.get_llm('gpt-4-turbo-0125-spot')

explanations = {}
for k in top_ngrams_df.columns:

    # Bullet list of the leading n-grams for this column.
    s = '- ' + '\n- '.join(top_ngrams_df[k].iloc[:N_NGRAMS_IN_PROMPT])
    prompt = f'''Here is a list of phrases:
    {s}

    What is a common theme among these phrases? Return only a concise phrase.'''

    explanations[k] = gpt4(prompt)

In [None]:
# Inspect the entry stored for the RSC region.
# NOTE(review): presumably voxel indices (entries of `rois_dict` are exported
# as 'voxel_nums' further down) — confirm.
rois_dict['RSC']

### Export selected ROIs to a pickle file

In [None]:
# ROIs selected for export: eight base regions followed by the three
# '<roi>_only' contrast columns.
rois = [
    'RSC', 'OPA', 'PPA', 'IPS', 'pSTS', 'sPMv', 'EBA', 'OFA',
    'RSC_only', 'OPA_only', 'PPA_only',
]
# Hard-coded explanation phrase for each selected ROI.
# NOTE(review): presumably curated from the LLM output; to compare, run
#   print({k: explanations[k] for k in rois})
explanations_clean = {
    "RSC": 'Geographic locations',
    "OPA": 'Spatial descriptions',
    "PPA": 'Descriptive settings and landscapes',
    "IPS": 'Descriptive fragments',
    "pSTS": 'Verbal and nonverbal reactions in social interactions',
    "sPMv": 'Counting and time references',
    "EBA": 'Physical movements and interactions',
    "OFA": 'Personal growth and introspection',
    "RSC_only": 'Travel and locations',
    "OPA_only": 'Spatial orientation and positioning',
    "PPA_only": 'Negative or critical expressions',
}
# Print the first 20 n-grams of each selected ROI, one `"<roi>": [...], ` line
# per ROI (presumably for pasting into a dict literal).
for roi in rois:
    leading_ngrams = top_ngrams_df[roi].iloc[:20].values.tolist()
    print(f'"{roi}":', f'{leading_ngrams}, ')
# Hard-coded n-gram lists for each selected ROI, keyed by the same names as `rois`.
# NOTE(review): presumably a manually filtered copy of the printed top-20 lists;
# the lists are not all the same length (e.g. "RSC" has 17 entries, "OPA" has 20) — confirm.
top_ngrams_clean = {
    "RSC": ['moved to vermont', 'in downriver michigan', 'college in boston', 'northern midwest', 'in warmer mexico', 'in nashville tennessee', 'upstate new york', 'was in mexico', 'in minneapolis minnesota', 'traveled to marrakesh', 'in rural oregon', 'came to florida', 'suburbs of baltimore', 'downriver michigan', 'drove to washington', 'in louisville kentucky', 'here in manhattan'],
    "OPA": ['onto the railing', 'towards the ceiling', 'over the gulf', 'towards the river', 'against the railing', 'onto the sidewalk', 'on the railing', 'that distant shore', 'towards the doors', 'see the horizon', 'towards the door', 'facing the beach', 'seats behind', 'the door behind', 'reached the interstate', 'sidewalk in front', 'row of stalls', 'walled in backyard', 'on the ceiling', 'twenty feet above'],
    "PPA": ['mile of cornfields', 'beautiful moonlit mountains', 'giant stone cliffs', 'and shimmering skyscrapers', 'shining skyscrapers', 'on a dock', 'room with concrete', 'lakes and manicured', 'contain strip malls', 'shimmering skyscrapers', 'across the parking', 'little brick patio', 'park bench in', 'the striped curtains', "there 's trees", 'the dark driveway', 'of the sidewalk', 'and high rafters', 'of cornfields', 'walled in backyard'],
    "IPS": ['a long narrow', 'there were slats', 'that forms horizontal', 'four connected squares', 'the two poles', 'on the sides', 'all four corners', 'onto the railing', 'path that jutted', 'you sit backward', 'towards the ceiling', 'long rows of', 'the back hatch', 'divided bering strait', 'paper towel on', 'the tops of', 'the double doors', 'on the railing', 'pigtails with ribbons', 'this ice curtain'],
    "pSTS": ['room went silent', 'i stopped midstride', 'i provoked gasps', 'stopped midstride', 'said excuse me', 'says excuse me', 'asked i laughed', 'hear boys laughing', 'hook excuse me', 'crowd fell silent', 'moment she gasped', 'i scream stop', 'provoked gasps', 'she started laughing', 'scissors someone shouted', 'somebody then yelled', 'retorted rather loudly', 'smiled i smiled', 'script excuse me', 'stalls i brace'],
    "sPMv": ['mississippi two mississippi', 'up three down', 'said guess what', 'mississippi three mississippi', 'one mississippi two', 'five only twenty', 'said excuse me', 'was sixteen seventeen', 'april nineteen forty', 'googling her googling', 'say one mississippi', 'fifteen meters fifty', 'twenty minutes thirty', 'sixty nine nineteen', 'fifty sixty seventy', 'was july thirtieth', 'forty years pass', 'october nineteen forty', 'fifty pounds fifty', 'eleven march twenty'],
    "EBA": ['arms around her', 'lifted her dress', 'his hands folded', 'hands gripped the', 'elbows on knees', 'elbows on', 'grabbed her legs', 'over my shoulder', 'wraps his arms', 'grabbed their hands', 'elbows at', 'danced throwing blankets', 'elbows i swing', 'arms flailing', 'arms tighten around', 'her head brushed', 'holds it up', 'extended my hands', 'the sheets shuffled', 'covered my hand'],
    "OFA": ['ways of thinking', 'changes your life', 'good friends often', 'the family psychic', 'people whose ideas', 'of our conversations', 'well whatever um', 'games my mother', 'of her thoughts', 'change his life', 'our worst fears', 'newfound self esteem', 'absorb the lesson', 'feel the message', 'that my life', 'my worst nightmare', 'no satisfying fantasies', 'crush their spirits', 'it felt magical', 'thought my life'],
    "RSC_only": ['came to florida', 'traveled to marrakesh', 'moved to london', 'college in boston', 'moved to vermont', 'off in vancouver', 'home in minneapolis', 'triathlon in utah', 'year in ohio', 'vancouver in canada', 'home in chicago', 'back in marrakesh', 'go to spain', 'was in boston', 'living in chicago', 'sitting in indianapolis', 'in middlebury vermont', 'up in texas', "'m in michigan", 'in minneapolis'],
    "OPA_only": ['towards the ceiling', 'onto the railing', 'on the ceiling', 'you sit backward', 'towards the doors', 'against the railing', 'towards the door', 'seats behind', 'on the railing', 'twenty feet above', 'the door behind', 'feet hanging over', 'see the horizon', 'over the door', 'towards the ground', 'at the ceiling', 'his back turned', 'stand in front', 'that forms horizontal', 'towards the street'],
    "PPA_only": ['pulled a muscle', 'liar fake', 'theme she liked', "n't a fake", "'s a fake", 'kind of corny', 'of semi lame', 'nose job', 'his painting sucked', 'erect had rotted', 'ranch with', 'something she liked', "'s painting sucked", 'richard had autism', 'intentional lie or', 'or neutral intent', 'like a fake', 'have now messed', 'blueberry coffee cake', 'him semi jokingly'],
}

In [None]:
# Show the top-n-gram columns for the selected ROIs only, in `rois` order.
top_ngrams_df.loc[:, rois]

In [34]:
# Column-oriented export table: every value is a list with one entry per
# selected ROI, in the order given by `rois`.
expl_per_roi = [explanations_clean[name] for name in rois]
ngrams_per_roi = [top_ngrams_clean[name] for name in rois]
# '<roi>_only' entries reuse the `rois_dict` entry of their base ROI.
voxels_per_roi = [rois_dict[name.replace('_only', '')] for name in rois]

rows = {
    'roi': rois,
    'expl': expl_per_roi,
    'top_ngrams_module_correct': ngrams_per_roi,
    # 'question': questions,
    'subject': ['UTS02' for _ in rois],
    'voxel_nums': voxels_per_roi,
}

In [36]:
# Convert the export table to a DataFrame and pickle it to the current
# working directory.
rows_df = pd.DataFrame(rows)
rows_df.to_pickle('rows_roi_uts02_may31.pkl')