In [None]:
%load_ext autoreload
%autoreload 2
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
import sys
from typing import List
import numpy as np
import joblib
from pprint import pprint
import imodelsx.util
import sasc.viz
import pickle as pkl
from PIL import Image
import img2pdf
import json
from copy import deepcopy
from numpy.linalg import norm
from math import ceil
from sasc.config import CACHE_DIR, RESULTS_DIR, cache_ngrams_dir, regions_idxs_dir, FMRI_DIR
import sasc.modules.fmri_module
ngrams_list = joblib.load(join(cache_ngrams_dir, 'fmri_UTS02_ngrams.pkl')) # ngrams are same for both models

subject = 'S03'
suffix_setting = '_fedorenko'
# subject = 'S03'
# rois_dict = joblib.load(join(regions_idxs_dir, f'rois_{subject}.jbl'))
# rois = joblib.load(join(FMRI_DIR, 'brain_tune/voxel_neighbors_and_pcs/', 'communication_rois_UTS02.jbl'))
rois = joblib.load(join(FMRI_DIR, 'brain_tune/voxel_neighbors_and_pcs/', f'communication_rois_v2_UT{subject}.jbl'))
rois_dict_raw = {i: rois[i] for i in range(len(rois))}
# custom merge contralateral regions
if subject == 'S02':
    raw_idxs = [
        [0, 7],
        [3, 4],
        [1, 5],
        [2, 6],
    ]
elif subject == 'S03':
    raw_idxs = [
        [0, 7],
        [3, 4],
        [2, 5],
        [1, 6],
    ]
rois_dict = {
    i: np.vstack([rois_dict_raw[j] for j in idxs]).sum(axis=0)
    for i, idxs in enumerate(raw_idxs)
}
if suffix_setting == '_fedorenko':
    rois_fedorenko = joblib.load('lang_localizer_UTS03.jbl')
    rois_dict = {
        i: rois_fedorenko[i] for i in range(len(rois_fedorenko))
    }
    # rois_dict = rois_dict_raw

### Visualize ROIs
Make sure these actually look right (esp. with contralateral regions)

In [None]:
# # # save pcs
n_rois = len(rois_dict)
for i in tqdm(range(n_rois)):
    flatmap = rois_dict[i]
    # flatmap = np.zeros(weights_arr_full.shape[1])
    # flatmap[pfc] = pc_coefs_per_voxel[:, pc_num]
    sasc.viz._save_flatmap(
        flatmap, subject, fname_save=f'rois_custom_images/communication_{i}_{subject}.png')

# read all plots and save as subplots on the same page
C = 2
R = ceil(n_rois/C)
fig, axs = plt.subplots(R, C, figsize=(C * 4, R * 2))
axs = axs.ravel()
for i in range(n_rois):
    axs[i].imshow(Image.open(
        f'rois_custom_images/communication_{i}_{subject}.png'))
    axs[i].axis('off')
    axs[i].set_title(f'ROI {i}')
plt.savefig(f'communication_subplots_{subject}{suffix_setting}.png')

### Get predictions from embs
Run roi_custom_save_top_ngrams.py

### Load outputs

In [None]:
def return_ranked_df(outputs_dict, rois_dict):
    outputs_dict_rank = {}
    for i, k in tqdm(enumerate(outputs_dict)):
        outputs_by_vox = np.vstack(outputs_dict[k])  # n_voxels x n_ngrams
        roi_mean = outputs_by_vox.mean(axis=0)
        roi_mean_other = np.mean([
            np.vstack([outputs_dict[j]])[0].mean(axis=0)
            for j in rois_dict.keys() if j != i
        ])

        # standard approach: use avg response by voxel
        # outputs_dict_rank[k] = roi_mean

        # use ranks by voxel rather than avg response by voxel
        # outputs_dict_rank[k] = np.argsort(outputs_by_vox, axis=1).mean(
        #     axis=0) * -1  # / outputs_by_vox.shape[1]

        # just output voxels no ranking
        vox_idxs = np.where(rois_dict[i])[0]
        for i in range(len(outputs_dict[k])):
            # voxel only
            # outputs_dict_rank[f'ROI{k}_vox{vox_idxs[i]}'] = outputs_dict[k][i]

            # subtract mean of roi from the individual voxel
            # outputs_dict_rank[f'ROI{k}_vox{vox_idxs[i]}'] = outputs_dict[k][i] - \
            # roi_mean

            # subtract mean of all other voxels in roi from the individual voxel
            outputs_dict_rank[f'ROI{k}_vox{vox_idxs[i]}'] = outputs_dict[k][i] - \
                roi_mean_other

    df = pd.DataFrame(outputs_dict_rank, index=ngrams_list)
    return df


rank_individual_voxels = False
if rank_individual_voxels:
    outputs_dict = joblib.load(
        join(cache_ngrams_dir, f'rois_communication_ngram_outputs_dict_voxels_{subject}{suffix_setting}_opt.pkl'))
    df_opt = return_ranked_df(outputs_dict, rois_dict)
    outputs_dict = joblib.load(
        join(cache_ngrams_dir, f'rois_communication_ngram_outputs_dict_voxels_{subject}{suffix_setting}_llama.pkl'))
    df_llama = return_ranked_df(outputs_dict, rois_dict)
    df = df_opt + df_llama
else:
    outputs_dict = joblib.load(
        join(cache_ngrams_dir, f'rois_communication_ngram_outputs_dict_{subject}{suffix_setting}_opt.pkl'))
    df_opt = pd.DataFrame(outputs_dict, index=ngrams_list)
    outputs_dict = joblib.load(
        join(cache_ngrams_dir, f'rois_communication_ngram_outputs_dict_{subject}{suffix_setting}_llama.pkl'))
    df_llama = pd.DataFrame(outputs_dict, index=ngrams_list)
    df = df_opt + df_llama

    # replace df values with ranks (here, higher is better, both before and after)
    df = df.rank(axis=0)

    ROI_NAMES = rois_dict.keys()
    for k in ROI_NAMES:
        df_opt[str(k) + '_only'] = df_opt[k] - \
            df_opt[[c for c in ROI_NAMES if c != k]].mean(axis=1)
        df_llama[str(k) + '_only'] = df_llama[k] - \
            df_llama[[c for c in ROI_NAMES if c != k]].mean(axis=1)
        df[str(k) + '_only'] = df[k] - \
            df[[c for c in ROI_NAMES if c != k]].mean(axis=1)


stability_scores = {
    k: np.corrcoef(df_opt[k], df_llama[k])[0, 1]
    for k in df.columns
}

# get top ngrams (highest scores/ranks) for each ROI
top_ngrams_dict = {}
for k in tqdm(df.columns):
    top_ngrams_dict[k] = df[k].sort_values(
        ascending=False).index[:100].tolist()
top_ngrams_df = pd.DataFrame(top_ngrams_dict)
top_ngrams_df.to_csv(f'top_ngrams_by_roi_{subject}{suffix_setting}.csv')
with pd.option_context('display.max_rows', None):
    # rois = rois_dict.keys()
    # rois = [r for r in rois if not r == 'pSTS']  # never localized pSTS in S03
    display(top_ngrams_df.head(30))
top_ngrams_df.to_pickle(
    f'top_ngrams_custom_communication_{subject}{suffix_setting}.pkl')

In [None]:
# # rois known (may use these as baselines)

# outputs_dict_known = joblib.load(
#     join(cache_ngrams_dir, f'rois_ngram_outputs_dict_{subject}_opt.pkl'))
# df_opt_known = pd.DataFrame(outputs_dict_known, index=ngrams_list)
# outputs_dict_known = joblib.load(
#     join(cache_ngrams_dir, f'rois_ngram_outputs_dict_{subject}_llama.pkl'))
# df_llama_known = pd.DataFrame(outputs_dict_known, index=ngrams_list)
# df_known = df_opt_known + df_llama_known
# top_ngrams_dict_known = {}
# for k in df_known.columns:
#     top_ngrams_dict_known[k] = df_known.sort_values(k).index[:100].tolist()
# top_ngrams_df_known = pd.DataFrame(top_ngrams_dict_known)

In [None]:
# gpt4 = imodelsx.llm.get_llm('gpt-4-turbo-0125-spot')
# try:
#     explanations = json.load(open(f'explanations_by_roi_{subject}.json', 'r'))
# except:
#     explanations = {}
# for k in top_ngrams_df.columns:

#     s = '- ' + '\n- '.join(top_ngrams_df[k].iloc[:60])
#     prompt = f'''Here is a list of phrases:
#     {s}

#     What is a common theme among these phrases? Return only a concise phrase.'''
#     if not k in explanations:
#         explanations[k] = gpt4(prompt)
# # json.dump(explanations, open(
#     # f'explanations_by_roi_communication_{subject}.json', 'w'), indent=4)

In [4]:
explanations = json.load(
    open(f'explanations_by_roi_communication_{subject}{suffix_setting}.json', 'r'))
# dataframe of explanations and stability scores
explanations_df = pd.DataFrame(explanations, index=['explanation']).T
explanations_df['explanation'] = explanations_df['explanation'].str.lower().str.replace(
    r'[^\w\s]', '').str.replace(r'\s+', ' ').str.strip()
stab = pd.Series(stability_scores)
# set index to al lbe strings
stab.index = stab.index.astype(str)
explanations_df['stability'] = stab

# add top ngrams
top_ngrams_list_list = top_ngrams_df.T.apply(lambda x: x.tolist(), axis=1)
top_ngrams_list_list.index = top_ngrams_list_list.index.astype(str)
explanations_df['top_ngrams'] = top_ngrams_list_list
explanations_df.to_csv(
    f'communication_explanations_{subject}{suffix_setting}.csv')

with pd.option_context('display.max_rows', None, 'display.max_colwidth', None):
    display(explanations_df)

Unnamed: 0,explanation,stability,top_ngrams
0,personal experiences and relationships,0.509865,"[talked endlessly, googling her endlessly, canoodling commenced, the canoodling commenced, she in cahoots, my closest confidant, my constant companion, she talked endlessly, the paramedics insist, son had finally, would wax philosophical, given my history, and my therapist, anonymous twitter acquaintance, other egghead phds, i impress everyone, continue my quest, i 'd rehearsed, an illicit rendezvous, honking waterfowl ubiquitous, therapy i had, a mutual friend, spoke passionate russian, responsibilities i arranged, his opponents luddites, we fought constantly, the trip looming, our sporadic rendezvous, became my cellmate, religion my family, as an inveterate, hear my contemporaries, witnessing my incredibly, my shouted admonishments, that my shrink, had recently graduated, boyish almost biblical, inhabitants most demographers, bready puns ensued, america i often, illness my sister, met my wife, his pen pal, criticizing my hairdo, hiroshima i practiced, i had studiously, illicit rendezvous, terrific conversationalist, at my urging, accountant recently broached, a father myself, that my demure, into my incarceration, friend since childhood, point my shrink, memories they say, long existential conversations, years of babysitting, history she taught, me on dates, my country finally, been blindsided bullied, the surgery i, the visit i, puns ensued, the nurses insisted, were betrayed arrested, my most loyal, my highly sanitized, unsettled i sensed, threats hate mail, humor which i, been to therapy, obscure german chemists, holidays everyone counseled, our vows unimpeachable, romantic things lovelorn, philadelphia she practiced, the breakup i, done my research, aforementioned then girlfriend, a cambridge educated, arrive my savior, therapy but i, and i bickered, a pen pal, my past because, already my accountant, 's pen pal, and my upbringing, talked to friends, officer our instructor, personality any diagnosis, i get interviewed, my father finally, a terrific conversationalist, years of slacking, the common refrain, the teachers i, bossed me around]"
1,emotional interactions and reactions.,0.468325,"[googling her endlessly, retorted rather loudly, screamed obscenities, and screamed obscenities, she in cahoots, canoodling commenced, and gesticulate wildly, talked endlessly, doubt my sincerity, our vows unimpeachable, hurry she exclaimed, i walk briskly, smiling this smile, the canoodling commenced, i provoked gasps, my shouted admonishments, defense i exaggerated, unsettled i sensed, she wrote bluntly, and i sheepishly, smiled i smiled, criticizing my hairdo, practiced that answer, boyish almost biblical, quick kiss whispered, at me quizzically, repeated her affirmation, safe she scoffed, bready puns ensued, she talked endlessly, offensive i sincerely, my highly sanitized, been blindsided bullied, and screams fuck, honking waterfowl ubiquitous, ageless but clearly, compliment like wildly, i openly retaliated, the slut skank, said no i, screams fuck you, drawing me inescapably, spoke passionate russian, frantic very perceptive, i said shyly, and yelling i, that my demure, waved wildly, that i genuinely, at him incredulously, at me bewildered, a snide comment, justified my rudeness, the paramedics insist, personality any diagnosis, they shared dna, i impress everyone, arrive my savior, shouted admonishments, a sandwich rejected, call her my, i was yanked, other egghead phds, behind me feigning, and drooling possibly, impotently i ask, seemed boldly unafraid, his opponents luddites, played tetris furiously, my closest confidant, i had unwittingly, eventually i forgave, asked i laughed, hailed as idiotic, she yelled i, me his princess, gentleman tall handsome, finally self forgiveness, witnessing my incredibly, me and gesticulate, gesticulate wildly, pushed down lifted, would wax philosophical, puns ensued, cussing and sinning, i had studiously, of outrage disdain, response was nah, disappeared reason dictated, dollar i happily, google myself and, nuzzling me panting, sometimes lie occasionally, memories they say, myself well obviously, aforementioned then girlfriend, just guilty unredeemable, my constant companion, medicated into submission, loving me but]"
2,personal experiences and relationships,0.447274,"[this fire exting, not being able, not be able, fire exting, ever be able, hopefully being able, being able, of being able, have been able, to be able, and my therapist, she need jesus, that my shrink, a cambridge educated, other egghead phds, get myself arrested, a manhattan project, meet my, my highly sanitized, his pen pal, 's pen pal, losing my wife, i was gay, already my accountant, for my therapist, not having ptsd, arrive my savior, like aryan nation, cussing and sinning, dialing the unboyfriend, juggling and jesus, my black friends, my closest confidant, few great loves, that my friends, a prince charming, then a speechwriter, rescue my wife, a mutual friend, a pen pal, 'm somewhat famous, this aryan girl, spoke passionate russian, and my marriage, my maternal instinct, my shrink looks, me his princess, an evangelical christian, one lovely gentleman, that my flirting, his opponents luddites, flexibility mary kay, and this policeman, one girl friend, uh jewish policeman, told the unboyfriend, me an enabler, point my shrink, a real doctor, a con man, had asked god, of finnish heritage, like a jewish, aforementioned then girlfriend, half puerto rican, you that uncle, been mary kay, she 's jewish, been blindsided bullied, he finally axed, a devout atheist, as an inveterate, my jewlicious friend, job running interference, totally insane alcoholic, that my demure, a gay speechwriter, two named vladimir, became my beloved, the very adult, rican half italian, was french canadian, an aryan, 'd married well, khan my nanny, his cop training, lawfully wedded husband, and bachelor buddy, hustle a hustler, call the babysitter, let a lover, like whitney houston, her own neurosurgeon, smoking is butch, i 'm gay, start having christian, that mary kay, all good texans, my shrink is, quoted taylor swift]"
3,interactions and communications between individuals.,0.572471,"[spoke passionate russian, rescue my wife, and screamed obscenities, calls scotland yard, calls my brother, and screams fuck, homework my parrot, other egghead phds, screamed obscenities, and my therapist, groceries said hello, i provoked gasps, carried my bride, google myself and, cousin hava aunt, aforementioned then girlfriend, illness my sister, and my parrot, called me monsieur, backyard wielded screwdrivers, told him vamonos, the paramedics insist, married my brother, a cambridge educated, he goes ma'am, mom my sister, canoodling commenced, screams fuck you, pop singers moaned, singers moaned, the slut skank, hurry she exclaimed, meet my, became my cellmate, dressed like gandalf, splash her brother, partner michael radioed, paramedics the cops, dialing the unboyfriend, daughter my cousin, two named vladimir, scissors someone shouted, that my shrink, behind me grabbed, point my manager, tells my wife, watched her lover, a mutual friend, retorted rather loudly, me his princess, i said shyly, called my husband, downstairs our neighbors, murdering my fiance, to my boss, called my friend, said mom mom, missed my flatmates, called the neighbor, and sips his, mutual friend called, that my demure, and my translator, your violin curses, mother was french, now ex wife, caught our son, my jewlicious friend, my shrink looks, morning the neurosurgeon, obscure german chemists, khan my nanny, harm my son, he screams dude, bless fotomat guy, rough hands groped, point my shrink, my shouted admonishments, was of finnish, call from nasa, meets my dad, bready puns ensued, into my boss, name to christina, call the babysitter, cousin my sister, i was yanked, told my editor, the canoodling commenced, his opponents luddites, said uh hey, told the unboyfriend, waved wildly, call the paramedics, exhalation someone shouted, wrestling karl marx, and this policeman, says excuse me, arrive my savior, palm my fiance]"
4,interpersonal communication and relationships,0.504159,"[rescue my wife, google myself and, and goes hey, said no i, and screams fuck, retorted rather loudly, response was nah, said well yes, said well yeah, screams fuck you, says uh actually, said uh hey, groceries said hello, hurry she exclaimed, doctor well i, aforementioned then girlfriend, that my friends, he said sure, repeated her affirmation, practiced that answer, i provoked gasps, and said yes, i said fine, and says hey, be my husband, said excuse me, says sure and, she yelled i, said guess what, he asked okay, husband my husband, he goes ma'am, me his princess, and yelling i, her googling myself, was like hey, married my brother, told the unboyfriend, now ex wife, to marry me, said okay fine, carried my bride, him that yes, dialing the unboyfriend, impotently i ask, finally said okay, said did i, google her and, smiling this smile, she said yes, she in cahoots, marry this man, i just apologized, are witnessing me, wrote marry me, said without hesitation, i asked immediately, friends my friend, marriage with me, asked i laughed, out my husband, said mama i, says excuse me, tells my wife, call her my, he says sure, said well okay, and screamed obscenities, lawfully wedded husband, i said shyly, whisper she said, cousin my sister, losing my wife, girl like hey, point my manager, finally axed me, had asked god, beyond my husband, literally two humans, said hey babe, was like fuck, girlfriend now ex, eventually forgave me, a mutual friend, meet my, googling her endlessly, called me son, intent my friends, england well i, said well i, me over and, murdering my fiance, other egghead phds, watched her lover, he had googled, just nodded yes, and i sheepishly, googling her googling, wanted i asked, canoodling commenced]"
0_only,shared experiences and memories,0.617126,"[we were having, were having dinner, were having lunch, playing board games, day we spent, one evening after, was watching television, it was summer, the holidays, was hanging out, each semester we, every night we, tonight we, in my early, summer holidays and, we were making, were discussing, spent our summer, at this dinner, spent the afternoon, one evening, warm fall evening, we were discussing, in our childhood, culture there that, each night we, we were renting, i was younger, was doing my, were doing crafts, in our initial, we 'd often, as the semester, that first night, warm summer evening, having a potluck, were just having, for the potluck, we just kinda, anyway the day, i was visiting, camp we had, camp we were, was playing pool, growing up we, so the semester, we 'd always, was growing up, had this dinner, doing crafts, pretty amazing experience, i was spending, morning we gathered, remember having lunch, we were playing, were just starting, going to restaurants, spend the day, spent the morning, all the preparation, know for years, we were planning, 'm here tonight, having dinner, took the subway, we worked on, it was late, scuba trip, at a picnic, for the holidays, our discussions, then we headed, this particular evening, we always just, had the opportunity, one afternoon when, on that evening, can be awkward, almost every evening, routine we were, was pretty surreal, stressful situations, that evening, we were growing, always kind of, at the reunion, way our economy, over the years, christmas we were, strategy we, was at a, summer evenings, in my childhood, whenever we went, all these ceremonies, was working in, it was awkward, in recent years, we were sharing, working at the]"
1_only,expressions of feelings and perceptions,0.538261,"[it felt impossible, felt impossible, seem inherently, us seemed somehow, little far fetched, my stomach turning, sense of certainty, felt so relieved, look well grayish, so concisely, to know definitely, seemed somehow, excruciatingly slow, felt weird like, so obviously wrong, could probably maybe, stand very straight, managed to somehow, everything seemed hopeless, it tasted pretty, really brief like, looks so much, up feeling really, kind of revolted, be ok somehow, feels entirely, totally looks, reason seem doable, up so easily, somehow easier, them up perfectly, it felt strangely, floor was steeper, i laughed maybe, seemed somehow easier, feels more ridiculous, and afraid somehow, was all misshapen, it felt okay, so thoroughly, feels inevitably, designed out entirely, situation so concisely, super tiny like, feeling of accomplishment, know for certain, realize exactly, looks really, louder it seems, starting to realize, so easily so, not really fully, however tenuously, feeling okay, just so profoundly, felt triumphant, good but somehow, much privacy somehow, walking very slowly, far too quickly, ways that ostensibly, for about maybe, seeming slow motion, it seemed beyond, turns really quickly, it only maybe, estimate maybe, oppressively hot, extremely inefficient, seconds goes quickly, felt quite significant, slowly at first, things in terms, ugly but, felt sort of, felt completely, looks kind of, it was unbearably, heart beating fast, that 's ridiculous, answer feels entirely, tiny like there, kind of vague, ridiculous it, it feeling rather, the effort necessary, areas seem inherently, way that feels, believable enough, seemed hopeless, seem doable, uh sure yeah, bit gloomy though, it all feels, seemed kinda, sort of leans, filled with dread, just seemed too, drop just instantly, really stunningly beautiful]"
2_only,there is no common theme among these phrases.,0.293927,"[this inflatable, straight edge, white knight, black box, for this inflatable, 's an alcohol, tennant, no children, evil empire, three nobel peace, forty four cranes, stay at home, abandonment, st bernard, atlas, nobel peace, fall out, one single print, night man, air force, ymca, children 's peace, this pick up, dragonfly, only child, and casualty, yuji, tinker, a chimera, gold star, big white, this blind, left behind, beeper, cold war, white lady, no plane, outsider, of superpower, beaker, the tea party, pager, firemen, the cold war, sweetener, this rabbit, superpower, vader is the, six year old, 's the blackout, sweet baby, too many children, a cabbage patch, red light, five year old, goth, enabler, 's a blackout, in an inflatable, and racially, and identity, white skin, less able, paul, bear in, an inflatable, a blackout, this crystal, alcoholic, special immigrant, oh man attachment, death penalty, different kids, oh baby, a fourteen year, n't just a, white taxi, nineties new, good guy, wants to kill, dirtbag, linear economy, first home, shilling, year old child, free to be, cover up, blonder, tetris effect, tuckerman, stephen, impossible to attribute, a steiner, the blind, a tea party, vader, other pager, party line, nantucket, a car bomb]"
3_only,"""actions involving hands and objects""",0.684034,"[popop sitting on, wrapped my napkin, hold handkerchief, flames around his, sitting on his, sat on his, grabbed the basket, to hold handkerchief, goggles and fins, towel on her, donkey next to, white handkerchief to, handkerchief to, hand in his, knife in his, antoine brun and, buttons of his, tears in his, thumb over his, bit nineteen sixty, feet kicking through, clicked on his, the beaker and, boat pulls up, 's nineteen seventy, black rotary dial, beaker in his, wraps his, rolling down his, took a blanket, fingers and his, clutching, shoes between his, call the anchorage, knife and, he reached under, pillow in her, woman were crying, gun in his, handset and, throwing napkins, wrapped the rope, irri thirty five, silver paper and, bed bobbing through, picked his hands, boots clanking on, napkin around, your forefingers and, drag their beach, boat twenty five, nineteen seventy, microphone and, took a scarf, bus stop and, hundred n fifty, her little eyes, ram 's throat, scratched her ears, fork and knife, player mikey and, see the microphone, there from julie, reaches in his, biplanes parked, in nineteen fifty, hold a coconut, your sheep gloves, 's skull touching, clanking on the, reached under her, her sails, and wrapping their, cow munching some, 's nineteen forty, from jarkko twenty, pug laughing, silver twenty nine, 're throwing napkins, guy sitting on, bring our blankets, and the chopsticks, tears rolling down, was sitting behind, pan and, hundred ninety, him sitting on, bag tied the, finger and, woman wrapped in, sat on a, saw his fingers, their milk bottle, an overturned cardboard, their thumbs and, hand on his, with two hundred, calif jenny and, in his pocket, gripping the]"
4_only,"the common theme among these phrases is ""communication or conversation with uncertainty or hesitation"".",0.584923,"[mean you it, you should you, so why did, this for you, them what happened, he could you, do that then, basically what happened, leave and then, and then what, well why did, do realize what, that uh it, even realized why, between those two, they uh you, than um well, looking down at, somehow and then, 'll do it, then come home, him and then, covers everything then, first and then, come back and, then what happened, because um what, like could you, than use them, bring him here, him or why, uh did, well you you, wanted to or, then you they, and then then, we uh we, why you did, mean by that, them he said, do and then, everything and also, say and then, 'll do that, doing with them, then come back, it for him, to um well, we would we, for them for, understand what that, you arrive then, and then and, well um yes, you what happened, everything for him, was and then, he said to, we um we, like oh okay, before and then, 'll do everything, turned around to, know what for, would when you, so i uh, happened and then, it um you, 's like yes, 'd like you, 'd come and, for no it, towards us, i would you, wanted to ask, ever did to, so instead what, and will do, nodded and then, once and then, come and then, say to him, turn around to, that uh you, then yeah, know why and, that uh we, do about it, there um you, why for, um then we, doing with, more and then, and what happened, for you or, she um you, even remember why, them to you, put under you, so so what]"


In [None]:
# lowercase, remove punctuation, trim
explanations_df['explanation'] = explanations_df['explanation'].str.lower().str.replace(
    r'[^\w\s]', '').str.replace(r'\s+', ' ').str.strip()
explanations_df['explanation'].value_counts().head(50)
# split index on _
explanations_df['roi'] = explanations_df.index.str.split('_').str[0]
# groupby roi and show most common explanations
# display all
with pd.option_context('display.max_rows', None):
    display(explanations_df.groupby('roi')[
            'explanation'].value_counts().groupby('roi').head(5))
# explanations_df.groupby('roi')['explanation'].value_counts().groupby('roi').head(50)
# explanations_df.to_csv(f'communication_explanations_{subject}.csv')

### S03 Export selected rois to pkl

In [None]:
rois = ['RSC', 'OPA', 'PPA', 'IPS', 'sPMv', 'EBA', 'OFA'] + \
    ['RSC_only', 'OPA_only', 'PPA_only']  # all but 'pSTS'
# pprint({k: explanations[k] for k in rois})
explanations_clean = {
    'EBA': 'Body parts',
    'IPS': 'Descriptive elements of scenes or objects',
    # OFA differs from UTS02 (which was "'Personal growth and reflection',")
    'OFA': 'Conversational transitions',
    'OPA': 'Direction and location descriptions',
    # OPA_only differs from UTS02 (which was 'Spatial positioning and directions')
    'OPA_only': 'Self-reflection and growth',
    'PPA': 'Scenes and settings',
    'PPA_only': 'Unappetizing foods',
    'RSC': 'Travel and location names',
    'RSC_only': 'Location names',
    # sPMv differs from UTS02 (which was 'Time and Numbers')
    'sPMv': 'Dialogue and responses',
}
explanation_avoid_suffixes = {
    'EBA': ' Avoid mentioning any locations.',
    'IPS': ' Avoid mentioning any locations.',
    'OFA': ' Avoid mentioning any locations.',
    'OPA': ' Avoid mentioning any specific location names (like "New York" or "Europe").',
    'OPA_only': ' Avoid mentioning any specific location names (like "New York" or "Europe").',
    'PPA': ' Avoid mentioning any specific location names (like "New York" or "Europe").',
    'PPA_only': ' Avoid mentioning any specific location names (like "New York" or "Europe").',
    'RSC': '',
    'RSC_only': '',
    'sPMv': ' Avoid mentioning any locations.'
}
for roi in rois:
    print(f'"{roi}":', str(
        top_ngrams_df[roi.replace('1', '').replace('2', '')].iloc[:50].values.tolist()) + ', ')
top_ngrams_clean = {
    "RSC": ['was led upstairs', 'onto the subway', 'to the hallway', 'drove to washington', 'back through london', 'and darted downstairs', 'past the offices', 'long hallway toward', 'down the sidewalk', 'back in manhattan', 'reached the interstate', 'just blocks away', 'drove from vermont', 'was standing outside', 'to a courtyard', 'in the alley', 'up the coast', 'from my dorm', 'in the courtyard', 'in central park', 'i walk outside', 'here in manhattan', 'darted downstairs', 'facing the beach', 'walk through downtown', 'wander the hallways', 'i ran downstairs', 'down the hall', "'m standing outside", 'off into vancouver', 'through the streets', 'sitting in indianapolis', 'on sixth avenue', 'i go upstairs', 'across the street', 'arrived in indianapolis', 'we were downtown'],
    "OPA": ['railing looking out', 'across a plateau', 'up the coast', 'against the railing', 'in the courtyard', 'up the hill', 'above the gulf', 'outside the windows', 'long hallway toward', 'over the gulf', 'past the offices', 'through the windows', 'beside the river', 'past the waterfall', 'across the bridge', 'this long hallway', 'to a courtyard', 'and the courtyard', 'and behind me', 'down this embankment', 'towards the river', 'the hill up', 'courtyard was surrounded', 'in an alcove', 'onto the railing', 'along the coast', 'up the stairs', 'across the quadrangle', 'facing the beach', 'to the north', 'down the corridor', 'through the gates', 'over the embankment', 'onto the bridge', 'down that corridor', 'down the sidewalk', 'i looked across', 'path that jutted', 'through this door', 'the lagoon behind', 'down the embankment', 'on the railing', 'on the embankment', 'through the doors', 'on the windowsill', 'corridor out onto', 'the buildings beside', 'to the hallway', 'by that window', 'past the city', 'door behind me', 'to the south', 'off the coast', 'cross the bering', 'around the reef', 'behind me i', 'driveway and behind', 'against the windows', 'across the street', 'to the shoreline', 'lagoon behind the', 'on the sidewalk', 'hall past the', 'off the east', 'of the ravine', 'surrounded the city', 'in the window', 'southern shore of', 'in the distance', 'onto the sidewalk', 'i look across', 'behind us i', 'behind us there', 'on the cliff', 'over the river', 'toward the ocean', 'on that terrace', 'row of stalls', 'sidewalk in front', 'down the long', 'on the walls', 'door to the', 'by the window', 'outside my door', 'outside the door', 'across from me', 'on the eastern', 'the hall past', 'down the lagoon', 'in the forest', 'that window in', 'around me the', 'to the barrier', 'the gulf where', 'road in front', 'in the hallway', 'across the parking', 'in the colonnade', 'to the western', 'surrounded by rooms'],
    "PPA": ['in an alcove', 'on the stoop', 'past the offices', 'against the railing', 'on the windowsill', 'in the alley', 'to a courtyard', 'the copier room', 'in the courtyard', 'this long hallway', 'to the hallway', 'on a dock', 'in the hallway', 'long hallway toward', 'outside the windows', 'on that terrace', 'inside the hut', 'railing looking out', 'through the windows', 'down this embankment', 'on the subway', 'onto the subway', 'there were shelves', 'in my cubicle', 'a strip mall', 'on the sidewalk', 'in the colonnade', 'on the railing', 'into the basement', 'across the parking', 'a restaurant stoop', 'onto the railing', 'exit the subway', 'by the window', 'in that attic', 'was led upstairs', 'in the basement', 'the food court', 'and the courtyard', 'in the cafeteria', 'hall past the', 'into the parking', 'in the windowless', 'back room where', 'on my bed', 'down the sidewalk', 'contain strip malls', 'onto the sidewalk', 'the hall closet', 'at those cliffs'],

    "RSC_only": ['moved to chicago', 'drove from vermont', 'came to florida', 'here in manhattan', 'living in chicago',  'move to texas', 'leaving for france', 'back in manhattan', 'to boston to', 'went to boston', 'moved to vermont', 'geese in ohio', 'college in boston', 'in ohio', 'moved to brooklyn', 'normal suburban pittsburgh', 'moved to london', 'back in israel', 'to london to', 'come from israel', 'went to manchester', 'to columbus ohio', 'here in boston', 'i left vermont', 'from pittsburgh pennsylvania', 'in lower manhattan', 'hometown in texas', 'touring through europe', 'in warmer mexico', 'union in manhattan', 'suburban pittsburgh', 'moved to washington', 'was in boston', 'slacking in madison', 'chick from silverlake', 'heading to iraq', 'in chicago', 'in louisville kentucky', 'lived in hiroshima', 'in florida'],
    "OPA_only": ['eventually i forgave', 'push past it', 'eventually forgave', 'she eventually forgave', 'i forgave', 'to see ourselves', 'of myself which', 'means extending empathy', 'forgive and', 'i stopped myself', 'forgive and love', 'of the hurt', 'i rise above', 'i pushed myself', 'is and who', "'m hurt but", 'see ourselves and', 'the hurt', 'i persisted and', 'to forgive afterwards', 'was real to', 'comparing myself', 'looked in myself', 'selves which', 'around it and', 'self which translated', 'inside me that', 'of me which', 'overcome my ambivalence', 'to push myself', 'was also influenced', 'the side which', 'side which', 'independence and freedom'],
    "PPA_only": ['a garbage bag', 'that garbage bag', "'re throwing napkins", 'box of discarded', 'in sugar jars', 'their chew toys', 'those plastic containers', 'our dishwasher', 'skivvies toothbrush floss', 'of cheap beer', 'throwing napkins', 'a trash can', 'milk bottle tops', 'want a mcflurry', 'vomit smelling couch', 'grown napkins', 'my cheese sandwich', 'overpriced coffee shops', 'some lighter fluid', 'salad and stale', 'vomited a washpan', 'these brown paper'],

    "IPS": ['and behind me', 'against the railing', 'onto the railing', 'path that jutted', 'situated herself behind', 'above the gulf', 'door behind me', 'southern shore of', 'i looked across', 'along the edge', 'closed behind me', 'behind me and', 'across a plateau', 'on the railing', 'up behind me', 'leaning against the', 'towering above me', 'jutted into the', 'onto the bridge', 'and cut across', 'behind him and', 'and came around', 'front of us', 'up onto the', 'over the gulf', 'stood behind me', 'across the bridge', 'beside the river'],
    "sPMv": ['repeated her affirmation', 'said excuse me', 'asked i laughed', 'and goes hey', 'response was nah', 'hurry she exclaimed', 'said no i', 'just nodded yes', 'retorted rather loudly', 'was like hey', 'called her and', 'and said yes', 'and screams fuck', 'said uh hey', 'says sure and', 'says uh actually', 'was like hi', 'i said fine', 'said without hesitation', 'said well yes', 'says excuse me', 'i asked immediately', 'she yelled i', 'said mom mom', 'said did i', 'i said wow', 'i said shyly', 'asked her and', 'said okay okay', 'i sheepishly raise', 'which i responded', 'turned and said', 'then wrote yes', 'said yes i', 'whisper she said', 'was like mhm'],
    "EBA": ['arms around her', 'wraps his arms', 'hands gripped the', 'into my palm', 'hands into my', 'elbows on knees', 'grab his arms', 'his hands folded', 'into her arms', 'grabbed her hand', 'arms flailing', 'grabbed her legs', 'arm around my', 'grabbed their hands', 'lifted her up', 'put my arms', 'leaned his head', 'put his arms', 'shakes my hand', 'flying arms flailing', 'i leaned down', 'arms tighten around', 'her hands gripped', 'hand on his', 'my feet kicking', 'pinning my arms', 'held her hand', 'in a headlock', 'pressed my face', 'holds her hand', 'arms flailing holding', 'rubbing his head'],
    "OFA": ['and we talked', 'i even met', 'and so finally', 'one night my', 'one evening after', 'anyway the point', 'one summer my', 'weeks passed and', 'finally one day', 'then we talked', 'one night i', 'we chatted', 'and i talked', 'we talked and', 'talked and', 'so i texted', 'to my surprise', 'one afternoon when', 'i persisted and', 'and i finally', 'was watching television', 'i remember once', 'so one night', 'but anyway', 'and i met', 'when i finally', 'so i finally', 'on and on', 'son had finally', 'and i especially', 'so one day', 'and eventually i', 'we brace ourselves', 'so anyway', 'i was perusing', 'and it finally', 'later that day', 'so we finally', 'but anyhow', 'and as we', 'the day came', 'home one afternoon', 'then i finally', 'what fascinated me', 'and i vaguely', 'so i talked', 'once while i', 'was hanging out', 'i was reliving', 'but the most'],
}

rows = {
    'roi': rois,
    'expl': [explanations_clean[k] for k in rois],
    'top_ngrams_module_correct': [top_ngrams_clean[k] for k in rois],
    'stability_score': [stability_scores[k.split('_')[0]] for k in rois],
    'subject': [f'UT{subject}'] * len(rois),
    'voxel_nums': [rois_dict[k.split('_')[0]] for k in rois],
    'prompt_suffix': [explanation_avoid_suffixes[k] for k in rois],
}
rows = pd.DataFrame(rows)
rows.to_pickle(f'rows_roi_ut{subject.lower()}_may31.pkl')

### S02 Export selected rois to pkl

In [None]:
rois = ['RSC', 'OPA', 'PPA', 'IPS', 'pSTS', 'sPMv',
        'EBA', 'OFA'] + ['RSC_only', 'OPA_only', 'PPA_only2']  # 'PPA_only1',
# pprint({k: explanations[k] for k in rois})
explanations_clean = {
    'EBA': 'Body parts',
    'IPS': 'Descriptive elements of scenes or objects',
    'OFA': 'Personal growth and reflection',
    'OPA': 'Direction and location descriptions',
    'OPA_only': 'Spatial positioning and directions',
    'PPA': 'Scenes and settings',
    'PPA_only': 'Unappetizing foods',
    'RSC': 'Travel and location names',
    'RSC_only': 'Location names',
    'pSTS': 'Verbal interactions',
    'sPMv': 'Time and numbers'}
explanation_avoid_suffixes = {
    'EBA': ' Avoid mentioning any locations.',
    'IPS': ' Avoid mentioning any locations.',
    'OFA': ' Avoid mentioning any locations.',
    'OPA': ' Avoid mentioning any specific location names (like "New York" or "Europe").',
    'OPA_only': ' Avoid mentioning any specific location names (like "New York" or "Europe").',
    'PPA': ' Avoid mentioning any specific location names (like "New York" or "Europe").',
    'PPA_only': ' Avoid mentioning any specific location names (like "New York" or "Europe").',
    'RSC': '',
    'RSC_only': '',
    'pSTS': ' Avoid mentioning any locations.',
    'sPMv': ' Avoid mentioning any locations.'
}
for roi in rois:
    print(f'"{roi}":', str(
        top_ngrams_df[roi.replace('1', '').replace('2', '')].iloc[:50].values.tolist()) + ', ')
    # {
    # roi:  for roi in rois
# })
top_ngrams_clean = {
    "RSC": ['drove from vermont', 'to washington', 'in manhattan', 'here in boston', 'off into vancouver', 'moved to chicago', 'was in mexico', 'arrived in indianapolis', 'came to florida', 'i left vermont'],
    "OPA": ['onto the railing', 'towards the river', 'onto the sidewalk', 'towards the doors', 'outside the windows', 'long hallway toward', 'to the horizon', 'towards the street', 'over the gulf', 'to my left', 'path that jutted', 'on the ceiling', 'on the windowsill', 'down this embankment', 'up those stairs', 'above the gulf', 'facing the beach'],
    "PPA": ['mile of cornfields', 'the windowsill', 'the rolling hills', 'beautiful moonlit mountains', 'giant stone cliffs', 'a strip mall', 'nondescript office buildings', 'manicured lawns', 'lakes', 'the dark driveway', 'and shimmering skyscrapers', 'a private beach', 'the leafy garden', 'our modest backyard', 'my dorm'],

    "RSC_only": ['florida', 'israel', 'london', 'marrakesh', 'indianapolis', 'paris', 'pennsylvania', 'tokyo', 'tenessee', 'boston', 'vermont', 'chicago', 'indianapolis'],
    "OPA_only": ['towards the ceiling', 'onto the railing', 'feet hanging over', 'towards the doors', 'seats behind', 'towards the door', 'lights peeking over', 'to my left', 'situated herself behind', 'you sit backward', 'to the horizon', 'maybe twelve feet', 'at the ceiling', 'towards the street', 'of seats behind', 'twenty feet above', 'his back turned', 'see the horizon', 'seats behind the', 'to my right', 'and high rafters', 'about twenty feet', 'door behind me', 'the door behind', 'toward the back', 'over his shoulder', 'feet above the', 'hands went underneath', 'towards the ground', 'his feet hanging', 'feet touch the', 'behind her and', 'stand in front', 'down one side', 'on opposite sides', 'over the ceiling', 'on either side'],
    # "PPA_only": ['kind of corny', 'his painting sucked', 'snake oil', 'liar fake', 'fake name', 'bad puns', 'as an insult', 'called baloney'],
    "PPA_only2": ['like burnt steak', 'like pudding', 'tasted pretty bad', 'stale baked goods', 'the crusts', 'baloney', 'yeast extract', 'a sandwich rejected',],

    "IPS": ['there were slats', 'four connected squares', 'in long rows', 'on the sides', 'a long narrow', 'that forms horizontal', 'long rows of', 'sixty foot wide', 'between buttered slices', 'mile thick ice', 'all four corners', 'along the top'],
    "pSTS": ['said excuse me', 'says excuse me', 'room went silent', 'someone shouted', 'i provoked gasps', 'somebody then yelled', 'she started laughing', 'excuse me', 'asked i laughed', 'exhalation someone shouted', 'retorted rather loudly', 'turned and said', 'hurry she exclaimed', 'i started yelling', 'say excuse me', 'i started laughing', 'interrupted the conversation', 'breath he yelled', 'moment she gasped', 'said guess what'],
    "sPMv": ['one', 'forty', 'april nineteen forty', 'was sixteen seventeen', 'five only twenty', 'three down', 'march twentieth nineteen', 'more time passed', 'fifteen meters fifty', "turning ninety", 'june of nineteen'],
    "EBA": ['wraps his arms', 'lifted her dress', 'arms flailing', 'hands gripped the', 'grabbed her legs', 'his hands folded', 'my feet kicking', 'navigated pushy elbows', 'elbows on knees', 'over his shoulder'],
    "OFA": ['of my childhood', 'newfound self esteem', 'so my shrink', 'hurtful first dates', 'recall many instances', 'it felt magical', 'answered many questions', 'my school days', 'no satisfying fantasies', 'my mom often', 'from our childhood', 'growing up we', 'good friends often', 'shaped their mind', 'everything my parents'],
}

rows = {
    'roi': rois,
    'expl': [explanations_clean[k] for k in rois],
    'top_ngrams_module_correct': [top_ngrams_clean[k] for k in rois],
    'stability_score': [stability_scores[k.split('_')[0]] for k in rois],
    'subject': [f'UT{subject}'] * len(rois),
    'voxel_nums': [rois_dict[k.split('_')[0]] for k in rois],
    'prompt_suffix': [explanation_avoid_suffixes[k] for k in rois],
}
rows = pd.DataFrame(rows)
rows.to_pickle(f'rows_roi_ut{subject.lower()}_may31.pkl')