In [21]:
%load_ext autoreload
%autoreload 2
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
from typing import List
import numpy as np
import notebook_helper
import mprompt.viz
import openai
from pprint import pprint
import joblib
from collections import defaultdict
from mprompt.config import RESULTS_DIR
from typing import Tuple
import mprompt.llm
import json
# Point the openai client at a key file in the user's home directory
# (`~` is expanded here), so the key itself never appears in the notebook.
openai.api_key_path = os.path.expanduser('~/.OPENAI_KEY')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Get prompts

In [22]:
def get_rows_voxels(seed, n_voxels_per_category=4):
    '''Select rows from fitted voxels

    Loads the fitted-voxel results table, keeps the screened voxels listed in
    `voxel_nums` (taken from the mar 22 pilot JSON), shuffles them with `seed`
    and returns the matching result rows.

    Params
    ------
    seed
        Passed as `random_state` when shuffling the selected voxels, so the
        same seed always gives the same voxel order.
    n_voxels_per_category
        Not used by the current (mar 24) selection. It belonged to the earlier
        mar 22 strategy (random sample of voxels per category) and is kept so
        that existing calls which pass it keep working.

    Returns
    -------
    rows: pd.DataFrame
        One row of the results table per selected voxel that was found, in
        shuffled order, with the extra columns `id` and `expl`.
        Voxels with no matching row are reported with a printed
        "skipping ..." line and left out.
    '''
    # NOTE(review): unpickling can execute arbitrary code; only load results
    # files that were produced locally / by a trusted run.
    # Sorting by score (descending) means the first match for a voxel below is
    # its highest-scoring row.
    r = (pd.read_pickle('../results/results_fmri.pkl')
        .sort_values(by=['top_score_synthetic'], ascending=False))
    # id looks like "('<first 20 chars of explanation, spaces->_>', '<subject>', <module_num>)"
    expl_prefix = r['top_explanation_init_strs'].str.replace(' ', '_').str.slice(stop=20)
    r['id'] = "('" + expl_prefix + "', '" + r['subject'] + "', " + r['module_num'].astype(str) + ")"

    def _voxels_to_rows(voxels: List[Tuple]) -> pd.DataFrame:
        # Look up each (expl, subject, module_num) voxel in r and stack the
        # matching rows into one DataFrame, recording the voxel's explanation
        # in an `expl` column.
        rows = []
        expls = []
        for vox in voxels:
            expl, subj, vox_num = vox
            vox_num = int(vox_num)
            matches = r[(r.subject == subj) & (r.module_num == vox_num)]
            # Only a missing voxel is skipped; any other error propagates
            # instead of being silently swallowed.
            if matches.empty:
                print('skipping', vox)
                continue
            rows.append(matches.iloc[0])
            expls.append(expl)
        rows = pd.DataFrame(rows)
        rows['expl'] = expls
        return rows

    # Selection history (earlier strategies, no longer active):
    #   - manually picked voxels across subjects
    #   - mar 21: voxels spread across categories
    #   - mar 22: UTS02 voxels, n_voxels_per_category sampled at random from
    #     each category of the JSON below
    # mar 24 - UTS02 voxels after screening
    with open('voxel_select/uts02_concepts_pilot_mar22.json', 'r') as f:
        voxels_dict = json.load(f)

    # Flatten {category: [[expl, subject, module_num], ...]} into one table.
    vals = pd.DataFrame(
        [tuple(vox) for category_voxels in voxels_dict.values() for vox in category_voxels],
        columns=['expl', 'subject', 'module_num'],
    )
    voxel_nums = [
        337, 122, 168, 171, 79, 299, 368, 398
    ]
    # The filter is on module_num only; the subject is not checked here.
    vals = vals[vals['module_num'].isin(voxel_nums)]
    display(vals)
    # frac=1 returns every selected voxel, just in a seed-dependent order.
    voxels = vals.sample(frac=1, random_state=seed).values

    rows = _voxels_to_rows(voxels)
    return rows
    

def get_rows_huth():
    '''Select rows corresponding to 2016 categories

    Reads `huth2016clusters.json` from the current working directory. Each
    key of the JSON object becomes an `expl` value and the corresponding
    value becomes that row's `top_ngrams_module_correct` entry.

    Returns
    -------
    r: pd.DataFrame
        Columns `expl` and `top_ngrams_module_correct`, one row per key of
        the JSON object, in file order.
    '''
    # Context manager so the file handle is closed deterministically.
    with open('huth2016clusters.json', 'r') as f:
        huth2016_categories = json.load(f)
    # Materialise the dict views so pandas receives plain lists.
    r = pd.DataFrame.from_dict({
        'expl': list(huth2016_categories.keys()),
        'top_ngrams_module_correct': list(huth2016_categories.values()),
    })
    return r

# ---- Earlier experiment configurations (inactive, kept for reference) ----
# version = 'v4'
# EXPT_NAME = 'huth2016clusters_mar21_i_time_traveled'
# rows = get_rows_huth()

# EXPT_NAME = 'relationships_mar9'
# EXPT_NAME, version = ('voxels_mar21_hands_arms_emergency', 'v4_noun')
# rows = get_rows_voxels(seed=1)

# seed = 10
# EXPT_NAME, version = (f'uts02_concepts_pilot_mar22_seed={seed}', 'v4_noun')
# rows, idxs_list, voxels = get_rows_voxels(seed=seed, n_voxels_per_category=4)

# ---- Active configuration: screened UTS02 voxels (mar 24) ----
# The seed is embedded in the experiment name, so each seed gets its own name.
seed = 3
EXPT_NAME = f'uts02_concepts_pilot_selected_mar24_seed={seed}'
version = 'v4_noun'
rows = get_rows_voxels(seed=seed, n_voxels_per_category=4)
display(rows.head())

# One explanation and one list of example ngrams per selected voxel row;
# both are handed to the prompt builder together with the template version.
expls = rows['expl'].values
examples_list = rows['top_ngrams_module_correct']
prompts = notebook_helper.get_prompts(
    expls,
    examples_list,
    version,
    n_examples=4,
)
for p in prompts:
    print(p)
# Prompt templates for the same version (looked up by key in a later cell).
PV = notebook_helper.get_prompt_templates(version)

Unnamed: 0,expl,subject,module_num
0,moments,UTS02,337
61,measurements,UTS02,171
83,locations,UTS02,368
85,locations,UTS02,122
110,emotional expression,UTS02,398
137,surprise,UTS02,168
142,communication,UTS02,299
228,food preparation,UTS02,79


Unnamed: 0,subsample_frac,checkpoint,checkpoint_module,noise_ngram_scores,module_num_restrict,seed,save_dir,module_name,module_num,subject,...,top_ngrams_module_25,top_ngrams_module_50,top_ngrams_module_75,top_ngrams_module_100,roi_anat,roi_func,top_ngrams_module_correct,frac_top_ngrams_module_correct,id,expl
401,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,168,UTS02,...,"[i provoked gasps, real this time, fathers fee...","[i provoked gasps, real this time, fathers fee...","[i provoked gasps, real this time, fathers fee...","[i provoked gasps, real this time, fathers fee...",[middletemporal],"[ATFP, AC]","[i provoked gasps, asked i laughed, felt so be...",0.36,"('surprise,_shock,_dis', 'UTS02', 168)",surprise
431,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,79,UTS02,...,"[sliced cucumber, some sliced cucumber, butter...","[sliced cucumber, some sliced cucumber, butter...","[sliced cucumber, some sliced cucumber, butter...","[sliced cucumber, some sliced cucumber, butter...","[fusiform, inferiortemporal]",[ATFP],"[sliced cucumber, some sliced cucumber, butter...",0.44,"('food_preparation', 'UTS02', 79)",food preparation
742,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,398,UTS02,...,"[and mimed crying, she started laughing, face ...","[and mimed crying, she started laughing, face ...","[and mimed crying, she started laughing, face ...","[and mimed crying, she started laughing, face ...","[inferiorparietal, middletemporal]",[PMvh],"[and mimed crying, she started laughing, block...",0.613333,"('emotional_expression', 'UTS02', 398)",emotional expression
574,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,299,UTS02,...,"[housewarming gift, writing his obituary, i go...","[housewarming gift, writing his obituary, i go...","[housewarming gift, writing his obituary, i go...","[housewarming gift, writing his obituary, i go...",[inferiorparietal],[ATFP],"[writing his obituary, explain the joke, read ...",0.626667,"('communication', 'UTS02', 299)",communication
439,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,122,UTS02,...,"[onto the railing, them fly overhead, against ...","[onto the railing, them fly overhead, against ...","[onto the railing, them fly overhead, against ...","[onto the railing, them fly overhead, against ...",[inferiorparietal],"[OPA, ATFP]","[them fly overhead, stand in front, across the...",0.333333,"('location_or_movement', 'UTS02', 122)",locations


Write the beginning paragraph of an interesting story told in first person. The story should have a plot and characters. The story should be about "surprise". Make sure it contains several words related to "surprise", such as "i provoked gasps", "asked i laughed", "felt so betrayed", "suddenly seemed unlikely".
Write the next paragraph of the story, but now make it about "food preparation". Make sure it contains several words related to "food preparation", such as "sliced cucumber", "some sliced cucumber", "buttered slices", "thinly sliced".
Write the next paragraph of the story, but now make it about "emotional expression". Make sure it contains several words related to "emotional expression", such as "and mimed crying", "she started laughing", "blocks while screaming", "looked around scared".
Write the next paragraph of the story, but now make it about "communication". Make sure it contains several words related to "communication", such as "writing his obituary", "explain the joke", 

In [23]:
# Generate the story paragraphs from the prompts, passing the first/next
# prefixes from the prompt templates.
paragraphs = mprompt.llm.get_paragraphs(prompts, prefix_first=PV['prefix_first'], prefix_next=PV['prefix_next'])
# Keep each prompt and its generated paragraph alongside the voxel row.
rows['prompt'] = prompts
rows['paragraph'] = paragraphs
# Iterate over the paragraphs directly rather than indexing by position.
for para in tqdm(paragraphs):
    print(para)
    # pprint(para)

# save
STORIES_DIR = join(RESULTS_DIR, 'stories')
EXPT_DIR = join(STORIES_DIR, EXPT_NAME)
os.makedirs(EXPT_DIR, exist_ok=True)
# Every artifact of this experiment goes into EXPT_DIR.
joblib.dump(rows, join(EXPT_DIR, 'rows.pkl'))
# Explicit UTF-8: generated text may contain non-ASCII characters, and the
# platform default encoding is not guaranteed to handle them.
with open(join(EXPT_DIR, 'story.txt'), 'w', encoding='utf-8') as f:
    f.write('\n\n'.join(rows.paragraph.values))
with open(join(EXPT_DIR, 'prompts.txt'), 'w', encoding='utf-8') as f:
    f.write('\n\n'.join(rows.prompt.values))

cached!
cached!
cached!
cached!
cached!
cached!
cached!
cached!


100%|██████████| 8/8 [00:00<00:00, 40970.00it/s]

I never expected to be the one to provoke gasps from a room full of people. But that's exactly what happened when I walked into my surprise party. "Surprise!" they all yelled, and I couldn't help but laugh. I had no idea they were planning this. My best friend, Sarah, had been acting so strange lately, but I never suspected a thing. As I looked around the room, I felt so betrayed that they could keep such a big secret from me. But at the same time, I was thrilled to be surrounded by all my loved ones. It suddenly seemed unlikely that anything could ruin this perfect moment.
As I made my way through the crowd, hugging and thanking everyone for coming, I couldn't help but notice the spread of food on the table. Some sliced cucumber and carrot sticks were arranged neatly next to a bowl of hummus. A platter of cheese and crackers sat next to a basket of buttered slices of bread. I could tell that Sarah had put a lot of thought into the food preparation. Everything was so beautifully presen


