In [34]:
%load_ext autoreload
%autoreload 2
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
from typing import List
import numpy as np
import notebook_helper
import mprompt.viz
import openai
from pprint import pprint
import joblib
from collections import defaultdict
from mprompt.config import RESULTS_DIR
import mprompt.llm
import json
# Point the openai client at a key file under the user's home directory,
# so the key itself never appears in the notebook.
openai.api_key_path = os.path.expanduser('~/.OPENAI_KEY')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Get prompts

In [42]:
def get_rows_voxels(seed=1, n_voxels_per_category=4):
    '''Select rows from fitted voxels

    Loads the fitted-voxel results table and the category -> voxels mapping
    stored in `voxel_select/uts02_concepts_pilot_mar22.json`, randomly draws
    `n_voxels_per_category` voxels from every category and returns the
    matching rows of the results table.

    Params
    ------
    seed: int
        Seed of the numpy Generator used to shuffle the category order and to
        sample voxels within each category.
    n_voxels_per_category: int
        Number of voxels drawn (without replacement) from each category.
        Every category must hold at least this many voxels, otherwise
        `rng.choice` raises a ValueError.

    Returns
    -------
    rows: pd.DataFrame
        One results row per selected voxel that was found in the results
        table, with an extra `expl` column holding the explanation string
        taken from the JSON file.
    idxs_list: list of np.ndarray
        Sampled within-category indexes, in the order of the shuffled
        categories.
    voxels: list of tuple
        All selected (explanation, subject, module_num) tuples, including the
        ones that were skipped because the results table had no match.
    '''
    # NOTE: pickle can execute arbitrary code on load; only read trusted result files
    results = (pd.read_pickle('../results/results_fmri.pkl')
               .sort_values(by=['top_score_synthetic'], ascending=False))
    results['id'] = (
        "('" + results['top_explanation_init_strs'].str.replace(' ', '_').str.slice(stop=20)
        + "', '" + results['subject']
        + "', " + results['module_num'].astype(str) + ")"
    )

    # mar 22 - UTS02 voxels
    # context manager guarantees the file handle is closed
    with open('voxel_select/uts02_concepts_pilot_mar22.json', 'r') as f:
        voxels_dict = json.load(f)

    # randomly shuffle the categories order + voxels within each category
    # (the order of the rng calls determines the sample, so shuffle first)
    rng = np.random.default_rng(seed)
    voxels_dict_keys = list(voxels_dict.keys())
    rng.shuffle(voxels_dict_keys)
    print(voxels_dict_keys)
    idxs_list = [
        rng.choice(len(voxels_dict[k]), n_voxels_per_category, replace=False)
        for k in voxels_dict_keys
    ]

    # keep n_voxels_per_category voxels per category, in shuffled category order
    voxels = [
        tuple(vox)
        for k, idxs in zip(voxels_dict_keys, idxs_list)
        for vox in np.array(voxels_dict[k])[idxs]
    ]

    # put all voxel data into rows DataFrame
    rows = []
    expls = []
    for vox in voxels:
        expl, subj, vox_num = vox
        vox_num = int(vox_num)
        matches = results[(results.subject == subj) & (results.module_num == vox_num)]
        # only a missing voxel is skipped; any other problem (e.g. a missing
        # column) is raised instead of being silently swallowed
        if len(matches) == 0:
            print('skipping', vox)
            continue
        rows.append(matches.iloc[0])  # best-scoring match, table is sorted by score
        expls.append(expl)
    rows = pd.DataFrame(rows)
    rows['expl'] = expls

    return rows, idxs_list, voxels

def get_rows_huth():
    '''Select rows corresponding to 2016 categories

    Returns
    -------
    r: pd.DataFrame
        One row per top-level entry of `huth2016clusters.json`: the key goes
        into `expl`, the associated value into `top_ngrams_module_correct`.
    '''
    # context manager guarantees the file handle is closed
    with open('huth2016clusters.json', 'r') as f:
        huth2016_categories = json.load(f)
    # materialise the dict views so the constructor receives plain lists
    r = pd.DataFrame.from_dict({
        'expl': list(huth2016_categories.keys()),
        'top_ngrams_module_correct': list(huth2016_categories.values()),
    })
    return r

# Alternative experiment configurations, kept for reference:
#   huth 2016 clusters
#     version = 'v4'
#     EXPT_NAME = 'huth2016clusters_mar21_i_time_traveled'
#     rows = get_rows_huth()
#   earlier voxel runs
#     EXPT_NAME = 'relationships_mar9'
#     EXPT_NAME, version = ('voxels_mar21_hands_arms_emergency', 'v4_noun')
#     rows = get_rows_voxels(seed=1)

# Active configuration: UTS02 concepts pilot (mar 22), 4 voxels per category
seed = 3
EXPT_NAME = f'uts02_concepts_pilot_mar22_seed={seed}'
version = 'v4_noun'
rows, idxs_list, voxels = get_rows_voxels(seed=seed, n_voxels_per_category=4)
display(rows.head())

# Build the prompts from each row's explanation and its example ngrams,
# then fetch the prompt templates belonging to the same prompt version
expls = rows['expl'].values
examples_list = rows['top_ngrams_module_correct']
prompts = notebook_helper.get_prompts(expls, examples_list, version, n_examples=4)
for p in prompts:
    print(p)
PV = notebook_helper.get_prompt_templates(version)

['food', 'repetition', 'locational', 'numeric', 'social', 'emotional', 'temporal']


Unnamed: 0,subsample_frac,checkpoint,checkpoint_module,noise_ngram_scores,module_num_restrict,seed,save_dir,module_name,module_num,subject,...,top_ngrams_module_25,top_ngrams_module_50,top_ngrams_module_75,top_ngrams_module_100,roi_anat,roi_func,top_ngrams_module_correct,frac_top_ngrams_module_correct,id,expl
148,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,292,UTS02,...,"[bag of peas, cloth like burberry, a walrus mu...","[bag of peas, cloth like burberry, a walrus mu...","[bag of peas, cloth like burberry, a walrus mu...","[bag of peas, cloth like burberry, a walrus mu...","[entorhinal, fusiform, parahippocampal]",[ATFP],"[bag of peas, sugar cubes, cucumber and mayonn...",0.4,"('food_items', 'UTS02', 292)",food items
431,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,79,UTS02,...,"[sliced cucumber, some sliced cucumber, butter...","[sliced cucumber, some sliced cucumber, butter...","[sliced cucumber, some sliced cucumber, butter...","[sliced cucumber, some sliced cucumber, butter...","[fusiform, inferiortemporal]",[ATFP],"[sliced cucumber, some sliced cucumber, butter...",0.44,"('food_preparation', 'UTS02', 79)",food preparation
986,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,101,UTS02,...,"[cucumber and mayonnaise, warm soda and, soake...","[cucumber and mayonnaise, warm soda and, soake...","[cucumber and mayonnaise, warm soda and, soake...","[cucumber and mayonnaise, warm soda and, soake...",[lateralorbitofrontal],[ATFP],"[cucumber and mayonnaise, warm soda and, like ...",0.573333,"('food_and_drink', 'UTS02', 101)",food and drinks
701,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,451,UTS02,...,"[cucumber and mayonnaise, blood running down, ...","[cucumber and mayonnaise, blood running down, ...","[cucumber and mayonnaise, blood running down, ...","[cucumber and mayonnaise, blood running down, ...",[rostralmiddlefrontal],[ATFP],"[cucumber and mayonnaise, lemon slices, some s...",0.506667,"('food_and_liquids', 'UTS02', 451)",food and drinks
808,1.0,text-davinci-003,gpt2-xl,0,-1,1,/home/chansingh/mntv1/mprompt/mar13,fmri,41,UTS02,...,"[smiled i smiled, personality any diagnosis, o...","[smiled i smiled, personality any diagnosis, o...","[smiled i smiled, personality any diagnosis, o...","[smiled i smiled, personality any diagnosis, o...",[superiortemporal],"[ATFP, AC]","[smiled i smiled, white this white, screaming ...",0.32,"('repetition', 'UTS02', 41)",repetition


Write the beginning paragraph of an interesting story told in first person. The story should have a plot and characters. The story should be about "food items". Make sure it contains several words related to "food items", such as "bag of peas", "sugar cubes", "cucumber and mayonnaise", "sliced cucumber".
Write the next paragraph of the story, but now make it about "food preparation". Make sure it contains several words related to "food preparation", such as "sliced cucumber", "some sliced cucumber", "buttered slices", "thinly sliced".
Write the next paragraph of the story, but now make it about "food and drinks". Make sure it contains several words related to "food and drinks", such as "cucumber and mayonnaise", "warm soda and", "like sour milk", "some sliced cucumber".
Write the next paragraph of the story, but now make it about "food and drinks". Make sure it contains several words related to "food and drinks", such as "cucumber and mayonnaise", "lemon slices", "some sliced cucumber"

In [43]:
# Ask the LLM for the story paragraphs and store prompts + paragraphs
# alongside the voxel rows (presumably one paragraph per prompt; the column
# assignment below fails if the lengths disagree with `rows`)
paragraphs = mprompt.llm.get_paragraphs(prompts, prefix_first=PV['prefix_first'], prefix_next=PV['prefix_next'])
rows['prompt'] = prompts
rows['paragraph'] = paragraphs
# plain loop: the paragraphs are already computed, so a progress bar would
# only interleave its redraws with the printed text
for para in paragraphs:
    print(para)

cached!
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached
not cached


In [None]:
# Persist the selected rows (with prompts and paragraphs) under
# <RESULTS_DIR>/stories/<EXPT_NAME>/rows.pkl
STORIES_DIR = join(RESULTS_DIR, 'stories')
expt_dir = join(STORIES_DIR, EXPT_NAME)
os.makedirs(expt_dir, exist_ok=True)
joblib.dump(rows, join(expt_dir, 'rows.pkl'))

['/home/chansingh/mprompt/results/stories/uts02_concepts_pilot_mar22_seed=2/rows.pkl']