In [1]:
%load_ext autoreload
%autoreload 2
import os
import re
import sys
from copy import deepcopy
from os.path import join
from pprint import pprint
from typing import List

import imodelsx.util
import joblib
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import scipy.special
import seaborn as sns
import torch.cuda
from IPython.display import display, HTML
from spacy.lang.en import English
from spacy.tokenizer import Tokenizer
from tqdm import tqdm

import mprompt.llm
import mprompt.viz
import notebook_helper
from mprompt.config import RESULTS_DIR
from mprompt.methods.m4_evaluate import D5_Validator
from mprompt.modules.emb_diff_module import EmbDiffModule
from mprompt.modules.fmri_module import fMRIModule
# Point the OpenAI client at the key file in the user's home directory.
openai.api_key_path = os.path.expanduser('~/.OPENAI_KEY')


# Load the fMRI results table (highest synthetic score first) and attach a
# printable identifier of the form "('<explanation slug>', '<subject>', <module_num>)",
# where the slug is the first 20 characters of the explanation with spaces
# replaced by underscores.
r = (
    pd.read_pickle('../results/results_fmri.pkl')
    .sort_values(by=['top_score_synthetic'], ascending=False)
    .assign(
        id=lambda df: (
            "('"
            + df['top_explanation_init_strs'].str.replace(' ', '_').str.slice(stop=20)
            + "', '"
            + df['subject']
            + "', "
            + df['module_num'].astype(str)
            + ")"
        )
    )
)

### Select voxels

In [2]:
# Show the 50 highest-scoring voxels so that candidates can be picked by hand.
preview_cols = [
    'top_explanation_init_strs',
    'subject',
    'module_num',
    'top_score_synthetic',
    'frac_top_ngrams_module_correct',
    'id',
    'top_ngrams_module_correct',
]
with pd.option_context('display.max_rows', None, 'display.max_colwidth', 200):
    top_voxels = (
        r.sort_values(by=['top_score_synthetic'], ascending=False)
        .loc[:, preview_cols]
        .round(3)
        .reset_index(drop=True)
        .head(50)
    )
    display(top_voxels)


# Earlier candidate selections, kept for reference:
# expls = ['baseball','animals','water','movement','religion','time','technology']
# interesting_expls = ['food', 'numbers', 'physical contact', 'time', 'laughter', 'age', 'clothing']
# voxels = [('movement', 'UTS01', 7), ('numbers', 'UTS03', 55), ('time', 'UTS03', 19), ('relationships', 'UTS01', 21),
#           ('sounds', 'UTS03', 35), ('emotion', 'UTS03', 23), ('food', 'UTS03', 46)]
# voxels = [('numbers', 'UTS03', 55), ('time', 'UTS03', 19),
#           ('sounds', 'UTS03', 35), ('emotion', 'UTS03', 23), ('food', 'UTS03', 46)]
# voxels = [('movement', 'UTS01', 7), ('relationships', 'UTS01', 21), ('passing of time', 'UTS02', 4)]

# Name of this experiment; used as the prefix of the saved artifacts.
EXPT_NAME = 'relationships_mar9'

# Hand-picked voxels as (short explanation label, subject, module number).
voxels = [
    ('relationships', 'UTS02', 9),
    ('time', 'UTS02', 4),
    ('looking or staring', 'UTS03', 57),
    ('food and drinks', 'UTS01', 52),
    ('hands and arms', 'UTS01', 46),
]

Unnamed: 0,top_explanation_init_strs,subject,module_num,top_score_synthetic,frac_top_ngrams_module_correct,id,top_ngrams_module_correct
0,physical or verbal expressions of emotion,UTS03,47,0.155,0.373,"('physical_or_verbal_e', 'UTS03', 47)","[and gesticulate wildly, and screamed obscenities, screamed obscenities, retorted rather loudly, waved wildly, quick kiss whispered, screamed the words, words were aggressive, a quick kiss, start ..."
1,"birthdays, birth years, and zodiac signs",UTS03,39,0.15,0.187,"('birthdays,_birth_yea', 'UTS03', 39)","[identical birthday, this identical birthday, even my birthday, two identical birthday, piscean, on my birthday, piscean if, birth year so, birthday of, to my birthday, his birth year, birthday so..."
2,a specific year in the 20th century,UTS02,54,0.146,0.813,"('a_specific_year_in_t', 'UTS02', 54)","[of nineteen fifty, of nineteen sixty, of nineteen forty, of nineteen eighty, and nineteen fifty, nineteen ninety, to nineteen forty, of nineteen ninety, nineteen thity, nineteen seventy, nineteen..."
3,looking or staring in some way,UTS03,57,0.099,0.4,"('looking_or_staring_i', 'UTS03', 57)","[eyed her suspiciously, at him incredulously, wink at, at me shyly, locks eyes with, staring at me, turned and saw, and mimed crying, incredulously like look, staring right at, point at a, leered ..."
4,a specific year or decade in the 20th century,UTS02,24,0.097,0.773,"('a_specific_year_or_d', 'UTS02', 24)","[in nineteen fifty, of nineteen fifty, in nineteen forty, of nineteen eighty, nineteen forty, nineteenth nineteen forty, of nineteen forty, to nineteen forty, 's nineteen forty, in nineteen sevent..."
5,sudden or unexpected action or event,UTS02,35,0.092,0.44,"('sudden_or_unexpected', 'UTS02', 35)","[room went silent, i provoked gasps, she started laughing, sudden i heard, the doorbell rang, suddenly i heard, sudden we hear, suddenly i hear, suddenly she looked, somebody then yelled, moment s..."
6,measurements of distance or time,UTS02,22,0.085,0.693,"('measurements_of_dist', 'UTS02', 22)","[fifty feet, fifteen twenty feet, crawl fifty meters, living fifty feet, every eight feet, shot just centimeters, to fifty feet, seventy to twenty, gun two inches, meters fifty feet, long sixty fo..."
7,"time, as many of them refer to specific years or periods of time",UTS02,59,0.08,0.573,"('time,_as_many_of_the', 'UTS02', 59)","[weeks became months, until nineteen sixty, replied age thirty, until nineteen seventy, to nineteen forty, april nineteen forty, in nineteen fifty, am turning forty, in nineteen sixty, and ninetee..."
8,a specific year in the 20th century,UTS02,5,0.076,0.76,"('a_specific_year_in_t', 'UTS02', 5)","[of nineteen fifty, nineteen sixty, of nineteen eighty, of nineteen sixty, nineteen ninety, nineteen eighty, nineteen seventy, nineteen forty, of nineteen forty, of nineteen ninety, nineteen fifty..."
9,a specific year,UTS02,16,0.076,0.467,"('a_specific_year', 'UTS02', 16)","[in nineteen fifty, in nineteen thirty, until nineteen sixty, in nineteen sixty, to nineteen forty, in nineteen forty, of nineteen eighty, nineteen forty, since nineteen eighty, nineteen sixty, un..."


In [3]:
# Collect the results row for every hand-picked voxel into the `rows` DataFrame.
# `expls` holds the short explanation label for each voxel that was found, in
# the same order as `rows`.
selected_rows = []
expls = []
for vox in voxels:
    expl, subj, vox_num = vox
    matches = r[(r.subject == subj) & (r.module_num == vox_num)]
    if matches.empty:
        # No results for this (subject, module) pair: skip it explicitly rather
        # than hiding every possible error behind a bare `except`.
        print('skipping', vox)
        continue
    selected_rows.append(matches.iloc[0])
    expls.append(expl)
rows = pd.DataFrame(selected_rows)
rows['expl'] = expls

with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.max_colwidth', 200):
    display(rows[['subject', 'module_num', 'expl', 'top_explanation_init_strs', 'top_ngrams_module_correct']])

Unnamed: 0,subject,module_num,expl,top_explanation_init_strs,top_ngrams_module_correct
88,UTS02,9,relationships,relationships and milestones in life,"[boyfriend of six, dating for months, boyfriend a year, married for fifteen, we got engaged, we were engaged, twenty he retired, a divorce twenty, virginity at twenty, am turning forty, daughter i..."
59,UTS02,4,time,passing of time,"[weeks became months, weekends became weeks, five years four, the moment passed, twenty minutes thirty, more time passed, replied age thirty, moment passed, later came the, days later diagnosed, m..."
46,UTS03,57,looking or staring,looking or staring in some way,"[eyed her suspiciously, at him incredulously, wink at, at me shyly, locks eyes with, staring at me, turned and saw, and mimed crying, incredulously like look, staring right at, point at a, leered ..."
32,UTS01,52,food and drinks,food and drink,"[holding two cups, bottle of, cucumber bell peppers, and potatoes apples, bread wheat wheatgrass, c4 rice wheatgrass, diet dr pepper, plates and cups, two dollar budweiser, oats barley rye, glass ..."
148,UTS01,46,hands and arms,hands and arms,"[grab his arms, rolled up sleeves, put his arms, put my arms, grabbed their hands, molded his hands, elbows at the, lifted her dress, with their thumbs, elbows on, grabbed her hand, elbows at, gra..."


# Generate story

In [4]:
# Prompt-template version used for both the prompts and the prefixes below.
version = 'v2'

# Build one prompt per selected voxel and print each for inspection.
prompts = notebook_helper.get_prompts(rows, version)
for p in prompts:
    print(p)

# Prompt templates for this version (the prefixes are read from it when
# generating paragraphs).
PV = notebook_helper.get_prompt_templates(version)

Write the beginning paragraph of a story told in first person. The story should be about "relationships". Make sure it contains several references to "relationships", such as "boyfriend of six", "dating for months", "boyfriend a year".
Write the next paragraph of the story, but now make it about "time". Make sure it contains several references to "time", such as "weeks became months", "weekends became weeks", "five years four".
Write the next paragraph of the story, but now make it about "looking or staring". Make sure it contains several references to "looking or staring", such as "eyed her suspiciously", "at him incredulously", "wink at".
Write the next paragraph of the story, but now make it about "food and drinks". Make sure it contains several references to "food and drinks", such as "holding two cups", "bottle of", "cucumber bell peppers".
Write the next paragraph of the story, but now make it about "hands and arms". Make sure it contains several references to "hands and arms", s

In [5]:
# Generate one paragraph per prompt. `mprompt.llm` is imported explicitly in
# the import cell, so this call does not rely on another module having
# imported it as a side effect.
paragraphs = mprompt.llm.get_paragraphs(
    prompts,
    prefix_first=PV['prefix_first'],
    prefix_next=PV['prefix_next'],
)

# Keep each prompt and its generated paragraph next to the voxel metadata.
rows['prompt'] = prompts
rows['paragraph'] = paragraphs

for para in paragraphs:
    pprint(para)

cached!
cached!
cached!
cached!
cached!
("I've always been fascinated by relationships. The way two people can come "
 "together and form a bond that lasts a lifetime is truly remarkable. I've had "
 "my fair share of relationships, some good and some bad. Currently, I'm "
 "dating my boyfriend of six months and things are going great. We've been "
 "through a lot together in such a short amount of time, but we've managed to "
 'come out stronger on the other side. Before him, I was in a relationship for '
 'a year that ended in heartbreak. But I learned so much from that experience '
 "and it's helped me appreciate the relationship I have now even more.")
('Time is a funny thing when it comes to relationships. It can feel like weeks '
 "become months and weekends become weeks when you're with the right person. "
 "But when you're in a toxic relationship, time can feel like it's standing "
 "still. I've been in both situations and I can say with certainty that time "
 'is a crucial fac

In [6]:
# Persist the selected voxels (with prompts and paragraphs) under
# <RESULTS_DIR>/stories/<EXPT_NAME>_rows.pkl.
STORIES_DIR = join(RESULTS_DIR, 'stories')
os.makedirs(STORIES_DIR, exist_ok=True)
rows_path = join(STORIES_DIR, f'{EXPT_NAME}_rows.pkl')
joblib.dump(rows, rows_path)

['/home/chansingh/mprompt/results/stories/relationships_mar9_rows.pkl']

# Test that the synthetic data contains the concept

In [None]:
# Validator instance passed to the story visualization and the
# explanation/paragraph match heatmap in the following cells.
val = D5_Validator()

In [None]:
# Render the generated story as HTML (also written to `fname`) and show it inline.
story_html = mprompt.viz.visualize_story_html(
    val,
    expls,
    paragraphs,
    prompts,
    fname='../results/story_running.html',
)
display(HTML(story_html))

In [None]:
# compute scores heatmap: match between each explanation and each generated
# paragraph, as computed by the validator
# (presumably one row/column per explanation/paragraph - TODO confirm orientation)
scores = notebook_helper.compute_expl_data_match_heatmap(val, expls, paragraphs)

In [None]:
# Plot the explanation/paragraph match scores as-is.
s = scores
# Optional normalizations; uncomment one to rescale `s` before plotting:
# s = scipy.special.softmax(scores, axis=1)
# s = (s - s.min()) / (s.max() - s.min())
mprompt.viz.heatmap(s, expls)

# Test modules on the generated stories

In [None]:
# Pull the per-voxel metadata out of `rows` as arrays, then score every
# generated paragraph with every selected voxel module.
expls = rows['expl'].values
voxel_nums = rows['module_num'].values
subjects = rows['subject'].values
(
    scores_mod,
    scores_max_mod,
    all_scores,
    all_ngrams,
) = notebook_helper.compute_expl_module_match_heatmap(expls, paragraphs, voxel_nums, subjects)

In [None]:
# Softmax-normalize the module scores along axis 1 before plotting.
s = scores_mod
s = scipy.special.softmax(s, axis=1)
# s = (s - s.min()) / (s.max() - s.min())

# Plot the normalized module scores `s`. The original call plotted `scores`,
# the validator matrix from the previous section, so the module responses
# computed above were never shown.
mprompt.viz.heatmap(s, expls, xlab='Explanation of voxel used for evaluation', clab='Mean voxel response')

### Module responses for single story

In [None]:
# Inspect the module for individual selected voxels. Only the first voxel is
# processed for now (range(1)); the trailing comment keeps the full-range variant.
# NOTE: `row` and `mod` from the last loop iteration are reused by the cells below.
# NOTE: this changes the pandas 'display.max_rows' option for the rest of the session.
pd.set_option('display.max_rows', 120)
for i in range(1): #len(voxels)):
    row = rows.iloc[i]
    display(row[['subject', 'module_num', 'top_explanation_init_strs', 'explanation_init_ngrams', 'top_ngrams_module_correct']])
    # Module for this voxel, built from its subject and module number.
    mod = fMRIModule(voxel_num_best=row.module_num, subject=row.subject)
    # show all rows
    # display(
    #     pd.DataFrame.from_dict({
    #         'score': all_scores[i][i],
    #         'ngram': all_ngrams[i][i],
    #     }).sort_values('score', ascending=False).head(10)
    # )

In [None]:
# Run the module on the ngrams stored for this voxel.
# Depends on `row` and `mod` left over from the loop in the previous cell.
x = row['explanation_init_ngrams']
p = mod(x)


In [None]:
# Same input, but with return_all=True
# (presumably returns the response of every voxel - TODO confirm).
out = mod(x, return_all=True)
# NOTE(review): this overwrites `scores` from the data-match heatmap cell above;
# re-running that heatmap cell afterwards will plot these stored outputs instead.
scores = row['explanation_init_outputs']

In [None]:
# Select the column of `out` at index `row.module_num`; this overwrites the
# earlier `p = mod(x)`.
p = out[:, row.module_num]