In [None]:
import sys
sys.path.insert(0,'..')
sys.path.insert(0,'/Users/ryan/github/prosodic')
from llmdh import *
from sqlitedict import SqliteDict
import os

In [None]:
# Pool of story prompts; generate_lots samples one at random per request.
prompts = [
    # generic three-character stories
    "Write a short story with 3 characters.",
    "Write a story with 3 characters.",
    "Write a story with at least 3 characters.",
    "Write a short piece of fiction involving 3 characters.",
    # genre-constrained
    "Write a science fiction story involving 3 characters.",
    "Write a story involving 3 characters in the genre of a fantasy novel.",
    # setting-constrained
    "Write a story set in Texas involving 3 characters.",
    "Write a story set in New York City involving 3 characters.",
    # disabled prompt, kept for reference:
    # "Write a story involving at least 3 characters and 1 sex scene.",
    # no explicit character count
    "Write a short story in the romance genre.",
    "Write a short piece of literary fiction.",
]
# Model identifiers that generate_lots samples from: the project's default
# local model followed by hosted model names.
models = [
    LLM_DEFAULT_MODEL_LOCAL,
    "gpt-3.5-turbo",
    "gpt-4-turbo",
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "claude-3-haiku-20240307",
    "gemini-pro",
]

def generate_lots(
    prompts=prompts,
    models = models,
    dbfn='sqlitedict_stories.db',
    n=None,
):
    """Generate stories from random prompt/model/temperature draws and store them.

    Each iteration picks a random prompt and a random model, samples a
    temperature uniformly from 0.0 up to 2.0 (up to 1.0 when the model name
    contains 'claude'), and calls ``LLM.generate``. A non-empty result is
    written to the SqliteDict database under ``hashstr(model, temp, prompt,
    txt)`` and printed together with a one-line attribution.

    Args:
        prompts: sequence of prompt strings to sample from.
        models: sequence of model names to sample from.
        dbfn: database file; a relative name is resolved against PATH_DATA,
            an absolute path is used as given.
        n: maximum number of stories to store. ``None`` (the default) keeps
            generating until the call is interrupted.

    Returns:
        None. Results are persisted in the database and echoed to stdout.
    """
    path_db = dbfn if os.path.isabs(dbfn) else os.path.join(PATH_DATA, dbfn)
    n_stored = 0
    # The context manager closes the database handle even when the loop is
    # stopped by an exception such as KeyboardInterrupt.
    with SqliteDict(path_db, autocommit=True) as db:
        while n is None or n_stored < n:
            prompt = random.choice(prompts)
            model = random.choice(models)
            # NOTE(review): the lower ceiling for 'claude' models presumably
            # mirrors that API's temperature limit - confirm.
            max_temp = 1.0 if 'claude' in model else 2.0
            temp = random.uniform(0.0, max_temp)
            txt = LLM.generate(prompt, temp=temp, model=model, verbose=False)
            if not txt:
                # Empty generations are not stored and do not count toward n.
                continue
            key = hashstr(model, temp, prompt, txt)
            db[key] = {'model': model, 'temp': temp, 'prompt': prompt, 'txt': txt}
            n_stored += 1
            print(txt)
            # Flatten newlines outside the f-string: a backslash inside an
            # f-string expression is a SyntaxError before Python 3.12.
            prompt_flat = prompt.replace("\n", " / ")
            print(f'—{model} (temp={temp:.2f}) [{prompt_flat}]')
            # time.sleep(random.uniform(3,10))

In [3]:
# Start generating with the default prompts, models and database file.
# With these defaults generate_lots loops without a stopping condition, so this
# cell keeps running until the kernel is interrupted.
generate_lots()

In [None]:
# System prompt passed to LLM.generate by get_bechdel_data: it lists the three
# Bechdel-test questions and asks for the answer as a JSON dictionary. The text
# is sent verbatim, so whitespace inside the literal is part of the prompt.
system_prompt='''You are a literary critic assessing whether stories pass the "Bechdel test", which involves 3 components: 
1. Does the story contain more than 1 female character?
2. Do two female characters speak to one another?
3. Do they speak about something other than a man?

Read the following story and return valid JSON dictionary containing this information.
'''

# Worked examples passed to LLM.generate as `example_prompts` by
# get_bechdel_data. Each item is a (story text, expected JSON reply) pair; the
# reply uses the same four keys that `json_keys` lists.
# NOTE(review): presumably LLM.generate replays these as few-shot
# demonstrations before the real story - confirm against its implementation.
example_prompts=[
    (
'''
Emily and Emma were close friends, though a little jealous of each other. Neither knew which of them Michael, a mutual friend of theirs for many years, liked more.

One day, Emily said to Emma, "I don't know if I can live with this uncertainty. Michael has known both of us for years. When is he going to settle down?"

"What are you implying, Emily?"

Just then, Michael entered the room, holding flowers. His eyes shifted from left to right, from Emily to Emma, before slowly opening his mouth and saying, "I love you both equally. Let's become a throuple!"

Both Emily and Emma were disgusted, and left the room, never to speak to Michael again.
''',
'''
{
    "female_characters": ["Emily", "Emma"],
    "male_characters": ["Michael"],
    "female_characters_speak_to_each_other": true,
    "female_characters_speak_about_something_other_than_a_man": false
}
'''
    )
]

# Keys a model reply must contain for a cached annotation to count as complete;
# get_bechdel_data re-queries any story whose stored reply lacks one of them.
json_keys = {
    "female_characters",
    "male_characters",
    "female_characters_speak_to_each_other",
    "female_characters_speak_about_something_other_than_a_man",
}


def _parse_bechdel_response(res):
    """Turn a raw model reply into an annotation dict, or None if unusable.

    The reply may wrap the JSON in extra text, so only the span from the first
    '{' to the first '}' after it is parsed (this assumes a flat, un-nested
    object). On success the derived flag 'more_than_1_female_character' is
    added. Parse problems are printed with a '!!' prefix and yield None.
    """
    # `not res` also covers a None/empty reply, which would otherwise make the
    # `in` checks raise TypeError.
    if not res or '{' not in res or '}' not in res:
        return None
    payload = '{' + res.split('{', 1)[-1].split('}', 1)[0] + '}'
    try:
        parsed = json.loads(payload)
        female_chars = parsed.get('female_characters', [])
        parsed['more_than_1_female_character'] = len(female_chars) > 1
    except (ValueError, TypeError) as e:
        # ValueError covers malformed JSON; TypeError covers a
        # 'female_characters' value that has no length.
        print('!!', e)
        return None
    return parsed


def get_bechdel_data(dbfn='sqlitedict_stories.db', force=False, limit=5000):
    """Annotate stored stories with Bechdel-test answers and return them as a table.

    For each stored story that has text, a cached 'bechdel' annotation is
    reused when it contains every required key. Otherwise (or when ``force``
    is true) the story is sent to 'gpt-4-turbo' with ``system_prompt`` and
    ``example_prompts``, the reply is parsed, and the annotation is written
    back to the database. Stories whose reply cannot be parsed are skipped.

    Args:
        dbfn: database file, joined onto PATH_DATA.
        force: re-query the model even when a complete annotation is cached.
        limit: process at most this many database keys (default 5000);
            ``None`` processes all of them.

    Returns:
        pandas.DataFrame with one row per annotated story: the stored story
        fields (without the nested 'bechdel' entry) merged with the
        annotation fields.
    """
    # A cached annotation is complete when it has every key requested from the
    # model plus the flag derived locally in _parse_bechdel_response.
    required_keys = set(json_keys) | {'more_than_1_female_character'}
    path_db = os.path.join(PATH_DATA, dbfn)
    rows = []
    # The context manager closes the database handle on every exit path.
    with SqliteDict(path_db, autocommit=True) as db:
        keys = list(db.keys())
        if limit is not None:
            keys = keys[:limit]
        for key in tqdm(keys):
            data = db[key]
            txt = data.get('txt')
            if not txt:
                continue
            bechdel = data.get('bechdel')
            is_stale = (
                force
                or not isinstance(bechdel, dict)
                or bool(required_keys - set(bechdel))
            )
            if is_stale:
                res = LLM.generate(
                    user_prompt=txt,
                    system_prompt=system_prompt,
                    example_prompts=example_prompts,
                    model='gpt-4-turbo'
                )
                bechdel = _parse_bechdel_response(res)
                if bechdel is None:
                    continue
                # Build a new record rather than mutating the one read from
                # the database, then persist it.
                data = {**data, 'bechdel': bechdel}
                db[key] = data
            story_fields = {k: v for k, v in data.items() if k != 'bechdel'}
            rows.append({**story_fields, **bechdel})
    return pd.DataFrame(rows)

In [None]:
# Build the per-story table. force=False reuses 'bechdel' annotations already
# stored in the database and only queries the model for stories that lack a
# complete one.
df=get_bechdel_data(force=False)

In [None]:
# Coerce the three yes/no Bechdel columns to numeric dtype; values that cannot
# be parsed become NaN. This lets the later mean(numeric_only=True) include them.
for k in (
    'female_characters_speak_to_each_other',
    'female_characters_speak_about_something_other_than_a_man',
    'more_than_1_female_character',
):
    df[k] = pd.to_numeric(df[k], errors='coerce')

In [None]:
# Number of annotated stories per generating model.
df['model'].value_counts()

In [None]:
# Per-model mean of every numeric column, ordered ascending by the share of
# stories in which female characters talk about something other than a man.
(
    df
    .groupby('model')
    .mean(numeric_only=True)
    .sort_values(by='female_characters_speak_about_something_other_than_a_man')
)