# Generative poems

## Prompting for un/rhyming poems

In [1]:
import sys
sys.path.append('../')
from generative_formalism import *

In [2]:
def clean_genai_poem(txt):
    """Return ``txt`` with every single-line stanza removed.

    The text is split on blank lines (``'\\n\\n'``). Each chunk is stripped of
    surrounding whitespace and kept only if it still spans more than one line.
    The surviving stanzas are rejoined with a single blank line between them.

    NOTE(review): presumably this is meant to drop one-line preambles and
    sign-offs that models wrap around a poem -- confirm against the data.
    """
    kept = []
    for chunk in txt.split('\n\n'):
        stanza = chunk.strip()
        # A stanza needs at least one internal newline, i.e. two or more lines.
        if '\n' in stanza:
            kept.append(stanza)
    return '\n\n'.join(kept)

def get_num_lines(txt):
    """Count the lines of ``txt`` that contain at least one non-whitespace character."""
    return sum(1 for line in txt.split('\n') if line.strip())

In [3]:
def collect_prev_genai_promptings(path_pkl=PATH_PKL, path_json=PATH_JSON, min_lines=10, max_lines=100, overwrite=False):
    """Merge previously generated poems from a pickle and a JSON dump into one table.

    Rows are read from ``path_pkl`` (a pickled DataFrame whose ``poem`` column is
    renamed to ``txt``; rows with an empty ``prompt`` are dropped) and from
    ``path_json`` (a JSON array of records with ``prompt.model``,
    ``prompt.temperature``, ``prompt.user_prompt`` and ``response``). JSON rows
    take their ``prompt_type`` from the pickle's prompt -> prompt_type mapping,
    falling back to ``'Unknown'``. Either source may be missing or empty; it then
    simply contributes no rows.

    Each row gets an ``id`` derived from model, temperature, prompt and the raw
    (pre-cleaning) text, an ``id_hash`` derived from that id, and a ``rhyme``
    code (``'y'`` for ``DO_rhyme``, ``'n'`` for ``do_NOT_rhyme``, ``'?'``
    otherwise). Texts are then cleaned with ``clean_genai_poem``, line counts are
    recomputed, and rows are filtered by line count, de-duplicated on ``id`` and
    stripped of a fixed list of excluded prompts.

    Parameters
    ----------
    path_pkl : str or None
        Path to the pickled DataFrame; skipped when falsy or nonexistent.
    path_json : str or None
        Path to the JSON dump; skipped when falsy or nonexistent.
    min_lines, max_lines : int
        Inclusive bounds on the number of non-blank lines of the cleaned poem.
    overwrite : bool
        Forwarded unchanged to ``save_sample``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``id`` with columns ``model``, ``temp``, ``prompt``,
        ``prompt_type``, ``txt``, ``num_lines``, ``id_hash``, ``rhyme``, ordered
        by ``id_hash`` (ties broken by ``id``).

    The result is also passed to ``save_sample`` with the path
    ``'../data/corpus_genai_promptings.csv.gz'``.
    """
    columns = ['model', 'temp', 'prompt', 'prompt_type', 'txt', 'num_lines']

    def load_pickled_poems():
        """Read the pickled poems, or return an empty frame with the expected columns."""
        if not (path_pkl and os.path.exists(path_pkl)):
            # Keep the schema so later column access works when the file is absent.
            return pd.DataFrame(columns=columns)
        # NOTE: unpickling can execute arbitrary code -- only use trusted files here.
        return (
            pd.read_pickle(path_pkl)
            .fillna('')
            .query('prompt!=""')
            .rename(columns={'poem': 'txt'})
        )

    def load_json_poems(prompt_to_type):
        """Read the JSON dump, or return an empty frame with the expected columns."""
        if not (path_json and os.path.exists(path_json)):
            return pd.DataFrame(columns=columns)
        with open(path_json, encoding='utf-8') as f:
            entries = json.load(f)
        records = []
        for entry in entries:
            request = entry['prompt']
            # Keep only what follows the last '</think>' marker in the response.
            poem = entry['response'].split('</think>')[-1].strip()
            records.append({
                'model': request['model'],
                'temp': request['temperature'],
                'prompt': request['user_prompt'],
                'prompt_type': prompt_to_type.get(request['user_prompt'], 'Unknown'),
                'txt': poem,
                'num_lines': get_num_lines(poem),
            })
        # Passing `columns` keeps the schema even when the file has zero records.
        return pd.DataFrame(records, columns=columns)

    df_pickled = load_pickled_poems()
    prompt_to_type = dict(zip(df_pickled['prompt'], df_pickled['prompt_type']))
    df_json = load_json_poems(prompt_to_type)

    # Skip empty frames so they cannot disturb dtypes during concatenation.
    frames = [frame for frame in (df_pickled, df_json) if not frame.empty]
    if frames:
        df_prompts = pd.concat(frames, ignore_index=True)
    else:
        df_prompts = pd.DataFrame(columns=columns)
    df_prompts = df_prompts[columns].copy()

    for numeric_col in ('temp', 'num_lines'):
        df_prompts[numeric_col] = pd.to_numeric(df_prompts[numeric_col], errors='coerce')

    # Ids are built from the raw text (before cleaning) so they stay stable
    # regardless of how clean_genai_poem evolves.
    df_prompts['id'] = [
        get_id_hash_str(f'{model}__{temp:.4f}__{prompt}__{txt}')
        for model, temp, prompt, txt in zip(
            df_prompts['model'], df_prompts['temp'], df_prompts['prompt'], df_prompts['txt']
        )
    ]
    df_prompts['id_hash'] = [get_id_hash(poem_id) for poem_id in df_prompts['id']]

    rhyme_codes = {'DO_rhyme': 'y', 'do_NOT_rhyme': 'n'}
    df_prompts['rhyme'] = [rhyme_codes.get(ptype, '?') for ptype in df_prompts['prompt_type']]

    # Deterministic order: sort by hash, break ties by id, stable within equal ids.
    # (An unseeded shuffle followed by an unstable sort made tie order and the
    # surviving duplicate vary between runs.)
    df_prompts = df_prompts.sort_values(['id_hash', 'id'], kind='mergesort')

    df_prompts['txt'] = df_prompts['txt'].apply(clean_genai_poem)
    # astype keeps an integer dtype even when there are no rows.
    df_prompts['num_lines'] = df_prompts['txt'].apply(get_num_lines).astype('int64')

    in_range = (df_prompts['num_lines'] >= min_lines) & (df_prompts['num_lines'] <= max_lines)
    odf = df_prompts[in_range].drop_duplicates('id').set_index('id')

    # Prompts removed from the corpus; commented entries are deliberately kept in.
    excl_prompts = [
        'Write an unryhmed poem in the style of Shakespeare\'s dramatic monologues.',
        'Write a poem in the style of Shakespeare\'s dramatic monologues.',
        'Write a poem in the style of e.e. cummings',
        # 'Write a poem in the style of Walt Whitman.',
        # "Write an ryhmed poem in the style of Shakespeare's sonnets.",
        # "Write a poem in the style of Emily Dickinson.",
        # "Write a poem in the style of Walt Whitman.",
        'Write a poem in the style of Wallace Stevens.',
        # "Write a poem in blank verse.",
        'Continue the following poem:\n\nTyping, typing, fingers on the keyboard\nThe keys crack and bend under sweat and weight,\n'
    ]
    odf = odf[~odf['prompt'].isin(excl_prompts)]

    save_sample(odf, '../data/corpus_genai_promptings.csv.gz', overwrite=overwrite)
    return odf

In [6]:
# Build the corpus from the default pickle/JSON paths; the function also saves
# it via save_sample, and the returned DataFrame is displayed as the cell output.
collect_prev_genai_promptings()

Saved sample to ../data/corpus_genai_promptings.csv.gz


Unnamed: 0_level_0,model,temp,prompt,prompt_type,txt,num_lines,id_hash,rhyme
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
8013cee2,ollama/llama3.1:70b,0.70000,Write a poem (with 20+ lines) that rhymes.,DO_rhyme,"In twilight's hush, where shadows play,\nThe s...",20,110,y
9b5fe215,ollama/llama3.1:8b,0.70000,Write a poem in free verse.,do_NOT_rhyme,Silence settles like a blanket\nover the city ...,12,122,n
d5f25926,ollama/llama2-uncensored:latest,0.70000,Write a long poem that does NOT rhyme.,do_NOT_rhyme,"The world is vast and endless,\nA sea of thoug...",24,382,n
21bbbb32,ollama/llama3.1:70b,0.70000,Write a poem in heroic couplets.,DO_rhyme,"In realms of dawn, where shadows flee,\nThe mo...",12,619,y
29d0584f,ollama/olmo2,0.70000,Write a poem that does NOT rhyme.,do_NOT_rhyme,"In the heart of the forest, under the moon's g...",12,682,n
...,...,...,...,...,...,...,...,...
e3f6e73c,claude-3-opus-20240229,0.55394,Write a poem in heroic couplets.,DO_rhyme,"In days of old, when knights were bold and tru...",16,999706,y
228c224a,ollama/llama2-uncensored:latest,0.70000,Write a poem that does rhyme.,DO_rhyme,"A flower blooms with vibrant hue,\nIn colors b...",10,999792,y
a82bbda9,ollama/olmo2:13b,0.70000,Write a poem (with 20+ lines) that does NOT rh...,do_NOT_rhyme,In the quiet hum of morning light \nA whisper...,44,999830,n
2815c431,ollama/llama3.1:70b,0.70000,Write a long poem.,MAYBE_rhyme,"In twilight's hush, where shadows play,\nA mid...",36,999885,?


### Replicating

In [5]:
# todo