In [None]:
import sys; sys.path.insert(0,'..')
import sys; sys.path.insert(0,'../../hashstash')
from tqdm.auto import tqdm
tqdm.pandas()
from multiprompt import *
# !pip install rapidfuzz
from rapidfuzz import fuzz
import numpy as np
from hashstash import HashStash
# Result cache named 'poem_completions2', configured with the pairtree engine,
# pickle serialization and no compression. It backs the
# @stash.stashed_result decorator applied to complete_poem_id further down.
stash = HashStash('poem_completions2', engine='pairtree', serializer='pickle', compress=False)
# Bare expression: displays the stash's repr as the cell output.
stash

In [2]:
def get_first_n_lines(txt, n=5):
    """Number the non-blank lines of ``txt`` and keep only the first ``n``.

    Every non-blank line is rewritten as ``<number><TAB><original line>``,
    counting from 1. Blank lines are passed through unnumbered so stanza
    breaks survive. A falsy ``n`` (``None`` or ``0``) disables the limit and
    the whole text is numbered.

    Returns the rebuilt text with leading/trailing whitespace stripped.
    """
    numbered = []
    count = 0
    limited = bool(n)
    for raw in txt.split('\n'):
        # Stop as soon as the requested number of real lines was collected.
        if limited and count >= n:
            break
        if not raw.strip():
            # Blank line: keep it verbatim as a stanza separator.
            numbered.append(raw)
            continue
        count += 1
        numbered.append(f'{count}\t{raw}')
    return '\n'.join(numbered).strip()


In [3]:
# Sample poem text: 10 non-blank lines in three stanzas separated by blank
# lines. The literal starts and ends with a newline.
poem="""
And if I have a soul my soul is green
And if it sings it doesn't sing to me
And if it loves it loves externally
Both what it has and what it hasn't seen

And if it's green it may as well be high
And if ambition doesn't give it height
And if it only rises with a fight
Against itself and not against the sky

If all the force it uses leaves me free
This proves it not just definite but right
"""

# Sample poem text: 14 lines with no stanza breaks.
# NOTE(review): appears to be Shakespeare's Sonnet 43 -- confirm the source.
poem2="""
When most I wink, then do mine eyes best see,
For all the day they view things unrespected;
But when I sleep, in dreams they look on thee,
And darkly bright are bright in dark directed.
Then thou, whose shadow shadows doth make bright,
How would thy shadow's form form happy show
To the clear day with thy much clearer light,
When to unseeing eyes thy shade shines so!
How would, I say, mine eyes be blessed made
By looking on thee in the living day,
When in dead night thy fair imperfect shade
Through heavy sleep on sightless eyes doth stay!
All days are nights to see till I see thee,
And nights bright days when dreams do show thee me.
"""

# Sample poem text: 15 non-blank lines in five three-line stanzas separated
# by blank lines. This is the text passed to complete_poem() in a later cell.
poem3="""
While the water tower squats Like a Turkish bird
on the ridge and smiles more nicely the other way in green
and bougainvillaea to where the warmest houses face north,

the dead well-to-do are ignored and grumble in the plumbing.
This could be a local myth. There could be many.
Stark living rooms made to be seen into

stare at each other often across the streets and say secrets.
No brass and marble now. But it's more humbling
buying groceries on a teak floor and more Like us

to prefer the peopled ruin. I see the gaping fanlight
in the hallway behind a fruit-stall on the stoep
after rugby and now when it's dry and the roads turn back

to gravel into driveways, dust rises and sweet william and anthracite
are there. There is a real side, where, when you never stopped before,
you hear whispering on corners only you understand.
"""

# print(get_first_n_lines(poem, None))

In [40]:
# Root directory of the local Chadwyck poetry corpus; '~' is expanded to the
# current user's home directory. get_txt reads '<root>/txt/<id>.txt' and
# get_chadwyck_corpus reads '<root>/metadata.csv'.
CORPUS_ROOT = os.path.expanduser('~/lltk_data/corpora/chadwyck_poetry')


def get_txt(id):
    """Return the stripped text of the corpus poem ``id``.

    The poem is looked up at ``<CORPUS_ROOT>/txt/<id>.txt``. When that file
    does not exist an empty string is returned instead of raising.
    """
    path = os.path.join(CORPUS_ROOT, 'txt', id) + '.txt'
    if not os.path.exists(path):
        return ""
    with open(path) as handle:
        text = handle.read()
    return text.strip()

def get_chadwyck_corpus(
        min_lines=13, 
        max_lines=20,
        min_dob=1600,
        max_dob=2000,
        ):
    """Load the Chadwyck metadata table, filter it, and attach poem texts.

    Parameters
    ----------
    min_lines, max_lines : int
        Inclusive bounds on the ``num_lines`` metadata column.
    min_dob, max_dob : int
        Bounds on the ``author_dob`` column: ``min_dob <= author_dob < max_dob``.
        Rows whose ``author_dob`` is missing fail both comparisons and are
        dropped. Defaults reproduce the previously hard-coded 1600/2000.

    Returns
    -------
    pandas.DataFrame
        The filtered metadata rows plus two added columns:
        ``ybin`` -- a 50-year birth-cohort label such as ``'b. 1800-1850'``;
        ``poem`` -- the poem text loaded through ``get_txt`` ("" if the text
        file is missing).

    Notes
    -----
    ``progress_apply`` is only available after ``tqdm.pandas()`` has been
    called (done in the notebook's first cell).
    """
    meta = pd.read_csv(os.path.join(CORPUS_ROOT, 'metadata.csv'))

    keep = (
        meta.num_lines.between(min_lines, max_lines)
        & (meta.author_dob >= min_dob)
        & (meta.author_dob < max_dob)
    )
    # Work on an explicit copy so the column assignments below never write
    # into a view of the full table (avoids SettingWithCopyWarning).
    df = meta.loc[keep].copy()

    # Floor the birth year to its 50-year bin and label the bin by its range.
    df['ybin'] = df.author_dob.apply(lambda x: f'b. {x//50*50:.0f}-{x//50*50+50:.0f}')

    df['poem'] = df['id'].progress_apply(get_txt)
    return df

In [None]:
# Load the filtered corpus with the default line-count bounds (13-20 lines).
# This reads every matching poem file from disk via get_txt.
df = get_chadwyck_corpus()

In [None]:
# Number of poems in each 50-year author-birth bin.
df.ybin.value_counts()

In [43]:
def get_system_prompt(first_n_lines=2):
    """Build the system prompt that asks the model to finish a poem.

    ``first_n_lines`` is how many opening lines the user prompt contains; the
    prompt tells the model to answer with tab-separated ``line#`` / ``line``
    rows beginning at line ``first_n_lines + 1``. The returned text has its
    surrounding whitespace stripped.
    """
    return f'''
The following is the first {first_n_lines} lines from a poem given in the user prompt, whose true number of lines is stated there.

Complete the poem – do this from memory if you know it; if not, imitate its style and theme for the same number of lines as in the original.

Return lines in tab-separated form, starting from line {first_n_lines+1} up to the stated number of lines:

    line#\tline

Do not return any other text besides these tab-separated lines.
'''.strip()

def get_user_prompt(txt, first_n_lines=2, **meta):
    """Build the user prompt: metadata header, line count, opening lines.

    Each keyword in ``meta`` becomes a ``KEY: value`` header row (key
    upper-cased, in the order given). It is followed by a
    ``NUMBER OF LINES`` row counting the non-blank lines of ``txt``, an empty
    row, and the first ``first_n_lines`` numbered lines of the poem as
    produced by ``get_first_n_lines``.
    """
    header = [f'{key.upper()}: {value}' for key, value in meta.items()]
    total = sum(1 for row in txt.split('\n') if row.strip())
    parts = header + [
        f'NUMBER OF LINES: {total}',
        '',
        get_first_n_lines(txt, first_n_lines),
    ]
    return '\n'.join(parts)

def complete_poem(txt, first_n_lines=5, _force=False, model='deepseek/deepseek-chat', **meta):
    """Ask a model to continue a poem and align its lines with the original.

    Parameters
    ----------
    txt : str
        Full poem text; blank lines separate stanzas.
    first_n_lines : int
        Number of opening lines shown to the model.
    _force : bool
        Passed straight through to ``agent.generate``.
        NOTE(review): presumably bypasses the agent's cache -- confirm.
    model : str
        Model identifier passed to ``agent.generate``.
    **meta
        Extra ``KEY: value`` header rows for the user prompt (e.g. title).

    Returns
    -------
    pandas.DataFrame
        One row per poem line with columns ``stanza_num``, ``line_num``,
        ``line_real`` and ``line_gen`` (empty string for lines the model was
        not asked to write). An empty DataFrame is returned when ``txt`` has
        no non-blank lines or when the model did not return exactly
        ``num_lines - first_n_lines`` numbered lines.
    """
    all_lines = get_first_n_lines(txt, None)
    if not all_lines:
        # Nothing to complete (e.g. get_txt returned "" for a missing file).
        # Bail out before building prompts or calling the model; previously
        # this crashed with ValueError on int('').
        return pd.DataFrame()
    # After numbering and stripping, the last row is always a numbered line,
    # so its number equals the count of non-blank lines in the poem.
    num_lines = int(all_lines.split('\n')[-1].split('\t')[0])

    user_prompt = get_user_prompt(txt, first_n_lines=first_n_lines, **meta)
    system_prompt = get_system_prompt(first_n_lines=first_n_lines)

    agent = Agent('poem_completions')
    response = agent.generate(
        user_prompt, 
        system_prompt=system_prompt,
        verbose=False, 
        model=model,
        _force=_force,
    )

    # Parse "<line number><TAB><text>" rows from the reply. Line numbers are
    # normalised (" 6" and "06" both become "6") so they match the keys used
    # for the original lines; rows whose first field is not a number (such as
    # an echoed "line#<TAB>line" header) are not generated lines and are skipped.
    newlnd = {}
    for ln in response.strip().split('\n'):
        if '\t' not in ln:
            continue
        key, text = ln.split('\t', 1)
        key = key.strip()
        if not key.isdecimal():
            continue
        newlnd[str(int(key))] = text

    if len(newlnd) != (num_lines - first_n_lines):
        # The model returned the wrong number of lines: signal failure with
        # an empty frame rather than a partially aligned result.
        return pd.DataFrame()

    rows = []
    snum = 1
    for ln in all_lines.split('\n'):
        if not ln.strip():
            # A blank row marks the start of the next stanza.
            snum += 1
            continue
        lnum, line = ln.split('\t', 1)
        rows.append({
            'stanza_num': snum,
            'line_num': int(lnum),
            'line_real': line,
            'line_gen': newlnd.get(lnum, ''),
        })

    return pd.DataFrame(rows)
    


In [None]:
# Smoke test: complete the sample poem3 with the default model and the
# default first_n_lines=5, and display the aligned real/generated lines.
complete_poem(poem3)

In [57]:
# Stratified sample: the same number of poems from every birth-year bin.
# A fixed random_state makes Restart & Run All draw the same poems each time
# (the sample was previously unseeded and changed on every run).
# Sampling is without replacement, so pandas raises ValueError if any bin
# holds fewer than SAMPLE_PER_BIN poems.
SAMPLE_PER_BIN = 1000
SAMPLE_SEED = 42
df_smpl = df.groupby('ybin').sample(SAMPLE_PER_BIN, random_state=SAMPLE_SEED)
# len(df_smpl)

In [58]:
# df_smpl=df_smpl.to_pickle('data.chadwyck_sample_2025.pkl')
# df_smpl=pd.read_pickle('data.chadwyck_sample_2025.pkl')
# df_smpl.id

In [59]:
# df_smpl = df[df.ybin=='b. 1950-2000'].sample(n=1000)
# df_smpl

In [60]:
@stash.stashed_result
def complete_poem_id(id, first_n_lines=5, model='deepseek/deepseek-chat'):
    """Complete the corpus poem ``id`` with ``model``.

    Loads the poem text through ``get_txt`` and hands it to
    ``complete_poem``, returning that function's DataFrame unchanged. The
    function is wrapped by ``stash.stashed_result``.
    """
    return complete_poem(get_txt(id), first_n_lines=first_n_lines, model=model)

# complete_poem_id.stash.clear()

In [61]:
# complete_poem_id(id=df_smpl.sample(1).iloc[0].id, first_n_lines=5, model='ollama/llama3.1:8b')

In [62]:
# ids = df_smpl.id.iloc[-15:-10].tolist()
# ids

In [63]:
# for id in ids:
#     complete_poem_id(id=id, first_n_lines=5, model='ollama/llama3.1:8b')

In [64]:
# df_smpl = df[df.attrhyme=='n']
# df_smpl

In [None]:
# def complete_poem_ids()
models = [
    'ollama/olmo2:latest',
    # 'ollama/llama3.1:8b',
    # 'deepseek/deepseek-chat',
    # 'gpt-3.5-turbo',
    # 'gpt-4-turbo',
    # 'claude-3-sonnet-20240229',
]
# model = models[0]
options=[
    {
        'id':id,
        'first_n_lines':5,
        'model':model
    }
    for model in models
    for id in df_smpl.id
]
random.shuffle(options)

# complete_poem_id.stash.assemble_df(with_metadata=True)
# pmap = stash.map(
#     complete_poem_id,
#     options=options,
#     num_proc=1,
#     progress=True,
#     # ordered=False
# )

# # for id in tqdm(df_smpl.id):
# #     complete_poem_id(
# #         id=id,
# #         first_n_lines=5,
# #         model=models[0],
# #     )
# pmap
len(options)

In [66]:
# from multiprocessing import Pool

# def process_option(opt):
#     return complete_poem_id(**opt)

# # Create a pool of 3 processes
# with Pool(processes=3) as pool:
#     # Use tqdm to show progress
#     for x in tqdm(
#         pool.imap(process_option, options),
#         total=len(options),
#         desc="Processing poems"
#     ):
#         pass

In [None]:
# Run every (poem, model) job sequentially. A failing job must not stop the
# batch, so exceptions are caught -- but they are also recorded in
# `failed_options`, because the progress-bar description that reports them is
# overwritten by the very next iteration.
failed_options = []
iterr=tqdm(options)
for opt in iterr:
    try:
        res = complete_poem_id(
            id=opt['id'],
            first_n_lines=opt['first_n_lines'],
            model=opt['model']
        )
        iterr.set_description(f'got {len(res)} lines from {opt["model"]} on poem {opt["id"]}')
    except Exception as e:
        # Keep the job's arguments together with the error for later inspection.
        failed_options.append({**opt, 'error': repr(e)})
        iterr.set_description(f'!! {e}')

In [68]:
# Collect every stashed complete_poem_id result into one DataFrame. With
# with_metadata=True the call arguments and stash metadata appear as index
# levels (_id, _first_n_lines, _model, _version, _timestamp in the output
# below) next to the stanza_num / line_num / line_real / line_gen columns.
odf = complete_poem_id.stash.assemble_df(with_metadata=True)
# Persist the combined results next to the notebook.
odf.to_pickle('data.output.gen_poems.v3.pkl')
odf

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,stanza_num,line_num,line_real,line_gen
_id,_first_n_lines,_model,_version,_timestamp,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
modern/ent2601/Z300573872,5,deepseek/deepseek-chat,1,1.742137e+09,1,1,Silent as a falling leaf,
modern/ent2601/Z300573872,5,deepseek/deepseek-chat,1,1.742137e+09,1,2,To my heart there came a grief:,
modern/ent2601/Z300573872,5,deepseek/deepseek-chat,1,1.742137e+09,1,3,"With a cold and pure despair,",
modern/ent2601/Z300573872,5,deepseek/deepseek-chat,1,1.742137e+09,1,4,"Angerless, it settled there:",
modern/ent2601/Z300573872,5,deepseek/deepseek-chat,1,1.742137e+09,1,5,"And must linger, and must stay,",
...,...,...,...,...,...,...,...,...
english/miltonjo/Z300437814,5,ollama/olmo2:latest,1,1.742178e+09,1,10,"Thy handmaids, clad them over with purple ...","Thy Charity, a living flame, doth ever burn"
english/miltonjo/Z300437814,5,ollama/olmo2:latest,1,1.742178e+09,1,11,"And azure wings, that up they flew so dres...","In charitable deeds, and merciful actions,"
english/miltonjo/Z300437814,5,ollama/olmo2:latest,1,1.742178e+09,1,12,And speak the truth of thee on glorious Theams,"The echoes of thy pious life are heard,"
english/miltonjo/Z300437814,5,ollama/olmo2:latest,1,1.742178e+09,1,13,"Before the Judge, who thenceforth bid thee...","Beyond the grave, where thou hast found thy re..."
