In [1]:
import sys; sys.path.insert(0,'..')
from tqdm.auto import tqdm
tqdm.pandas()
from multiprompt import *

In [2]:
def get_first_n_lines(txt, n=5):
    lines = []
    nline = 0
    for ln in txt.split('\n'):
        if n and nline >= n: break
        if ln.strip(): 
            nline+=1
            lines.append(f'{nline}\t{ln}')
        else:
            lines.append(ln)
    return '\n'.join(ln for i,ln in enumerate(lines)).strip()


In [3]:
poem="""
And if I have a soul my soul is green
And if it sings it doesn't sing to me
And if it loves it loves externally
Both what it has and what it hasn't seen

And if it's green it may as well be high
And if ambition doesn't give it height
And if it only rises with a fight
Against itself and not against the sky

If all the force it uses leaves me free
This proves it not just definite but right
"""

poem2="""
When most I wink, then do mine eyes best see,
For all the day they view things unrespected;
But when I sleep, in dreams they look on thee,
And darkly bright are bright in dark directed.
Then thou, whose shadow shadows doth make bright,
How would thy shadow's form form happy show
To the clear day with thy much clearer light,
When to unseeing eyes thy shade shines so!
How would, I say, mine eyes be blessed made
By looking on thee in the living day,
When in dead night thy fair imperfect shade
Through heavy sleep on sightless eyes doth stay!
All days are nights to see till I see thee,
And nights bright days when dreams do show thee me.
"""

poem3="""
While the water tower squats Like a Turkish bird
on the ridge and smiles more nicely the other way in green
and bougainvillaea to where the warmest houses face north,

the dead well-to-do are ignored and grumble in the plumbing.
This could be a local myth. There could be many.
Stark living rooms made to be seen into

stare at each other often across the streets and say secrets.
No brass and marble now. But it's more humbling
buying groceries on a teak floor and more Like us

to prefer the peopled ruin. I see the gaping fanlight
in the hallway behind a fruit-stall on the stoep
after rugby and now when it's dry and the roads turn back

to gravel into driveways, dust rises and sweet william and anthracite
are there. There is a real side, where, when you never stopped before,
you hear whispering on corners only you understand.
"""

# print(get_first_n_lines(poem, None))

In [4]:

def get_chadwyck_corpus(
        min_lines=10, 
        max_lines=20,
        corpus_root = '/Users/ryan/lltk_data/corpora/chadwyck_poetry', 
        first_n_lines=5,
        ):
    df = pd.read_csv(os.path.join(corpus_root, 'metadata.csv'))
    df=df.query(f'{min_lines}<=num_lines<={max_lines}')
    def get_txt(id):
        fn = os.path.join(corpus_root, 'txt', id) + '.txt'
        if os.path.exists(fn):
            with open(fn) as f:
                return f.read().strip()
        return ""

    df['poem'] = df['id'].progress_apply(get_txt)

    # df['fstanza'] = df.poem.apply(lambda x: x.strip().split('\n\n')[0].strip())
    # df['fstanza_nline'] = df.fstanza.apply(lambda x: len(x.split('\n')))
    # df['flines'] = df.poem.apply(lambda x: get_first_n_lines(x, first_n_lines))
    # df['flines_nline'] = df.flines.apply(lambda x: len(x.split('\n')))

    # def make_prompt(row):
    #     return f'TITLE: {row.title}\nAUTHOR: {row.attpoet}\nNUMBER OF LINES: {row.num_lines}\n\n{row.flines}'

    # df['prompt'] = df.apply(make_prompt, axis=1)
    return df

In [5]:
df = get_chadwyck_corpus()

  0%|          | 0/103103 [00:00<?, ?it/s]

In [8]:
def get_system_prompt(first_n_lines=2):
    system_prompt = f'''
The following is the first {first_n_lines} lines from a poem given in the user prompt, whose true number of lines is stated there.

Complete the poem – do this from memory if you know it; if not, imitate its style and theme for the same number of lines as in the original.

Return lines in tab-separated form, starting from line {first_n_lines+1} up to the stated number of lines:

    line#\tline

Do not return any other text besides these tab-separated lines.
'''.strip()
    return system_prompt

def get_user_prompt(txt, first_n_lines=2, **meta):
    prompt=[]
    for k in meta:
        prompt.append(f'{k.upper()}: {meta[k]}')
    
    num_lines = len([x for x in txt.split('\n') if x.strip()])
    prompt.append(f'NUMBER OF LINES: {num_lines}')
    prompt.append('')
    prompt.append(get_first_n_lines(txt, first_n_lines))
    return '\n'.join(prompt)



def complete_poem(txt, first_n_lines=2, _force=False, model='deepseek/deepseek-chat', **meta):
    all_lines = get_first_n_lines(txt, None)
    num_lines = int(all_lines.split('\n')[-1].split('\t')[0])
    user_prompt = get_user_prompt(txt, first_n_lines=first_n_lines, **meta)
    system_prompt = get_system_prompt(first_n_lines=first_n_lines)
    
    agent = Agent('poem_completions')
    print(system_prompt)
    print(user_prompt)
    print('*',end="",flush=True)
    response = agent.generate(
        user_prompt, 
        system_prompt=system_prompt,
        verbose=True, 
        model=model,
        _force=_force,
    )
    
    o=[]
    for ln in response.strip().split('\n'):
        if '\t' in ln:
            o.append(ln.strip().split('\t',1))
    
    if len(o) != (num_lines - first_n_lines):
        logger.warning(f'Line length mismatch: {len(o)} output to needed {num_lines - first_n_lines}')

    return o
    


In [9]:
# row=df.query('author_dob>=1970').sample(n=1).iloc[0]
# print(row.attpoet)
# print()
# print(row.poem)

In [10]:
complete_poem(poem3, 5, _force=False)

The following is the first 5 lines from a poem given in the user prompt, whose true number of lines is stated there.

Complete the poem – do this from memory if you know it; if not, imitate its style and theme for the same number of lines as in the original.

Return lines in tab-separated form, starting from line 6 up to the stated number of lines:

    line#	line

Do not return any other text besides these tab-separated lines.
NUMBER OF LINES: 15

1	While the water tower squats Like a Turkish bird
2	on the ridge and smiles more nicely the other way in green
3	and bougainvillaea to where the warmest houses face north,

4	the dead well-to-do are ignored and grumble in the plumbing.
5	This could be a local myth. There could be many.
*

[['6', 'The streets are quiet, the air is still,'],
 ['7', 'and shadows stretch across the hill.'],
 ['8', 'The wind whispers through the trees,'],
 ['9', 'carrying secrets on its breeze.'],
 ['10', 'The moon rises, pale and thin,'],
 ['11', 'casting light where night begins.'],
 ['12', 'The stars blink in the velvet sky,'],
 ['13', 'as if they know the reason why.'],
 ['14', 'The town sleeps, its dreams unfold,'],
 ['15', 'in stories told, in whispers old.']]

In [11]:
# df=get_chadwyck_corpus(first_n_lines=first_n_lines)

In [12]:
# row = df.query('author_dob>=1950').sample(n=1).iloc[0]
# # row = df[df.attpoet.str.contains('Shakespeare')].sample(n=1).iloc[0]
# user_prompt = row.prompt
# print(user_prompt)
# print('\n---------\n')
# print(row.poem)

In [13]:
agent = Agent('poem_completions')
print(user_prompt)
response = agent.generate(
    user_prompt, 
    system_prompt=system_prompt,
    verbose=True, 
    model='deepseek/deepseek-chat',
    _force=True,
    # model='ollama/llama3:instruct',
    # model='gpt-3.5-turbo'
)

NameError: name 'user_prompt' is not defined