In [None]:
# Mount Google Drive at /content/drive; the paths used by this notebook live under it.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip3 install -q -U transformers evaluate trl optimum-quanto rouge_score bitsandbytes flash-attn hqq

In [None]:
import os
import pandas as pd
import evaluate
import numpy as np

# Never hardcode access tokens in a notebook: a token saved in the file is leaked to
# everyone who can read it and must be revoked. Reuse HF_TOKEN when the runtime
# already provides it; otherwise ask for it without echoing the value.
if not os.environ.get("HF_TOKEN"):
    from getpass import getpass

    os.environ["HF_TOKEN"] = getpass("Hugging Face token: ")

In [None]:
def create_prompt(inputs: dict) -> str:
    """
    Build the user prompt asking for an explanation of selected poem lines.

    Args:
        inputs: mapping with the keys 'poem_title', 'poet', 'content_before',
            'context_after' and 'referent'.

    Returns:
        The filled-in prompt. The leading newline and the four-space
        indentation of the template are part of the returned text.

    Raises:
        KeyError: if one of the expected keys is missing from ``inputs``.
    """
    template = """
    You are given the poem "{title}" by {poet}.
    <poem>
    {content_before}
    {referent}
    {context_after}
    </poem>
    Explain the meaning of the following lines: "{referent}"
    """
    # The excerpt is laid out as: preceding lines, the referent, following lines;
    # the referent is repeated in the final instruction.
    fields = {
        "title": inputs['poem_title'],
        "poet": inputs['poet'],
        "content_before": inputs['content_before'],
        "context_after": inputs['context_after'],
        "referent": inputs['referent'],
    }
    return template.format(**fields)

In [None]:
# Checkpoint directory on Drive; both the tokenizer and the model are loaded from it.
output_dir = "/content/drive/MyDrive/Colab Notebooks/Poemma/checkpoint-llama-3.1-8b-it-28-11/"

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

tokenizer = AutoTokenizer.from_pretrained(output_dir, add_eos_token=True)

# 4-bit NF4 weights with double quantization; computation runs in bfloat16.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # torch.float16 if GPU doesn't support bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    output_dir,
    quantization_config=nf4_config,
    # .get() passes None (the library default) when HF_TOKEN is unset instead of
    # raising KeyError; the checkpoint is read from a local directory.
    token=os.environ.get("HF_TOKEN"),
    attn_implementation="flash_attention_2",
    device_map="auto",
    use_cache=True,
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# CSV files of the annotation dataset, keyed by split name; each split maps to a list of paths.
data_files = dict(
    train=["/content/drive/MyDrive/Colab Notebooks/Poemma/data/annotations_dataset_train.csv"],
    test=["/content/drive/MyDrive/Colab Notebooks/Poemma/data/annotations_dataset_test.csv"],
)

In [None]:
from datasets import load_dataset

MAX_SEQUENCE_LENGTH = 512

# Read the CSV files listed in `data_files`; each key becomes a split of `dataset`.
dataset = load_dataset(path="csv", data_files=data_files)

# 'annotation' column of the test split.
labelled = dataset["test"]["annotation"]

In [None]:
def apply_test_chat_template(example, tokenizer, device="cuda"):
    """
    Tokenize one example as a chat prompt that is ready for generation.

    Args:
        example: mapping holding the fields read by `create_prompt`.
        tokenizer: tokenizer providing `apply_chat_template`.
        device: device the encoded tensors are moved to (defaults to "cuda",
            the value that was previously hardcoded).

    Returns:
        The encoding produced by `tokenizer.apply_chat_template`, moved to `device`.
    """
    text = create_prompt(example)
    messages = [
        {"role": "system", "content": "You are an expert in poetry."},
        {"role": "user", "content": text}
    ]

    # add_generation_prompt=True appends the assistant header to the prompt, so the
    # model starts writing its answer instead of first having to open the turn itself.
    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    return encoded.to(device)

#column_names = list(dataset["train"].features)
# dataset = dataset.map(apply_test_chat_template,
#                       fn_kwargs={"tokenizer": tokenizer},
#                       remove_columns=column_names,
#                       desc="Applying chat template"
#                      )

In [None]:
# Number of examples in the train and test splits, in that order.
tuple(len(dataset[split]) for split in ("train", "test"))

(2576, 687)

In [None]:
# Sanity check of the ROUGE metric on a single hand-picked prediction/reference pair.
rouge = evaluate.load('rouge')
predictions = [
    "'Keats is commenting on the nature of blind faith in the church. I was surprised to see this line (“the mind of man is closely bound/In some blind spell”), because Romantic poetry is not known for its social commentary. However, this does encompass the complex human emotions and psyche that are key components of the genre. It seems Keats may have once been inspired by this church scene, but now takes on a more pessimistic viewpoint.'",
]
references = [
    "Keats’ use of “bound” is interesting.  It is a word that has a sense of constraint, but also of protection.  The speaker is suggesting that the mind of man is limited, but this limitation also serves to protect us from the full force of reality. This is a central theme in Keats’ poetry.  He often uses the idea of the “bound” mind to suggest that our perceptions are limited, and that we can only understand the world through our own particular perspective.  This is a key idea in his poem “Ode to a Grecian Urn”, where he",
]
results = rouge.compute(references=references, predictions=predictions)
print(results)

{'rouge1': 0.3448275862068966, 'rouge2': 0.05813953488372093, 'rougeL': 0.1954022988505747, 'rougeLsum': 0.1954022988505747}


In [None]:
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader

BATCH_SIZE = 1
device='cuda'
metric = evaluate.load("rouge")
generated = []


def _keep_examples(batch):
    """Return the batch unchanged, i.e. as the list of per-example dicts."""
    return batch


# The default collate function merges each text field of a batch into a list of
# strings; formatting those lists into the prompt produced text such as
# "['John Donne']". Keeping the examples as individual dicts avoids that.
eval_dataloader = DataLoader(dataset['test'],
                             batch_size=BATCH_SIZE,
                             shuffle=False,
                             collate_fn=_keep_examples)

for batch in tqdm(eval_dataloader, desc="Evaluating"):
    for example in batch:
        # Render missing fields as empty text rather than the string "None"
        # (mirrors the fillna('') applied when the CSV is read with pandas).
        fields = {key: ("" if value is None else value) for key, value in example.items()}
        text = create_prompt(fields)
        messages = [
            {"role": "system", "content": "You are an expert in poetry."},
            {"role": "user", "content": text},
        ]

        # add_generation_prompt=True appends the assistant header so the model
        # starts answering right away instead of spending tokens opening the turn.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(device)
        outputs = model.generate(**inputs,
                                 max_new_tokens=128,
                                 pad_token_id=tokenizer.eos_token_id)
        # Optional quantized KV cache (left disabled):
        #   cache_implementation="quantized",
        #   cache_config={"backend": "quanto", "nbits": 4}

        # generate() returns prompt + continuation; keep only the new tokens so the
        # stored text can be compared with the reference annotation.
        prompt_length = inputs["input_ids"].shape[-1]
        generated.append(tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True))

# print(tokenizer.decode(outputs[0], skip_special_tokens=True))



Evaluating:   1%|          | 5/687 [02:18<5:14:14, 27.65s/it]


In [None]:
# Preview the first few generations instead of dumping the whole list into the cell output.
generated[:3]

['system\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are an expert in poetry.user\n\nYou are given the poem "[\'Good-Friday 1613 Riding Westward\']" by [\'John Donne\'].\n    <poem>\n    ["Pleasure or businesse, so, our Soules admit\\nFor their first mover, and are whirld by it.\\nHence is\'t, that I am carryed towards the West\\nThis day, when my Soules forme bends toward the East.\\nThere I should see a Sunne, by rising set,\\nAnd by that setting endlesse day beget;\\nBut that Christ on this Crosse, did rise and fall,\\nSinne had eternally benighted all.\\nYet dare I\'almost be glad, I do not see\\nThat spectacle of too much weight for mee.\\nWho sees Gods face, that is selfe life, must dye;\\nWhat a death were it then to see God dye?"]\n    [\'It made his owne Lieutenant Nature shrinke,\']\n    ["It made his footstoole crack, and the Sunne winke.\\nCould I behold those hands which span the Poles,\\nAnd tune all spheares at once, peirc\'d with those holes

In [None]:
# Draw one random row of the test CSV as a plain dict, with missing cells replaced by ''.
sample = (
    pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Poemma/data/annotations_dataset_test.csv')
    .fillna('')
    .sample(n=1)
    .to_dict(orient='records')[0]
)

In [None]:
# Render the prompt for the sampled record to check its layout.
print(create_prompt(inputs=sample))


    You are given the poem "Sonnet 30" by William Shakespeare.
    <poem>
    
    When to the sessions of sweet silent thought
    I summon up remembrance of things past,
I sigh the lack of many a thing I sought,
And with old woes new wail my dear time's waste:
    </poem>
    Explain the meaning of the following lines: "When to the sessions of sweet silent thought"
    


In [None]:
# Display the sampled record (the dict passed to create_prompt).
sample

{'content_before': '',
 'referent': 'When to the sessions of sweet silent thought',
 'context_after': "I summon up remembrance of things past,\nI sigh the lack of many a thing I sought,\nAnd with old woes new wail my dear time's waste:",
 'annotation': 'Shakespeare begins with a subordinate clause to provide an element of suspense; this draws the reader in as they await the main clause. \n The  alliterative , sibilant “s"s in "sessions”, “sweet” and “silent” mimic the sound of sighing, — the word “sigh” appears in line three. \n The first  quatrain  has a gentle, even, heavy rhythm — perfect  iambic pentameter  that is appropriate to the mood of the pensive speaker.',
 'poet': 'William Shakespeare',
 'poem_title': 'Sonnet 30'}

In [None]:
from transformers import QuantoQuantizedCache, QuantizedCacheConfig

text = create_prompt(sample)
messages = [
        {"role": "system", "content": "You are an expert in poetry."},
        {"role": "user", "content": text},
    ]

# add_generation_prompt=True appends the assistant header so the model answers
# directly; the tensors are moved to the model's device because generate() expects
# its inputs on the same device as the model.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)

print(inputs)

outputs = model.generate(**inputs,
                         max_new_tokens=128,
                         repetition_penalty=1.25,
                         pad_token_id=tokenizer.eos_token_id)
# Optional quantized KV cache (left disabled):
#   cache_implementation="quantized",
#   cache_config={"backend": "HQQ", "nbits": 8}

# Prints the whole exchange (prompt followed by the generated answer).
print(tokenizer.decode(outputs[0], skip_special_tokens=True))



{'input_ids': tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   1627,  10263,    220,   2366,     19,    271,   2675,    527,
            459,   6335,    304,  32349,     13, 128009, 128006,    882, 128007,
            271,   2675,    527,   2728,    279,  33894,    330,  46714,   4816,
            220,    966,      1,    555,  12656,  42482,    627,    262,    366,
           5481,    336,    397,   1084,    262,   3277,    311,    279,  16079,
            315,  10437,  21737,   3463,    198,    262,    358,  28645,    709,
           1323,  72669,    315,   2574,   3347,    345,     40,  31238,    279,
           6996,    315,   1690,    264,   3245,    358,  16495,    345,   3112,
            449,   2362,  77825,    502,    289,    607,    856,  25237,    892,
            596,  12571,    512,    262,    694,   5481,    336,    397,    262,
          8301



system

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are an expert in poetry.user

You are given the poem "Sonnet 30" by William Shakespeare.
    <poem>
    
    When to the sessions of sweet silent thought
    I summon up remembrance of things past,
I sigh the lack of many a thing I sought,
And with old woes new wail my dear time's waste:
    </poem>
    Explain the meaning of the following lines: "When to the sessions of sweet silent thought"assistant

The speaker is sitting quietly and thinking about their memories, which they find pleasant but also sad because he has lost so much that was important to him. The word “sessions” refers back to the previous line’s idea of going through all his thoughts at once. 
 He uses words like ‘sweet’ and ‘silent’, suggesting this quiet reflection on memory brings peace to the mind. This peacefulness contrasts sharply with what comes next – sadness over losing something precious. 
 
 This sonnet can be read as part of a serie