In [None]:
# Colab only: mount Google Drive; later cells read paths under /content/drive/MyDrive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip3 install -q -U transformers evaluate trl optimum-quanto rouge_score bitsandbytes flash-attn hqq

In [1]:
import os
import pandas as pd
import evaluate
import numpy as np

# Take the Hugging Face access token from the environment rather than embedding it
# in the notebook; prompt for it (without echoing) only when it is not already set.
# NOTE(review): any token that was ever saved in plain text here should be revoked.
if not os.environ.get("HF_TOKEN"):
    from getpass import getpass

    os.environ["HF_TOKEN"] = getpass("Hugging Face access token: ")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def create_prompt(inputs: dict) -> str:
    """
    Build the poetry-explanation prompt for one annotated example.

    Args:
        inputs: Mapping with the keys 'poem_title', 'poet', 'content_before',
            'referent' and 'context_after'.

    Returns:
        The prompt text: the poem (lines before, the referent, lines after)
        wrapped in <poem> tags, followed by the request to explain the referent.

    Raises:
        KeyError: If one of the expected keys is missing from ``inputs``.
    """
    def _field(key: str):
        # A missing value may arrive as None or as a float NaN; render it as an
        # empty string instead of the literal words "None" / "nan" in the poem.
        value = inputs[key]
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return value

    # The template text (including its leading whitespace) is kept unchanged.
    return """
    You are given the poem "{title}" by {poet}.
    <poem>
    {content_before}
    {referent}
    {context_after}
    </poem>
    Explain the meaning of the following lines: "{referent}"
    """.format(
        title=_field('poem_title'),
        poet=_field('poet'),
        content_before=_field('content_before'),
        context_after=_field('context_after'),
        referent=_field('referent')
    )

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Checkpoint to evaluate (local run). Colab alternative:
#output_dir = "/content/drive/MyDrive/Colab Notebooks/Poemma/checkpoint-llama-3.1-8b-it-28-11/"
output_dir = "./outputs/checkpoint-631"

# 4-bit NF4 quantisation with double quantisation.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    # Use torch.float16 instead if the GPU doesn't support bfloat16.
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Keyword arguments for loading the quantised model.
model_load_kwargs = dict(
    quantization_config=nf4_config,
    token=os.environ['HF_TOKEN'],
    attn_implementation="flash_attention_2",
    device_map="auto",
    use_cache=True,
)
model = AutoModelForCausalLM.from_pretrained(output_dir, **model_load_kwargs)

# Tokenizer comes from the same checkpoint; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(output_dir, add_eos_token=True)
tokenizer.pad_token = tokenizer.eos_token


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.35s/it]


In [3]:
# Project folder on Google Drive.
drive_location = "/content/drive/MyDrive/Colab Notebooks/Poemma/"

# CSV file(s) for each split, passed to load_dataset(..., data_files=...).
data_files = dict(
    train=["./data/annotations_dataset_train.csv"],
    test=["./data/annotations_dataset_test.csv"],
)

In [4]:
from datasets import load_dataset

MAX_SEQUENCE_LENGTH = 512

# Load the CSV files; the result is indexed by split name ('train' / 'test').
dataset = load_dataset("csv", data_files=data_files)

# 'annotation' column of the test split.
test_split = dataset['test']
labelled = test_split['annotation']

In [10]:
def apply_test_chat_template(example, tokenizer):
    """
    Tokenise one example as a system + user chat, ready for generation.

    Args:
        example: Mapping accepted by ``create_prompt``.
        tokenizer: Tokenizer exposing ``apply_chat_template``.

    Returns:
        The tokenizer's dict-style output (``return_dict=True``,
        ``return_tensors="pt"``), moved to the GPU when one is available and
        left on the CPU otherwise.
    """
    text = create_prompt(example)
    messages = [
        {"role": "system", "content": "You are an expert in poetry."},
        {"role": "user", "content": text}
    ]

    # add_generation_prompt=True makes the template end with the start of the
    # assistant turn, so generation begins directly with the answer.
    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    # Do not fail on machines without a GPU.
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    return encoded.to(target_device)

#column_names = list(dataset["train"].features)
# dataset = dataset.map(apply_test_chat_template,
#                       fn_kwargs={"tokenizer": tokenizer},
#                       remove_columns=column_names,
#                       desc="Applying chat template"
#                      )

In [11]:
# Number of rows in the train and test splits, in that order.
tuple(len(dataset[split_name]) for split_name in ('train', 'test'))

(2576, 687)

In [None]:
# Smoke test of the ROUGE metric on one hand-picked prediction/reference pair.
# The long texts are split across lines with implicit string concatenation;
# the resulting strings are unchanged.
predictions = [
    "'Keats is commenting on the nature of blind faith in the church. "
    "I was surprised to see this line (“the mind of man is closely bound/In some blind spell”), "
    "because Romantic poetry is not known for its social commentary. "
    "However, this does encompass the complex human emotions and psyche that are key components of the genre. "
    "It seems Keats may have once been inspired by this church scene, but now takes on a more pessimistic viewpoint.'"
]
references = [
    "Keats’ use of “bound” is interesting.  "
    "It is a word that has a sense of constraint, but also of protection.  "
    "The speaker is suggesting that the mind of man is limited, but this limitation also serves to protect us from the full force of reality. "
    "This is a central theme in Keats’ poetry.  "
    "He often uses the idea of the “bound” mind to suggest that our perceptions are limited, and that we can only understand the world through our own particular perspective.  "
    "This is a key idea in his poem “Ode to a Grecian Urn”, where he"
]

rouge = evaluate.load('rouge')
results = rouge.compute(predictions=predictions, references=references)
print(results)

{'rouge1': 0.3448275862068966, 'rouge2': 0.05813953488372093, 'rougeL': 0.1954022988505747, 'rougeLsum': 0.1954022988505747}


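Reference: Hugging Face Transformers guide to optimizing LLM inference (static KV cache, speculative decoding): https://huggingface.co/docs/transformers/main/llm_optims?spec-decoding=sampling&static-kv=basic+usage%3A+generation_config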

In [None]:
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader

# Generate an explanation for every row of the test split.
# Requires `dataset`, `model`, `tokenizer` and `create_prompt` from earlier cells.
# Optional speed-ups tried earlier are left disabled:
#model.generation_config.cache_implementation = "static"
#model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)

BATCH_SIZE = 1
SYSTEM_PROMPT = "You are an expert in poetry."
MAX_NEW_TOKENS = 128
REPETITION_PENALTY = 1.25

# Hand each batch over as a plain list of row dicts. The default collate function
# would wrap every string column in a list (the prompt would then contain
# "['Sonnet 30']") and raises on None values.
eval_dataloader = DataLoader(
    dataset['test'],
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=lambda rows: rows,
)
# Follow the model's device instead of hard-coding "cuda".
device = model.device
# Flat list with one generated explanation (prompt excluded) per test row.
generated = []

for batch in tqdm(eval_dataloader, desc="Evaluating"):
    # Rows are generated one at a time, so no padding is needed for any BATCH_SIZE.
    for row in batch:
        # Missing CSV cells become empty strings, as fillna('') does for the sample cell.
        row = {key: "" if value is None else value for key, value in row.items()}
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": create_prompt(row)},
        ]

        # add_generation_prompt=True ends the prompt with the start of the
        # assistant turn, so the new tokens are the answer itself.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(device)
        outputs = model.generate(**inputs,
                                 max_new_tokens=MAX_NEW_TOKENS,
                                 use_cache=True,
                                 #cache_implementation="static",
                                 #cache_implementation="quantized",
                                 #cache_config={"backend": "quanto", "nbits": 4},
                                 pad_token_id=tokenizer.eos_token_id,
                                 repetition_penalty=REPETITION_PENALTY)

        # generate() returns prompt + continuation; keep only the continuation so
        # the text can be compared with the reference annotations.
        prompt_length = inputs["input_ids"].shape[1]
        generated.extend(
            tokenizer.batch_decode(outputs[:, prompt_length:], skip_special_tokens=True)
        )

# print(tokenizer.decode(outputs[0], skip_special_tokens=True))



In [None]:
# Draw one random row of the test CSV as a plain dict; empty cells become ''.
test_csv_path = '/content/drive/MyDrive/Colab Notebooks/Poemma/data/annotations_dataset_test.csv'
test_frame = pd.read_csv(test_csv_path).fillna('')
sample_records = test_frame.sample(n=1).to_dict(orient='records')
sample = sample_records[0]

In [None]:
# Show the prompt text built for the sampled row.
prompt_preview = create_prompt(sample)
print(prompt_preview)


    You are given the poem "Sonnet 30" by William Shakespeare.
    <poem>
    
    When to the sessions of sweet silent thought
    I summon up remembrance of things past,
I sigh the lack of many a thing I sought,
And with old woes new wail my dear time's waste:
    </poem>
    Explain the meaning of the following lines: "When to the sessions of sweet silent thought"
    


In [None]:
# Display the raw fields of the sampled row.
sample

{'content_before': '',
 'referent': 'When to the sessions of sweet silent thought',
 'context_after': "I summon up remembrance of things past,\nI sigh the lack of many a thing I sought,\nAnd with old woes new wail my dear time's waste:",
 'annotation': 'Shakespeare begins with a subordinate clause to provide an element of suspense; this draws the reader in as they await the main clause. \n The  alliterative , sibilant “s"s in "sessions”, “sweet” and “silent” mimic the sound of sighing, — the word “sigh” appears in line three. \n The first  quatrain  has a gentle, even, heavy rhythm — perfect  iambic pentameter  that is appropriate to the mood of the pensive speaker.',
 'poet': 'William Shakespeare',
 'poem_title': 'Sonnet 30'}

In [None]:
from transformers import QuantoQuantizedCache, QuantizedCacheConfig

# Generate an explanation for the single sampled row and print prompt + answer.
text = create_prompt(sample)
messages = [
        {"role": "system", "content": "You are an expert in poetry."},
        {"role": "user", "content": text},
    ]

# add_generation_prompt=True ends the prompt with the start of the assistant turn.
# The tensors are moved to the model's device so generate() does not receive
# inputs on a different device than the weights.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)

print(inputs)

# A quantized KV cache was tried here and is left disabled:
#   cache_implementation="quantized", cache_config={"backend": "HQQ", "nbits": 8}
outputs = model.generate(**inputs,
                         max_new_tokens=128,
                         repetition_penalty=1.25,
                         pad_token_id=tokenizer.eos_token_id)

# Decodes the whole sequence, i.e. the prompt followed by the generated answer.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))



{'input_ids': tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   1627,  10263,    220,   2366,     19,    271,   2675,    527,
            459,   6335,    304,  32349,     13, 128009, 128006,    882, 128007,
            271,   2675,    527,   2728,    279,  33894,    330,  46714,   4816,
            220,    966,      1,    555,  12656,  42482,    627,    262,    366,
           5481,    336,    397,   1084,    262,   3277,    311,    279,  16079,
            315,  10437,  21737,   3463,    198,    262,    358,  28645,    709,
           1323,  72669,    315,   2574,   3347,    345,     40,  31238,    279,
           6996,    315,   1690,    264,   3245,    358,  16495,    345,   3112,
            449,   2362,  77825,    502,    289,    607,    856,  25237,    892,
            596,  12571,    512,    262,    694,   5481,    336,    397,    262,
          8301



system

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are an expert in poetry.user

You are given the poem "Sonnet 30" by William Shakespeare.
    <poem>
    
    When to the sessions of sweet silent thought
    I summon up remembrance of things past,
I sigh the lack of many a thing I sought,
And with old woes new wail my dear time's waste:
    </poem>
    Explain the meaning of the following lines: "When to the sessions of sweet silent thought"assistant

The speaker is sitting quietly and thinking about their memories, which they find pleasant but also sad because he has lost so much that was important to him. The word “sessions” refers back to the previous line’s idea of going through all his thoughts at once. 
 He uses words like ‘sweet’ and ‘silent’, suggesting this quiet reflection on memory brings peace to the mind. This peacefulness contrasts sharply with what comes next – sadness over losing something precious. 
 
 This sonnet can be read as part of a serie

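Reference: Philipp Schmid, "How to Fine-Tune LLMs in 2024 with Hugging Face" (section 4, fine-tuning with TRL and the SFTTrainer): https://www.philschmid.de/fine-tune-llms-in-2024-with-trl#4-fine-tune-llm-using-trl-and-the-sfttrainer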

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftConfig, PeftModel
import torch

# Merge the LoRA adapter into its base model and export the result in fp16.
output_dir = "./outputs/checkpoint-631"
merged_dir = './outputs/poemma_merged_fp_16'

# The adapter config of the local checkpoint names the base model it was trained on.
config = PeftConfig.from_pretrained(output_dir)
base_model_name = config.base_model_name_or_path

base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float16)
# NOTE(review): the adapter weights come from the Hub repo while the base-model name
# comes from the local checkpoint; presumably both describe the same run, confirm.
peft_model_id = "prettyvampire/poemma"
model = PeftModel.from_pretrained(base_model, peft_model_id, torch_dtype=torch.float16)

# merge_and_unload() returns the base model with the adapter weights merged in.
merged_model = model.merge_and_unload()
merged_model.save_pretrained(merged_dir)

# Save the checkpoint's tokenizer next to the weights so the export directory can be
# loaded on its own (weights and tokenizer from one path).
AutoTokenizer.from_pretrained(output_dir).save_pretrained(merged_dir)

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 4/4 [00:21<00:00,  5.37s/it]


In [7]:
#model = PeftModel.from_pretrained(output_dir)
#model.merge_and_unload()
#model.save_pretrained("./outputs/merged")

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Reload the merged fp16 export as a 4-bit (NF4) quantised model.
merged_dir = "./outputs/poemma_merged_fp_16"

quantization_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    # Use torch.float16 instead if the GPU doesn't support bfloat16.
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
nf4_config = BitsAndBytesConfig(**quantization_settings)

# The tokenizer is not reloaded in this cell.
model = AutoModelForCausalLM.from_pretrained(
    merged_dir,
    quantization_config=nf4_config,
    attn_implementation="flash_attention_2",
    device_map="auto",
    use_cache=True,
    token=os.environ['HF_TOKEN'],
)


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 4/4 [00:12<00:00,  3.09s/it]


In [8]:
# Upload the current `model` to the Hub repository named 'poemma'.
# NOTE(review): the merge cell loads its LoRA adapter from "prettyvampire/poemma";
# if this push targets that same repository, the adapter files and the uploaded
# model end up in one repo. Confirm this is intended.
model.push_to_hub('poemma')

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
model-00001-of-00002.safetensors:   0%|                                                    | 0.00/4.65G [00:00<?, ?B/s]
Upload 2 LFS files:   0%|                                                                        | 0/2 [00:00<?, ?it/s][A

model-00001-of-00002.safetensors:   0%|                                          | 98.3k/4.65G [00:00<1:22:05, 944kB/s][A[A

model-00001-of-00002.safetensors:   0%|                                            | 950k/4.65G [00:00<14:33, 5.32MB/s][A[A

model-00001-of-00002.safetensors:   0%|                                           | 2.47M/4.65G [00:00<08:41, 8.92MB/s][A[A

model-00001-of-00002.safetensors:   0%|                                           | 3.90M/4.65G [00:00<07:38, 

CommitInfo(commit_url='https://huggingface.co/prettyvampire/poemma/commit/5c55326ed77265ef1e89d22e14687b4d54015b11', commit_message='Upload LlamaForCausalLM', commit_description='', oid='5c55326ed77265ef1e89d22e14687b4d54015b11', pr_url=None, pr_revision=None, pr_num=None)

In [10]:
# Display the module structure of the base model.
base_model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

In [8]:
# Save the merged model to the relative directory 'merged_poemma'.
# NOTE(review): the merge cell already saves merged_model to
# './outputs/poemma_merged_fp_16'; this second export may be redundant, confirm.
merged_model.save_pretrained('merged_poemma')

KeyboardInterrupt: 

In [2]:
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader

# Merge a LoRA adapter into its base model and generate an explanation for every
# row of the test split.
# Requires `dataset`, `tokenizer` and `create_prompt` from earlier cells; on a fresh
# kernel, run those cells first (otherwise this cell stops with a NameError).
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Optional speed-ups tried earlier are left disabled:
#model.generation_config.cache_implementation = "static"
#model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)

BATCH_SIZE = 1
SYSTEM_PROMPT = "You are an expert in poetry."
MAX_NEW_TOKENS = 128
REPETITION_PENALTY = 1.25

# Hand each batch over as a plain list of row dicts. The default collate function
# would wrap every string column in a list (the prompt would then contain
# "['Sonnet 30']") and raises on None values.
eval_dataloader = DataLoader(
    dataset['test'],
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=lambda rows: rows,
)
# Flat list with one generated explanation (prompt excluded) per test row.
generated = []

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
peft_model_id = "alignment-handbook/zephyr-7b-sft-lora"
model = PeftModel.from_pretrained(base_model, peft_model_id)
# merge_and_unload() returns the merged model; keep the result instead of discarding it.
model = model.merge_and_unload()
# The model is loaded without explicit device placement, so the inputs follow
# wherever it ends up instead of being forced onto "cuda".
device = model.device
# NOTE(review): `tokenizer` is whatever an earlier cell loaded; it must belong to this
# base model and provide a chat template. Confirm before trusting the output.

for batch in tqdm(eval_dataloader, desc="Evaluating"):
    # Rows are generated one at a time, so no padding is needed for any BATCH_SIZE.
    for row in batch:
        # Missing CSV cells become empty strings, as fillna('') does for the sample cell.
        row = {key: "" if value is None else value for key, value in row.items()}
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": create_prompt(row)},
        ]

        # add_generation_prompt=True ends the prompt with the start of the
        # assistant turn, so the new tokens are the answer itself.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(device)
        outputs = model.generate(**inputs,
                                 max_new_tokens=MAX_NEW_TOKENS,
                                 use_cache=True,
                                 #cache_implementation="static",
                                 #cache_implementation="quantized",
                                 #cache_config={"backend": "quanto", "nbits": 4},
                                 pad_token_id=tokenizer.eos_token_id,
                                 repetition_penalty=REPETITION_PENALTY)

        # generate() returns prompt + continuation; keep only the continuation so
        # the text can be compared with the reference annotations.
        prompt_length = inputs["input_ids"].shape[1]
        generated.extend(
            tokenizer.batch_decode(outputs[:, prompt_length:], skip_special_tokens=True)
        )

NameError: name 'dataset' is not defined