In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/244.2 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m102.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
# Load the "train" split of the gamino/wiki_medical_terms dataset
dataset = load_dataset("gamino/wiki_medical_terms", split="train")

# dtype used for computation on top of the 4-bit quantized weights
compute_dtype = torch.float16

# 4-bit NF4 quantization with double quantization enabled
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # Reuse the dtype defined above instead of repeating its name as a string,
    # so the compute dtype is configured in exactly one place.
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

In [None]:
# Checkpoint shared by the model and the tokenizer
base_checkpoint = "aboonaji/llama2finetune-v2"

# Load the base model with the quantization config; the device map assigns the whole model to device 0
model = AutoModelForCausalLM.from_pretrained(
    base_checkpoint,
    device_map={"": 0},
    quantization_config=bnb_config,
)
# Set pretraining_tp to 1 and turn off the generation cache on the model config
model.config.pretraining_tp = 1
model.config.use_cache = False

# Load the tokenizer for the same checkpoint; pad on the right, using EOS as the pad token
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token


config.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/174 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [None]:
# Modules that receive LoRA adapters
lora_target_modules = [
    'q_proj', 'k_proj', 'v_proj', 'o_proj',
    'gate_proj', 'down_proj', 'up_proj',
    'lm_head',
]

# LoRA adapter configuration for causal language modelling
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=8,
    lora_dropout=0.25,
    bias="none",
    target_modules=lora_target_modules,
)

# Hyperparameters for the training run
training_arguments = TrainingArguments(
    # where results go and how progress is reported
    output_dir="./results",
    report_to="tensorboard",
    logging_steps=10,
    save_steps=0,
    # run length and batching
    max_steps=200,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # optimizer and learning-rate schedule
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # mixed precision is disabled for both half-precision formats
    fp16=False,
    bf16=False,
)

# Custom Function for Preprocessing text
def formatting_func(text):
    """Normalize a string: lowercase it and keep only alphanumerics and spaces.

    Args:
        text: The string to clean.

    Returns:
        The lowercased string with every character removed that is neither
        alphanumeric (per ``str.isalnum``) nor a space.
    """
    # str.lower() returns a new string; bind the result, otherwise the
    # lowercasing is silently discarded.
    text = text.lower()
    return ''.join(ch for ch in text if ch.isalnum() or ch == ' ')

# Build the supervised fine-tuning trainer from the model, tokenizer, dataset and configs
# NOTE(review): both dataset_text_field and formatting_func are supplied here; presumably
# only one of them is applied by the installed trl version — confirm which takes effect.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=dataset,
    dataset_text_field="page_text",
    formatting_func=formatting_func,
    max_seq_length=256,
    packing=False,
)




Map:   0%|          | 0/6861 [00:00<?, ? examples/s]

In [None]:
# Run the fine-tuning loop on the SFTTrainer instance built earlier
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,1.8544
20,1.7251
30,1.6323
40,1.6023
50,1.7807
60,1.5346
70,1.5229
80,1.5971
90,1.5137
100,1.6365


NameError: name 'new_model' is not defined

In [None]:
# Save the trainer's model to a local directory
output_dir = "my_llama"
trainer.model.save_pretrained(output_dir)

In [None]:
# Only let CRITICAL messages through the transformers logger
logging.set_verbosity(logging.CRITICAL)

# Generate a completion for a sample prompt through a text-generation pipeline
# NOTE(review): `model` was passed to SFTTrainer together with a LoRA config, so it may
# already carry the trained adapter rather than being the untouched base model — confirm.
prompt = "Explain Paracetomol poisoning."
pipe = pipeline(model=model, tokenizer=tokenizer, task="text-generation", max_length=1000)
cleaned_prompt = formatting_func(prompt)
result = pipe(f"<s>[INST] {cleaned_prompt} [/INST]")
first_generation = result[0]
print(first_generation['generated_text'])



<s>[INST] Explain Paracetomol poisoning [/INST]  Paracetamol poisoning, also known as acetaminophen poisoning, is a medical condition that occurs when a person ingests too much paracetamol (acetaminophen), a common pain reliever and fever reducer found in many over-the-counter medications. It can cause liver damage, liver failure, and even death if not treated promptly.

Signs and symptoms
The symptoms of paracetamol poisoning can vary depending on the amount of paracetamol ingested and the time elapsed since ingestion. The most common symptoms include:

* Nausea and vomiting
* Abdominal pain
* Headache
* Fatigue
* Confusion
* Dizziness
* Sweating
* Yellowing of the skin and whites of the eyes (jaundice)

In severe cases, paracetamol poisoning can cause:

* Liver failure
* Kidney failure
* Respiratory failure
* Cardiac arrest
* Coma
* Death

Causes
Paracetamol poisoning occurs when a person ingests too much paracetamol. The amount of paracetamol that can cause poisoning varies dependin

In [None]:
# Empty VRAM
# Drop the Python references that keep the model's tensors alive
del model
del pipe
del trainer
import gc
# Run the collector twice; a second pass can reclaim objects released by the first
gc.collect()
gc.collect()
# Freed tensors only go back to PyTorch's caching allocator; release the cached
# blocks as well so the GPU memory is actually available for the next model load.
torch.cuda.empty_cache()

42759

In [None]:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

# Reload the model saved in the local "my_llama" directory, requesting 8-bit weights
model = AutoPeftModelForCausalLM.from_pretrained("my_llama", load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained("aboonaji/llama2finetune-v2")

# Move to the GPU and switch to evaluation mode (eval() returns the module itself)
# NOTE(review): an 8-bit load may already place the weights on the GPU — confirm the explicit move is needed.
model = model.to("cuda").eval()

# Tokenize the test prompt once so the following generation cells can reuse `inputs`
prompt_text = "Explain Paracetomol poisoning."
inputs = tokenizer(prompt_text, return_tensors="pt")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Generate up to 200 new tokens from the tokenized prompt and print the decoded text
prompt_ids = inputs["input_ids"].to("cuda")
outputs = model.generate(input_ids=prompt_ids, max_new_tokens=200)
decoded = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)
print(decoded[0])



Explain Paracetomol poisoning.
Paracetamol poisoning, also known as acetaminophen poisoning, is the accidental or intentional ingestion of too much paracetamol (acetaminophen), a common painkiller and fever reducer. It is a common cause of liver damage and liver failure. It is also a common cause of death.

Signs and symptoms
The signs and symptoms of paracetamol poisoning can vary depending on the amount of paracetamol taken and the time elapsed since ingestion. The most common symptoms are nausea, vomiting, abdominal pain, and fever. In severe cases, liver failure, liver necrosis, and death may occur.

Treatment
Treatment of paracetamol poisoning involves supportive care, such as intravenous fluids and oxygen, and


In [None]:
# Generate up to 500 new tokens from the tokenized prompt and print the decoded text
prompt_ids = inputs["input_ids"].to("cuda")
outputs = model.generate(input_ids=prompt_ids, max_new_tokens=500)
decoded = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)
print(decoded[0])

Explain Paracetomol poisoning.
Paracetamol poisoning, also known as acetaminophen poisoning, is the accidental or intentional ingestion of too much paracetamol (acetaminophen), a common painkiller and fever reducer. It is a common cause of liver damage and liver failure. It is also a common cause of death.

Signs and symptoms
The signs and symptoms of paracetamol poisoning can vary depending on the amount of paracetamol taken and the time elapsed since ingestion. The most common symptoms are nausea, vomiting, abdominal pain, and fever. In severe cases, liver failure, liver necrosis, and death may occur.

Treatment
Treatment of paracetamol poisoning involves supportive care, such as intravenous fluids and oxygen, and administration of an antidote, such as N-acetylcysteine (NAC). In severe cases, a liver transplant may be required.

Prevention
Prevention of paracetamol poisoning involves proper dosing and storage of paracetamol products, and avoiding overdose. It is important to follow t

In [None]:
# Generate up to 1000 new tokens from the tokenized prompt and print the decoded text
prompt_ids = inputs["input_ids"].to("cuda")
outputs = model.generate(input_ids=prompt_ids, max_new_tokens=1000)
decoded = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)
print(decoded[0])

Explain Paracetomol poisoning.
Paracetamol poisoning, also known as acetaminophen poisoning, is the accidental or intentional ingestion of too much paracetamol (acetaminophen), a common painkiller and fever reducer. It is a common cause of liver damage and liver failure. It is also a common cause of death.

Signs and symptoms
The signs and symptoms of paracetamol poisoning can vary depending on the amount of paracetamol taken and the time elapsed since ingestion. The most common symptoms are nausea, vomiting, abdominal pain, and fever. In severe cases, liver failure, liver necrosis, and death may occur.

Treatment
Treatment of paracetamol poisoning involves supportive care, such as intravenous fluids and oxygen, and administration of an antidote, such as N-acetylcysteine (NAC). In severe cases, a liver transplant may be required.

Prevention
Prevention of paracetamol poisoning involves proper dosing and storage of paracetamol products, and avoiding overdose. It is important to follow t