# Load Library

In [None]:
from IPython.display import HTML, display

def set_css(info=None):
  """Inject a CSS rule so that <pre> output wraps long lines.

  Registered below as a `pre_run_cell` hook, so it runs before every cell.

  Args:
    info: Execution-info object that IPython hands to `pre_run_cell`
      callbacks. It is unused; it is accepted (and optional) so the hook
      works whether IPython calls it with or without that argument.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))

# Cell output is rendered fresh for each cell, so the style is re-injected
# before every execution rather than once.
get_ipython().events.register('pre_run_cell', set_css)

In [None]:
!pip install rouge_score
!pip install nltk
!pip install bert_score
!pip install peft
!pip install datasets
!pip install --upgrade bitsandbytes

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=991118585107f29b9a35946b2c23914d6a74efc6a07b0937782d09768f68f606
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13
Collecting peft
  Downloadin

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TrainingArguments, Trainer, DataCollatorForSeq2Seq, BitsAndBytesConfig, EarlyStoppingCallback
from sklearn.metrics import accuracy_score
from peft import get_peft_model, LoraConfig
import pandas as pd
import transformers
import torch
import random

from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu
from bert_score import score as bert_score
from datasets import Dataset

# Load Model

In [None]:
# Base checkpoint to fine-tune.
# Alternative that was considered: "meta-llama/Meta-Llama-3-8B"
model_id = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit quantization through bitsandbytes so the base model fits in GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store the base weights in 4 bits
    # Compute in bfloat16 so the quantized matmuls use the same precision as
    # the training loop, which is configured with bf16=True in TrainingArguments.
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",              # NormalFloat4, the type recommended for QLoRA
)

# device_map="auto" lets accelerate place the layers on the available device(s).
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=bnb_config)

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

# Load MedQuad

In [None]:
# Read the MedQuAD question/answer CSV; this frame is the source of the
# fine-tuning (train + validation) data built in the cells below.
df = pd.read_csv(filepath_or_buffer='/content/medquad.csv')

# Fine-Tune

Format data

In [None]:
# The raw question is the model input and the reference answer is the target;
# no prompt template (e.g. "User: ...\nAssistant:") is applied.
# Rows where either side is missing are dropped.
df = (
    df.assign(input_text=df['question'], output_text=df['answer'])
      .dropna(subset=['input_text', 'output_text'])
)

# Preview the first few input/target pairs
print(df[['input_text', 'output_text']].head())

                               input_text  \
0                What is (are) Glaucoma ?   
1                  What causes Glaucoma ?   
2     What are the symptoms of Glaucoma ?   
3  What are the treatments for Glaucoma ?   
4                What is (are) Glaucoma ?   

                                         output_text  
0  Glaucoma is a group of diseases that can damag...  
1  Nearly 2.7 million people have glaucoma, a lea...  
2  Symptoms of Glaucoma  Glaucoma can develop in ...  
3  Although open-angle glaucoma cannot be cured, ...  
4  Glaucoma is a group of diseases that can damag...  


In [None]:
# Convert the two text columns to a Hugging Face Dataset.
# preserve_index=False: after dropna() the frame's index has gaps, and
# from_pandas would otherwise store it as an extra "__index_level_0__" column.
dataset = Dataset.from_pandas(df[['input_text', 'output_text']], preserve_index=False)

Tokenize data

In [None]:
# Batches need a pad token; fall back to EOS when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


# Keep the embedding matrix the same size as the tokenizer's vocabulary
model.resize_token_embeddings(len(tokenizer))


def tokenize_function(examples, max_length=256):
    """Turn a batch of question/answer pairs into causal-LM training features.

    A causal LM is trained on one sequence, and `labels` must line up
    position-by-position with `input_ids` (the model shifts them internally).
    Each example is therefore encoded as ``question tokens + answer tokens + EOS``,
    and `labels` is a copy of that sequence with the question positions set
    to -100 so the loss is computed on the answer only.

    Sequences are truncated to `max_length` but not padded here; the data
    collator pads each batch (inputs with the pad token, labels with -100).

    Args:
        examples: Batch dict with parallel lists under 'input_text' and
            'output_text'.
        max_length: Maximum number of tokens kept per example, question and
            answer combined. Lower it if VRAM is tight.

    Returns:
        Dict with 'input_ids', 'attention_mask' and 'labels', one list per
        example.
    """
    features = {"input_ids": [], "attention_mask": [], "labels": []}
    for question, answer in zip(examples['input_text'], examples['output_text']):
        # Special tokens (e.g. BOS) are added once, at the start of the prompt.
        prompt_ids = tokenizer(question, add_special_tokens=True)["input_ids"]
        # EOS is appended so the model learns where an answer ends; if the
        # answer is truncated below, the EOS is cut off with it.
        answer_ids = tokenizer(answer, add_special_tokens=False)["input_ids"]
        answer_ids = answer_ids + [tokenizer.eos_token_id]

        input_ids = (prompt_ids + answer_ids)[:max_length]
        # -100 is the index ignored by the cross-entropy loss.
        labels = ([-100] * len(prompt_ids) + answer_ids)[:max_length]

        features["input_ids"].append(input_ids)
        features["attention_mask"].append([1] * len(input_ids))
        features["labels"].append(labels)
    return features

# Apply the tokenization to the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/16407 [00:00<?, ? examples/s]

Split dataset

In [None]:
# Hold out 20% of the examples for validation; the fixed seed keeps the
# 80/20 partition identical across runs.
train_test_split = tokenized_dataset.train_test_split(seed=42, test_size=0.2)
train_dataset, eval_dataset = train_test_split['train'], train_test_split['test']

### Configure LoRA using PEFT

In [None]:
# Turn off the generation key/value cache on the model config for training;
# it is set before gradient checkpointing is enabled on the next line.
model.config.use_cache = False
# Enable gradient checkpointing for reduced VRAM usage
# (activations are recomputed in the backward pass instead of being stored)
model.gradient_checkpointing_enable()

In [None]:
# LoRA adapter settings for causal language modeling.
# Adapters are attached to the attention projections (q/k/v/o) and to the
# MLP gate projection of each block.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,              # adapter rank; lower it if VRAM is tight
    lora_alpha=32,     # scaling factor applied to the adapter update
    lora_dropout=0.2,  # dropout on the adapter path to limit overfitting
    bias="none",       # other options: "all", "lora_only"
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
)

# Make the embedding outputs require grad before the adapters are added.
# NOTE(review): presumably needed so gradient checkpointing can backpropagate
# through the frozen base model — confirm.
model.enable_input_require_grads()

# Wrap the base model with the LoRA adapters
model = get_peft_model(model, lora_config)

# Report how many parameters are trainable as a sanity check
model.print_trainable_parameters()


trainable params: 23,068,672 || all params: 7,264,800,768 || trainable%: 0.3175


In [None]:
training_args = TrainingArguments(
    output_dir="./results",
    # --- batch size: 8 per device x 4 accumulation steps ---
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    # --- optimisation ---
    optim="adamw_torch_fused",
    learning_rate=1e-5,
    weight_decay=0.03,
    warmup_steps=500,
    max_grad_norm=1.0,
    num_train_epochs=4,
    bf16=True,
    # --- evaluation / logging every 200 steps ---
    eval_strategy="steps",
    eval_steps=200,
    logging_strategy="steps",
    logging_steps=200,
    # --- checkpointing: save_steps is kept a multiple of eval_steps ---
    save_steps=400,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower eval_loss is better
)

# Collator that pads each batch (inputs and labels) when it is assembled
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, padding=True)

# Wire the model, arguments, datasets and collator into a Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

Train model

In [None]:
# Run LoRA fine-tuning with the Trainer configured above. The call returns a
# TrainOutput (global step, mean training loss, runtime metrics), which is
# displayed as the cell result.
trainer.train()

  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
200,8.1391,5.858651
400,5.5612,5.271283


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
200,8.1391,5.858651
400,5.5612,5.271283
600,5.0481,4.878934
800,4.8031,4.724863
1000,4.6423,4.654975
1200,4.651,4.607931
1400,4.564,4.588076
1600,4.5415,4.574691


  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


TrainOutput(global_step=1640, training_loss=5.227952752462247, metrics={'train_runtime': 8431.5005, 'train_samples_per_second': 6.227, 'train_steps_per_second': 0.195, 'total_flos': 5.749461230372782e+17, 'train_loss': 5.227952752462247, 'epoch': 3.9975624619134673})

Save

In [None]:
# Persist everything needed to reload the fine-tuned model into ./final_model:
# the trainer's model weights, the tokenizer files, and the model config.
trainer.save_model("./final_model")
tokenizer.save_pretrained(save_directory="./final_model")
model.config.save_pretrained(save_directory="./final_model")

# Copy Model to Google Drive

In [None]:
# Colab-only: mount Google Drive at /content/drive so the saved model can be
# copied somewhere that outlives the runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Copy the Trainer output directory (./results, the output_dir holding the
# training checkpoints) to Drive. If the destination folder does not exist yet,
# cp creates it as a copy of ./results; if it already exists, ./results is
# placed inside it. The recorded output (^C) shows this copy was interrupted.
!cp -r ./results /content/drive/MyDrive/fine_tuned_model

^C


In [None]:
!cp -r ./final_model /content/drive/MyDrive/fine_tuned_model

*The saved model and tokenizer are now backed up to Google Drive under `MyDrive/fine_tuned_model`.*