In [1]:
!pip install -q -U bitsandbytes transformers peft accelerate datasets scipy einops evaluate trl rouge_score

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m99.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m342.1/342.1 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.9/484.9 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.6/37.6 MB[0m [31m47.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import numpy as np
import pandas as pd
import os
import torch
import time
from pynvml import *

# Disable Weights & Biases
os.environ['WANDB_DISABLED'] = "true"

# Library Imports
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    GenerationConfig,
    set_seed
)
from tqdm import tqdm
from trl import SFTTrainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from huggingface_hub import interpreter_login, notebook_login, notebook_login, hf_hub_download


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [3]:
# %% [GPU Utilization Check]
def print_gpu_utilization():
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")

In [4]:
# %% [Dataset Loading]
huggingface_dataset_name = "neil-code/dialogsum-test"
try:
    dataset = load_dataset(huggingface_dataset_name)
except Exception as e:
    print(f"Failed to load dataset: {e}")
    print("Attempting to load dataset from local cache...")
    dataset = load_dataset(huggingface_dataset_name, download_mode="force_redownload")


# %% [Model & Tokenizer Setup]
model_name = 'microsoft/phi-2'
compute_dtype = getattr(torch, "float16")

# 4-bit Quantization Config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

README.md:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

train.csv:   0%|          | 0.00/1.81M [00:00<?, ?B/s]

validation.csv:   0%|          | 0.00/441k [00:00<?, ?B/s]

test.csv:   0%|          | 0.00/447k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1999 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/499 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/499 [00:00<?, ? examples/s]


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



Enter your token (input will not be visible):  ········
Add token as git credential? (Y/n)  n


In [6]:

# Load Base Model with Error Handling
try:
    original_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=bnb_config,
        trust_remote_code=True,
        use_auth_token=True
    )
except Exception as e:
    print(f"Failed to load model from Hugging Face Hub: {e}")
    print("Attempting to load model from local cache...")
    original_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=bnb_config,
        trust_remote_code=True,
        use_auth_token=True,
        local_files_only=True  # Force loading from local cache
    )

# Tokenizer Configuration
try:
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        padding_side="left",
        add_eos_token=True,
        add_bos_token=True,
        use_fast=False
    )
except Exception as e:
    print(f"Failed to load tokenizer from Hugging Face Hub: {e}")
    print("Attempting to load tokenizer from local cache...")
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        padding_side="left",
        add_eos_token=True,
        add_bos_token=True,
        use_fast=False,
        local_files_only=True  # Force loading from local cache
    )
tokenizer.pad_token = tokenizer.eos_token



config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

In [7]:
# %% [LoRA Configuration]
peft_config = LoraConfig(
    r=64,  # Increased from 32
    lora_alpha=64,
    target_modules=[
        "Wqkv",    # Phi-2 specific attention layers
        "out_proj",
        "fc1",
        "fc2",
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

In [8]:
# Prepare Model for Training
original_model = prepare_model_for_kbit_training(original_model)
peft_model = get_peft_model(original_model, peft_config)

In [9]:
# %% [Training Arguments]
output_dir = "./phi-2-dialogsum-ft"
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=2,      # Reduced for memory optimization
    gradient_accumulation_steps=4,       # Increased for stability
    warmup_steps=50,
    max_steps=500,                       # Updated per requirements
    learning_rate=3e-4,                  # Increased learning rate
    logging_steps=50,                     # Updated logging interval
    evaluation_strategy="steps",
    eval_steps=50,                        # Updated evaluation interval
    optim="paged_adamw_8bit",
    save_strategy="steps",
    save_steps=100,
    fp16=True,
    report_to="none",
    gradient_checkpointing=True,          # Enabled for memory savings
    group_by_length=True,
)



In [10]:
# %% [Data Preprocessing]
def create_prompt_formats(sample):
    """Format training samples with instruction templates"""
    INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    INSTRUCTION_KEY = "### Instruct: Summarize the below conversation."
    RESPONSE_KEY = "### Output:"
    END_KEY = "### End"
    
    formatted_prompt = "\n\n".join([
        f"{INTRO_BLURB}",
        f"{INSTRUCTION_KEY}\n{sample['dialogue']}",
        f"{RESPONSE_KEY}\n{sample['summary']}",
        END_KEY
    ])
    sample["text"] = formatted_prompt
    return sample

def preprocess_dataset(tokenizer, max_length, dataset):
    """Tokenize and format dataset"""
    dataset = dataset.map(create_prompt_formats)
    processed_dataset = dataset.map(
        lambda samples: tokenizer(
            samples["text"],
            max_length=max_length,
            truncation=True,
            padding="max_length"
        ),
        batched=True,
        remove_columns=['id','topic','dialogue','summary','text']
    )
    return processed_dataset

# Apply preprocessing
max_length = 2048  # Phi-2's context window
train_dataset = preprocess_dataset(tokenizer, max_length, dataset["train"])
eval_dataset = preprocess_dataset(tokenizer, max_length, dataset["validation"])


Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Map:   0%|          | 0/499 [00:00<?, ? examples/s]

In [21]:
!pip install -U trl transformers datasets peft



In [23]:
# %% [Training Setup]
from trl import DataCollatorForCompletionOnlyLM
from trl import SFTTrainer
from transformers import DataCollatorForLanguageModeling

# Initialize the SFTTrainer with latest API
trainer = SFTTrainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    args=training_args,
    tokenizer=tokenizer,
    formatting_func=lambda example: [example["text"]],  # Required formatting
    
)

# %% [Start Training]
print_gpu_utilization()
try:
    trainer.train()
except Exception as e:
    print(f"Training failed: {e}")
    print("Saving model checkpoint before exiting...")
    trainer.save_model(output_dir)

  trainer = SFTTrainer(


Converting train dataset to ChatML:   0%|          | 0/1999 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/1999 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/1999 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/499 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/499 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/499 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


GPU memory occupied: 3380 MB.


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
50,139.7201,
100,0.0,
150,0.0,
200,6.9083,
250,2.2043,
300,170.8823,
350,0.0,
400,0.0482,
450,0.0,
500,1.951,


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Tr

In [24]:
# %% [Model Saving]
trainer.model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

('./phi-2-dialogsum-ft/tokenizer_config.json',
 './phi-2-dialogsum-ft/special_tokens_map.json',
 './phi-2-dialogsum-ft/vocab.json',
 './phi-2-dialogsum-ft/merges.txt',
 './phi-2-dialogsum-ft/added_tokens.json')

In [26]:
# %% [Hugging Face Upload]
try:
    notebook_login()
    trainer.model.push_to_hub("Krati132/phi-2-dialogsum-finetuned")
    tokenizer.push_to_hub("Krati132/phi-2-dialogsum-finetuned")
except Exception as e:
    print(f"Failed to upload model to Hugging Face Hub: {e}")
    print("Model saved locally at:", output_dir)


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

adapter_model.safetensors:   0%|          | 0.00/210M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

In [35]:
# %% [Simplified Evaluation]
def generate_summary(model, tokenizer, dialogue, max_new_tokens=50):
    """
    Generate a summary for a given dialogue.
    """
    prompt = f"Instruct: Summarize the following conversation.\n{dialogue}\nOutput:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # Reduced for faster generation
        temperature=0.3,
        top_p=0.9,
        repetition_penalty=1.2,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).split("Output:\n")[-1]

# Ensure the dataset is properly loaded
test_samples = dataset["test"]  # Access the test split
results = []

# Test on just 2 samples for quick evaluation
for i in range(2):  # Use only the first 2 samples
    sample = test_samples[i]  # Access each sample by index
    dialogue = sample["dialogue"]
    summary = sample["summary"]

    # Generate summaries
    original_output = generate_summary(original_model, tokenizer, dialogue, max_new_tokens=50)
    finetuned_output = generate_summary(peft_model, tokenizer, dialogue, max_new_tokens=50)
    
    # Store results
    results.append({
        "dialogue": dialogue,
        "original_summary": original_output,
        "finetuned_summary": finetuned_output,
        "human_summary": summary
    })

# Convert results to DataFrame
results_df = pd.DataFrame(results)
print(results_df)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


                                            dialogue  \
0  #Person1#: Ms. Dawson, I need you to take a di...   
1  #Person1#: Ms. Dawson, I need you to take a di...   

                                    original_summary  \
0  Ms. Dawson instructed her assistant to summari...   
1  Ms. Dawson instructed her assistant to summari...   

                                   finetuned_summary  \
0  Ms. Dawson instructed her assistant to summari...   
1  Ms. Dawson instructed her assistant to summari...   

                                       human_summary  
0  Ms. Dawson helps #Person1# to write a memo to ...  
1  In order to prevent employees from wasting tim...  


In [None]:
import evaluate

# ROUGE Metric Calculation
rouge = evaluate.load("rouge")
finetuned_scores = rouge.compute(
    predictions=[res["finetuned_summary"] for res in results],
    references=[res["human_summary"] for res in results]
)

print(f"Fine-tuned Model ROUGE Scores:")

print(f"ROUGE-1: {finetuned_scores['rouge1']*100:.2f}%")
print(f"ROUGE-2: {finetuned_scores['rouge2']*100:.2f}%")

print(f"ROUGE-L: {finetuned_scores['rougeL']*100:.2f}%")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Fine-tuned Model ROUGE Scores:
ROUGE-1: 26.58%
ROUGE-2: 4.18%
ROUGE-L: 18.77%


Link to Hugging Face Model: https://huggingface.co/Krati132/phi-2-dialogsum-finetuned

GitHub Link: https://github.com/kratipandya/Assignment5/tree/main