<a href="https://colab.research.google.com/github/tamaskecskemeti/nlp_thesis/blob/main/Large_Language_Models_based_Automatic_Text_Summarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install huggingface_hub
!pip install datasets
!pip install evaluate
!pip install rouge_score
!pip install bert_score
!pip install meteor_score
!pip install gradio
!pip install bitsandbytes
!pip install --upgrade transformers accelerate bitsandbytes
!pip3 install --upgrade trl

In [None]:
import gc
import itertools
import os
import random
import time
from pathlib import Path

import evaluate
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import psutil
import seaborn as sns
import torch
from datasets import Dataset
from huggingface_hub import login
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, Gemma3ForCausalLM, BitsAndBytesConfig # AutoModelForSeq2SeqLM
from transformers import TrainingArguments, Trainer
from trl import SFTTrainer

In [None]:
# Configure PyTorch's CUDA caching allocator to use expandable segments.
# NOTE(review): this is set after `import torch`; presumably it still takes effect
# because no CUDA work has happened yet — confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

In [None]:
# Pick bfloat16 only on GPUs with compute capability >= 8, float16 otherwise.
# The availability check comes first because querying the device capability
# raises when no CUDA device is present (the notebook supports a CPU fallback).
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
    compute_dtype = torch.bfloat16
else:
    compute_dtype = torch.float16
compute_dtype

In [None]:
# Login and mount
from getpass import getpass

# SECURITY: never commit an access token to a notebook. The token that used to be
# hardcoded here has been exposed and must be revoked in the Hugging Face settings.
# The token is now read from the HF_TOKEN environment variable, with an
# interactive (non-echoing) prompt as a fallback.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token, add_to_git_credential=True)

from google.colab import drive
drive.mount('/content/drive')

os.environ["WANDB_DISABLED"] = "true"

In [None]:
# Seed both Python's and PyTorch's RNGs: sampling during generation and LoRA
# weight initialisation/dropout draw from torch, not from `random`.
random.seed(42)
torch.manual_seed(42)

In [None]:
# Read the article/summary pairs from the CSV in the working directory and wrap
# them in a Hugging Face Dataset. Later cells read the 'text' and 'summary' columns.
df = pd.read_csv("news_and_summaries.csv", sep=',')
dataset = Dataset.from_pandas(df)

In [None]:
# Hold out 20% of the data for final evaluation; the fixed seed keeps the split stable.
train_holdout_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset, holdout_dataset = (
    train_holdout_split['train'],
    train_holdout_split['test'],
)

In [None]:
def get_gpu_memory():
    """Return (allocated, reserved) CUDA memory in MB (1e6 bytes), rounded to one decimal."""
    bytes_per_mb = 1e6
    allocated_mb = torch.cuda.memory_allocated() / bytes_per_mb
    reserved_mb = torch.cuda.memory_reserved() / bytes_per_mb
    return (round(allocated_mb, 1), round(reserved_mb, 1))

def get_ram_usage():
    """Return (used RAM in GB rounded to 2 decimals, used percentage rounded to 1 decimal)."""
    snapshot = psutil.virtual_memory()
    used_gb = round(snapshot.used / 1e9, 2)
    used_pct = round(snapshot.percent, 1)
    return used_gb, used_pct

def log_resources(label=""):
    """Print the current GPU and RAM usage under a heading prefixed with `label`."""
    gpu_alloc, gpu_reserved = get_gpu_memory()
    ram_used, ram_pct = get_ram_usage()
    report_lines = [
        f"\n{label} Resources:",
        f"   GPU Allocated: {gpu_alloc} MB",
        f"   GPU Reserved:  {gpu_reserved} MB",
        f"   RAM Used:      {ram_used} GB ({ram_pct}%)\n",
    ]
    # One print call; the emitted text is identical to printing each line separately.
    print("\n".join(report_lines))

In [None]:
def preprocess_data(examples):
    """Tokenize a batch of documents and their reference summaries.

    Args:
        examples: Batch mapping with a 'text' list (source documents) and a
            'summary' list (reference summaries), as supplied by
            `Dataset.map(..., batched=True)`.

    Returns:
        The tokenizer output for the prompts, extended with a "labels" entry that
        holds the tokenized summaries with padding positions replaced by -100.

    Uses the module-level `tokenizer`.
    """
    # Substitute each document into the prompt. The template used to be a plain
    # string, so the literal "{text}" stayed in the prompt and the document was
    # appended after "Summary:" instead of appearing under "Text:".
    inputs = [
        f"You are a helpful assistant.\nBelow is a political text. Summarize it in a few concise sentences and return only the summary.\n\nText:\n\n{doc}\n\nSummary:"
        for doc in examples['text']
    ]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")

    # Tokenize summaries
    labels = tokenizer(examples['summary'], max_length=512, truncation=True, padding="max_length")

    # NOTE(review): the labels are the tokenized summary, position-by-position
    # unrelated to `input_ids` (the prompt). For a causal LM the labels are usually
    # a masked copy of the prompt+summary sequence — confirm this layout is intended.
    # NOTE(review): truncating the prompt at 512 tokens can cut off the trailing
    # "Summary:" cue for long documents — confirm.

    # Ensure padding tokens are ignored in the loss calculation
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [(token if token != pad_id else -100) for token in label]
        for label in labels["input_ids"]
    ]
    return model_inputs

In [None]:
# OLD Function to generate summary
## def generate_summary(text):
##     inputs = tokenizer(f"\nBelow is a long political text. Summarize it in a few sentences and return only the summary. I repeat, only the summary!\n\nText:\n\n{text}\n\n. The summary of this text, without the input text:", return_tensors="pt", max_length=512, truncation=True).to(device)
##     summary_ids = model.generate(inputs['input_ids'],
##                                  max_length=256,
##                                  min_length=64,
##                                  num_beams=4,
##                                  temperature=0.8,
##                                  top_k=50,
##                                  top_p=0.9,
##                                  repetition_penalty=1.5,
##                                  no_repeat_ngram_size=4,
##                                  early_stopping=True)
##     decoded = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
##
##     # summary = remove_copied_sections(decoded, text)
##     # Strip the summary so only the part after the "Text:" is kept
##     if "Text:" in decoded:
##         summary = decoded.split("The summary of this text:")[-1].strip()
##     else:
##         summary = decoded.strip()
##     return summary

In [None]:
# Split a text into paragraphs and join neighbouring paragraphs two at a time
def chunk_by_paragraphs(text, min_len=50):
    """Return chunks made of up to two consecutive paragraphs of `text`.

    Paragraphs are separated by blank lines ('\\n\\n'). Each paragraph is stripped;
    empty ones and those not longer than `min_len` characters are dropped. The
    remaining paragraphs are joined pairwise with '\\n\\n'; an odd paragraph at the
    end forms a chunk on its own.
    """
    kept = []
    for raw_paragraph in text.split('\n\n'):
        paragraph = raw_paragraph.strip()
        if len(paragraph) > 0 and len(paragraph) > min_len:
            kept.append(paragraph)

    # Step through the kept paragraphs two at a time; the last slice may hold one.
    return ["\n\n".join(kept[start:start + 2]) for start in range(0, len(kept), 2)]

# Cue that ends every prompt; also used to strip an echoed cue from the output.
_SUMMARY_MARKER = "The summary of this text:"


def _summarize_text(model, tokenizer, text, device):
    """Prompt `model` to summarize `text` and return only the generated summary."""
    prompt = f"Below is a political text. Summarize it in a few sentences and return only the summary.\n\nText:\n\n{text}\n\n. {_SUMMARY_MARKER}"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
    output_ids = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=inputs["attention_mask"],
        max_new_tokens=150,
        num_beams=1,
        repetition_penalty=1.5,
        no_repeat_ngram_size=4,
        temperature=0.8,
        top_p=0.9,
        early_stopping=True
    )
    # The output of a decoder-only model starts with the prompt tokens. Decode only
    # what follows them instead of splitting the decoded text on the cue, which is
    # lost whenever the prompt is truncated at max_length.
    prompt_len = inputs["input_ids"].shape[1]
    decoded = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)
    # Defensive: drop anything up to a cue the model may have repeated.
    return decoded.split(_SUMMARY_MARKER)[-1].strip()


# Summarize paragraphs pairs
def summarize_paragraphs(model, tokenizer, text, device='cuda'):
    """Summarize `text` chunk by chunk and return the list of chunk summaries.

    Chunks come from `chunk_by_paragraphs`. When no paragraph is long enough to
    form a chunk, the whole stripped text is summarized as a single chunk; blank
    text yields an empty list.
    """
    chunks = chunk_by_paragraphs(text)
    if not chunks and text.strip():
        chunks = [text.strip()]
    return [_summarize_text(model, tokenizer, chunk, device) for chunk in chunks]


# Final summarization
def summarize_overall(model, tokenizer, summaries, device='cuda'):
    """Join the chunk `summaries` with spaces and summarize the result once more."""
    combined = ' '.join(summaries)
    return _summarize_text(model, tokenizer, combined, device)


def generate_summary(text):
    """Summarize `text` in two stages using the module-level `model` and `tokenizer`.

    Returns an empty string for empty or whitespace-only input instead of asking
    the model to summarize nothing.
    """
    if not text or not text.strip():
        return ""
    summaries = summarize_paragraphs(model, tokenizer, text)
    if not summaries:
        return ""
    return summarize_overall(model, tokenizer, summaries)


In [None]:
# Load the necessary metrics
# These three `evaluate` metric objects are reused by every evaluation cell below.
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")
meteor = evaluate.load("meteor")

In [None]:
# Load the benchmark pre fine-tuned model
# `generate_summary` reads the module-level `tokenizer` and `model` set here.
# NOTE(review): `device_map="auto"` already places the weights, so the trailing
# `.to(device)` looks redundant — confirm.
model_name = "google/gemma-3-1b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = Gemma3ForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16).to(device)

In [None]:
# Display the second holdout article as a sanity check.
holdout_dataset['text'][1]

In [None]:
# Apply the summarization function on the holdout set
# Iterate over the articles themselves. Indexing with [1] here selected a single
# article and the comprehension then walked over its characters, which also made
# the number of predictions differ from the number of reference summaries.
holdout_summaries = [generate_summary(text) for text in holdout_dataset['text']]

In [None]:
# Display the generated summary for the second holdout article.
holdout_summaries[1]

In [None]:
# Display the reference summary for the same article, for comparison.
holdout_dataset['summary'][1]

In [None]:
# Compute ROUGE
# Predictions and references are paired by position, so both must have one entry
# per holdout article.
rouge_score = rouge.compute(predictions=holdout_summaries, references=holdout_dataset['summary'])
print("ROUGE Score:", rouge_score)

In [None]:
# Compute BLEU
# Each prediction is scored against a list of references; here that list holds
# the single reference summary of the article.
bleu_predictions = list(holdout_summaries)
bleu_references = [[reference] for reference in holdout_dataset['summary']]

bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
print("BLEU Score:", bleu_score)

In [None]:
# Compute METEOR
# Predictions and references are paired by position, as for ROUGE.
meteor_score = meteor.compute(predictions=holdout_summaries, references=holdout_dataset['summary'])
print("METEOR Score:", meteor_score)

In [None]:
# Tokenize dataset
model_name = "google/gemma-3-1b-it"
# `device_map` is a model-loading option and has no meaning for a tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Always start from the untouched training portion of the holdout split. Reading
# `train_dataset` here made the cell non-idempotent: re-running it re-tokenized and
# re-split the already reduced training set.
tokenized_train_dataset = train_holdout_split['train'].map(preprocess_data, batched=True)

# Split the tokenized training portion 80-20 into training and validation sets
train_test_split = tokenized_train_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
validation_dataset = train_test_split['test']

In [None]:
# LoRA fine-tuning: train one adapter per (learning rate, batch size) combination
# and save each run under its own directory on Google Drive.
learning_rates = [1e-05, 2e-05, 3e-05, 5e-05]
batch_sizes = [4, 8]
combinations = [(lr, bs) for lr in learning_rates for bs in batch_sizes]

model_name = "google/gemma-3-1b-it"

for lr, bs in combinations:
  print(f"Learning Rate: {lr} and Batch Size: {bs} is running")

  # Drop objects left over from a previous run so their GPU memory can be freed.
  # pop() with a default replaces the bare `except: pass` around `del`, which
  # would also have hidden unrelated errors.
  for stale_name in ("model", "trainer", "training_args", "tokenizer"):
    globals().pop(stale_name, None)
  gc.collect()
  torch.cuda.empty_cache()


  save_path = f"/content/drive/My Drive/my_summarizer_model/lora_finetuned_model_{lr}_{bs}"

  # Load Tokenizer (saved next to the adapter so the run directory is self-contained)
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  tokenizer.save_pretrained(save_path)

  # Configure LoRA
  lora_config = LoraConfig(
      r=8,
      lora_alpha=32,
      lora_dropout=0.1,
      # target_modules="all-linear",
      target_modules=["q_proj", "v_proj"],
      task_type="CAUSAL_LM"
      )

  model = Gemma3ForCausalLM.from_pretrained(model_name,
                                            attn_implementation="eager",
                                            device_map="auto",
                                            torch_dtype=torch.float16)

  # Reduce memory usage
  model.config.use_cache = False
  model.gradient_checkpointing_enable()

  model = get_peft_model(model, lora_config)

  training_args = TrainingArguments(
      output_dir=save_path,
      learning_rate=lr,
      per_device_train_batch_size=bs,
      per_device_eval_batch_size=4,
      gradient_accumulation_steps=1,
      num_train_epochs=4,
      weight_decay=0.01,
      # Best-checkpoint tracking needs evaluation to run and a metric that is
      # actually produced. No `compute_metrics` is defined, so "eval_meteor" never
      # existed and no evaluation was scheduled; track the validation loss instead.
      eval_strategy="epoch",
      save_strategy="best",
      load_best_model_at_end=True,
      metric_for_best_model="eval_loss",
      greater_is_better=False,
      save_total_limit=1,
      fp16=True,
      label_names=["labels"],
      report_to="none",
      optim="adamw_torch_fused"
  )

  # The model is already wrapped by get_peft_model above, so `peft_config` is not
  # passed again (an adapter must be specified in exactly one place).
  trainer = SFTTrainer(
      model=model,
      args=training_args,
      train_dataset=train_dataset,
      eval_dataset=validation_dataset
  )

  torch.cuda.reset_peak_memory_stats()
  log_resources("Before training")
  start_time = time.time()
  # Start training
  trainer.train()

  end_time = time.time()
  log_resources("After training")
  peak_alloc = torch.cuda.max_memory_allocated() / 1e6
  peak_reserved = torch.cuda.max_memory_reserved() / 1e6

  print(f"Peak GPU Allocated: {peak_alloc:.2f} MB")
  print(f"Peak GPU Reserved:  {peak_reserved:.2f} MB")
  print(f"Elapsed Time: {round(end_time - start_time, 2)} seconds")

  trainer.save_model()

  # Final cleanup
  for stale_name in ("model", "trainer", "training_args", "tokenizer"):
    globals().pop(stale_name, None)
  gc.collect()
  torch.cuda.empty_cache()

In [None]:
# Evaluate every LoRA run on the holdout set with ROUGE, BLEU and METEOR.
learning_rates = [1e-05, 2e-05, 3e-05, 5e-05]
batch_sizes = [4, 8]
combinations = [(lr, bs) for lr in learning_rates for bs in batch_sizes]

# Release training leftovers; pop() with a default replaces the bare
# `except: pass` blocks around `del`.
for stale_name in ("model", "trainer", "training_args", "tokenizer"):
  globals().pop(stale_name, None)
gc.collect()
torch.cuda.empty_cache()

# Collect one record per run and build the DataFrame once, instead of growing it
# row by row inside the loop.
evaluation_rows = []
for lr, bs in combinations:
  run_name = f'lora_finetuned_model_{lr}_{bs}'
  run_dir = f"/content/drive/My Drive/my_summarizer_model/{run_name}"
  # `generate_summary` reads the module-level `tokenizer` and `model`.
  tokenizer = AutoTokenizer.from_pretrained(run_dir)
  model = Gemma3ForCausalLM.from_pretrained(run_dir, device_map="auto", torch_dtype=torch.float16).to(device)
  test_summaries = [generate_summary(text) for text in holdout_dataset['text']]
  rouge_score = rouge.compute(predictions=test_summaries, references=holdout_dataset['summary'])
  bleu_predictions = list(test_summaries)
  bleu_references = [[ref] for ref in holdout_dataset['summary']]
  bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
  meteor_score = meteor.compute(predictions=test_summaries, references=holdout_dataset['summary'])
  evaluation_rows.append({
      'model': run_name,
      'ROUGE Score': rouge_score,
      'BLEU Score': bleu_score,
      'METEOR Score': meteor_score,
  })

evaluation = pd.DataFrame(evaluation_rows, columns=['model', 'ROUGE Score', 'BLEU Score', 'METEOR Score'])

In [None]:
# Show the holdout scores of the LoRA runs.
evaluation.head(12)

In [None]:
# QLoRA fine-tuning: same grid as the LoRA runs, but on a 4-bit quantized base model.
learning_rates = [1e-05, 2e-05, 3e-05, 5e-05]
batch_sizes = [4, 8]
combinations = [(lr, bs) for lr in learning_rates for bs in batch_sizes]


model_name = "google/gemma-3-1b-it"

for lr, bs in combinations:
  print(f"Learning Rate: {lr} and Batch Size: {bs} is running")

  # Drop objects left over from a previous run so their GPU memory can be freed.
  # pop() with a default replaces the bare `except: pass` around `del`, which
  # would also have hidden unrelated errors.
  for stale_name in ("model", "trainer", "training_args", "tokenizer"):
    globals().pop(stale_name, None)
  gc.collect()
  torch.cuda.empty_cache()

  save_path = f"/content/drive/My Drive/my_summarizer_model/qlora_finetuned_model_{lr}_{bs}"

  # A nominal batch size of 8 is run as 4 samples x 2 gradient-accumulation steps.
  # Separate names are used so the loop variable `bs` keeps its nominal value.
  if bs == 8:
    per_device_bs = 4
    gas = 2
  else:
    per_device_bs = bs
    gas = 1

  bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_storage=torch.float16
    )

  # Load Tokenizer (saved next to the adapter so the run directory is self-contained)
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  tokenizer.save_pretrained(save_path)

  # Configure QLoRA
  lora_config = LoraConfig(
      r=8,
      lora_alpha=32,
      lora_dropout=0.1,
      # target_modules="all-linear",
      target_modules=["q_proj", "v_proj"],
      task_type="CAUSAL_LM"
      )

  model = Gemma3ForCausalLM.from_pretrained(model_name,
                                            attn_implementation="eager",
                                            device_map="auto",
                                            quantization_config=bnb_config,
                                            torch_dtype=torch.float16)

  # Reduce memory usage
  model.config.use_cache = False
  model.gradient_checkpointing_enable()

  model = prepare_model_for_kbit_training(model)
  model = get_peft_model(model, lora_config)

  training_args = TrainingArguments(
      output_dir=save_path,
      learning_rate=lr,
      per_device_train_batch_size=per_device_bs,
      per_device_eval_batch_size=4,
      gradient_accumulation_steps=gas,
      num_train_epochs=4,
      weight_decay=0.01,
      # Best-checkpoint tracking needs evaluation to run and a metric that is
      # actually produced. No `compute_metrics` is defined, so "eval_meteor" never
      # existed and no evaluation was scheduled; track the validation loss instead.
      eval_strategy="epoch",
      save_strategy="best",
      load_best_model_at_end=True,
      metric_for_best_model="eval_loss",
      greater_is_better=False,
      save_total_limit=1,
      fp16=True,
      label_names=["labels"],
      report_to="none",
      optim="adamw_torch_fused"
  )

  # The model was placed by `device_map="auto"` and is already wrapped by
  # get_peft_model, so it is passed as-is: no extra `.to(device)` on the quantized
  # model and no second `peft_config`.
  trainer = SFTTrainer(
      model=model,
      args=training_args,
      train_dataset=train_dataset,
      eval_dataset=validation_dataset
  )

  torch.cuda.reset_peak_memory_stats()
  log_resources("Before training")
  start_time = time.time()
  # Start training
  trainer.train()

  end_time = time.time()
  log_resources("After training")
  peak_alloc = torch.cuda.max_memory_allocated() / 1e6
  peak_reserved = torch.cuda.max_memory_reserved() / 1e6

  print(f"Peak GPU Allocated: {peak_alloc:.2f} MB")
  print(f"Peak GPU Reserved:  {peak_reserved:.2f} MB")
  print(f"Elapsed Time: {round(end_time - start_time, 2)} seconds")

  trainer.save_model()

  # Final cleanup
  for stale_name in ("model", "trainer", "training_args", "tokenizer"):
    globals().pop(stale_name, None)
  gc.collect()
  torch.cuda.empty_cache()

In [None]:
# Evaluate every QLoRA run on the holdout set with ROUGE, BLEU and METEOR.
learning_rates = [1e-05, 2e-05, 3e-05, 5e-05]
batch_sizes = [4, 8]
combinations = [(lr, bs) for lr in learning_rates for bs in batch_sizes]

# Release training leftovers; pop() with a default replaces the bare
# `except: pass` blocks around `del`.
for stale_name in ("model", "trainer", "training_args", "tokenizer"):
  globals().pop(stale_name, None)
gc.collect()
torch.cuda.empty_cache()

# Collect one record per run and build the DataFrame once, instead of growing it
# row by row inside the loop.
evaluation_rows = []
for lr, bs in combinations:
  run_name = f'qlora_finetuned_model_{lr}_{bs}'
  run_dir = f"/content/drive/My Drive/my_summarizer_model/{run_name}"
  # `generate_summary` reads the module-level `tokenizer` and `model`.
  tokenizer = AutoTokenizer.from_pretrained(run_dir)
  model = Gemma3ForCausalLM.from_pretrained(run_dir, device_map="auto", torch_dtype=torch.float16).to(device)
  test_summaries = [generate_summary(text) for text in holdout_dataset['text']]
  rouge_score = rouge.compute(predictions=test_summaries, references=holdout_dataset['summary'])
  bleu_predictions = list(test_summaries)
  bleu_references = [[ref] for ref in holdout_dataset['summary']]
  bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
  meteor_score = meteor.compute(predictions=test_summaries, references=holdout_dataset['summary'])
  evaluation_rows.append({
      'model': run_name,
      'ROUGE Score': rouge_score,
      'BLEU Score': bleu_score,
      'METEOR Score': meteor_score,
  })

evaluation = pd.DataFrame(evaluation_rows, columns=['model', 'ROUGE Score', 'BLEU Score', 'METEOR Score'])

In [None]:
# Show the holdout scores of the QLoRA runs (this `evaluation` replaces the LoRA one).
evaluation.head(12)

In [None]:
# Hard-coded resource measurements, one record per (learning rate, batch size) run.
# Per the plot labels below, GPU figures are in MB and "Training Time" is in seconds.
# NOTE(review): presumably transcribed by hand from the training-loop output above;
# re-check these numbers whenever the training cells are re-run.

# Data for LoRA
lora_data = [
    {"Learning Rate": "1e-05", "Batch Size": 4, "Peak GPU Allocated": 8603.63, "Before GPU Reserved": 2657.1, "After GPU Reserved": 10812.9, "Training Time": 278.79},
    {"Learning Rate": "1e-05", "Batch Size": 8, "Peak GPU Allocated": 15180.35, "Before GPU Reserved": 2699.0, "After GPU Reserved": 15321.8, "Training Time": 285.57},
    {"Learning Rate": "2e-05", "Batch Size": 4, "Peak GPU Allocated": 10746.97, "Before GPU Reserved": 2699.0, "After GPU Reserved": 11882.5, "Training Time": 276.52},
    {"Learning Rate": "2e-05", "Batch Size": 8, "Peak GPU Allocated": 15180.35, "Before GPU Reserved": 2699.0, "After GPU Reserved": 15300.8, "Training Time": 283.17},
    {"Learning Rate": "3e-05", "Batch Size": 4, "Peak GPU Allocated": 8603.63, "Before GPU Reserved": 2699.0, "After GPU Reserved": 10812.9, "Training Time": 275.28},
    {"Learning Rate": "3e-05", "Batch Size": 8, "Peak GPU Allocated": 15180.35, "Before GPU Reserved": 2699.0, "After GPU Reserved": 15279.8, "Training Time": 293.81},
    {"Learning Rate": "5e-05", "Batch Size": 4, "Peak GPU Allocated": 8603.63, "Before GPU Reserved": 2699.0, "After GPU Reserved": 10812.9, "Training Time": 276.42},
    {"Learning Rate": "5e-05", "Batch Size": 8, "Peak GPU Allocated": 15180.35, "Before GPU Reserved": 2699.0, "After GPU Reserved": 15300.8, "Training Time": 290.73},
]

# Data for QLoRA
# In the QLoRA training loop a batch size of 8 is run as 4 samples with 2
# gradient-accumulation steps.
qlora_data = [
    {"Learning Rate": "1e-05", "Batch Size": 4, "Peak GPU Allocated": 8902.32, "Before GPU Reserved": 2487.2, "After GPU Reserved": 10244.6, "Training Time": 348.57},
    {"Learning Rate": "1e-05", "Batch Size": 8, "Peak GPU Allocated": 8905.35, "Before GPU Reserved": 2508.2, "After GPU Reserved": 10139.7, "Training Time": 344.11},
    {"Learning Rate": "2e-05", "Batch Size": 4, "Peak GPU Allocated": 8902.32, "Before GPU Reserved": 2508.2, "After GPU Reserved": 10139.7, "Training Time": 354.15},
    {"Learning Rate": "2e-05", "Batch Size": 8, "Peak GPU Allocated": 8905.35, "Before GPU Reserved": 2508.2, "After GPU Reserved": 10139.7, "Training Time": 344.15},
    {"Learning Rate": "3e-05", "Batch Size": 4, "Peak GPU Allocated": 8902.32, "Before GPU Reserved": 2508.2, "After GPU Reserved": 10139.7, "Training Time": 354.46},
    {"Learning Rate": "3e-05", "Batch Size": 8, "Peak GPU Allocated": 9988.56, "Before GPU Reserved": 2508.2, "After GPU Reserved": 10139.7, "Training Time": 344.06},
    {"Learning Rate": "5e-05", "Batch Size": 4, "Peak GPU Allocated": 8902.32, "Before GPU Reserved": 2508.2, "After GPU Reserved": 10139.7, "Training Time": 354.19},
    {"Learning Rate": "5e-05", "Batch Size": 8, "Peak GPU Allocated": 9985.58, "Before GPU Reserved": 2508.2, "After GPU Reserved": 10139.7, "Training Time": 344.2},
]

In [None]:
# DataFrames for LoRA and QLoRA
# One row per training run, built from the measurement records above.
df_lora = pd.DataFrame(lora_data)
df_qlora = pd.DataFrame(qlora_data)

In [None]:
# Reserved GPU memory added during training: "after" reading minus "before" reading
for measurements in (df_lora, df_qlora):
    measurements["GPU Reserved Difference"] = (
        measurements["After GPU Reserved"] - measurements["Before GPU Reserved"]
    )

In [None]:
# Label each run as "<learning rate>, <batch size>" for use on the plots' x axis.
# Built with vectorised string concatenation: the previous row-wise lambda indexed
# a string-labelled Series with x[0] / x[1], which relies on pandas' deprecated
# positional fallback for integer keys.
df_lora["Hyperparameter combination"] = df_lora["Learning Rate"].astype(str) + ", " + df_lora["Batch Size"].astype(str)
df_qlora["Hyperparameter combination"] = df_qlora["Learning Rate"].astype(str) + ", " + df_qlora["Batch Size"].astype(str)

In [None]:
# Keep only the columns the plots below read
plot_columns = ["Hyperparameter combination", "Peak GPU Allocated", "GPU Reserved Difference", "Training Time"]
df_lora = df_lora[plot_columns]
df_qlora = df_qlora[plot_columns]

In [None]:
fig, ax = plt.subplots(2, 1, figsize=(14, 12))

# One panel per metric: (column to plot, panel title, y-axis label)
gpu_panels = [
    ("Peak GPU Allocated", "Peak GPU Usage Comparison", "Peak GPU Allocated (MB)"),
    ("GPU Reserved Difference", "GPU Reserved Difference Comparison", "GPU Reserved Difference (MB)"),
]

for panel_ax, (column, panel_title, y_label) in zip(ax, gpu_panels):
    # LoRA first, then QLoRA, so the legend order stays the same in both panels
    sns.lineplot(x="Hyperparameter combination", y=column, data=df_lora, label="LoRA", marker='o', ax=panel_ax)
    sns.lineplot(x="Hyperparameter combination", y=column, data=df_qlora, label="QLoRA", marker='x', ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel("Hyperparameter Combination (Learning Rate, Batch Size)")
    panel_ax.set_ylabel(y_label)
    panel_ax.tick_params(axis='x', rotation=45)
    panel_ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(14, 8))

# Training Time Comparison
# Both series are drawn on the explicitly named axes (the figure's only axes).
for run_frame, series_label, marker_style in ((df_lora, "LoRA", 'o'), (df_qlora, "QLoRA", 'x')):
    sns.lineplot(x="Hyperparameter combination", y="Training Time", data=run_frame, label=series_label, marker=marker_style, ax=ax)
ax.set(
    title="Training Time Comparison",
    xlabel="Hyperparameter Combination (Learning Rate, Batch Size)",
    ylabel="Training Time (seconds)",
)
ax.tick_params(axis='x', rotation=45)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Summarize the holdout set with the LoRA run trained at lr=3e-05, batch size 4.
# `generate_summary` reads the module-level `tokenizer` and `model` assigned here.
lora_model_dir = "/content/drive/My Drive/my_summarizer_model/lora_finetuned_model_3e-05_4"
tokenizer = AutoTokenizer.from_pretrained(lora_model_dir)
model = Gemma3ForCausalLM.from_pretrained(
    lora_model_dir,
    device_map="auto",
    torch_dtype=torch.float16,
).to(device)
lora_summaries = [generate_summary(article) for article in holdout_dataset['text']]

In [None]:
# Display the LoRA model's summary of the second holdout article.
lora_summaries[1]

In [None]:
# Summarize the holdout set with the QLoRA run trained at lr=5e-05, batch size 4.
# `generate_summary` reads the module-level `tokenizer` and `model` assigned here.
qlora_model_dir = "/content/drive/My Drive/my_summarizer_model/qlora_finetuned_model_5e-05_4"
tokenizer = AutoTokenizer.from_pretrained(qlora_model_dir)
model = Gemma3ForCausalLM.from_pretrained(
    qlora_model_dir,
    device_map="auto",
    torch_dtype=torch.float16,
).to(device)
qlora_summaries = [generate_summary(article) for article in holdout_dataset['text']]

In [None]:
# Display the QLoRA model's summary of the second holdout article.
qlora_summaries[1]

In [None]:
# Reload the LoRA run (lr=3e-05, batch size 4) as the module-level `tokenizer` and
# `model`, which `generate_summary` reads.
demo_model_dir = "/content/drive/My Drive/my_summarizer_model/lora_finetuned_model_3e-05_4"
tokenizer = AutoTokenizer.from_pretrained(demo_model_dir)
model = Gemma3ForCausalLM.from_pretrained(
    demo_model_dir,
    device_map="auto",
    torch_dtype=torch.float16,
).to(device)

In [None]:
# Gradio demo: one text box in, one text box out, wired to `generate_summary`.
summary_input = gr.Textbox(lines=10, label="Enter text to summarize")
summary_output = gr.Textbox(label="Summary")

iface = gr.Interface(
    fn=generate_summary,
    inputs=summary_input,
    outputs=summary_output,
    title="Text Summarizer",
    description="Enter a paragraph and the model will generate a summary.",
)

iface.launch()