In [1]:
# one-time installs (run in a notebook cell)
!pip install -q transformers datasets accelerate peft evaluate sentencepiece gradio


In [2]:
import importlib.metadata

# List of required packages
packages = [
    "transformers",
    "datasets",
    "accelerate",
    "bitsandbytes",
    "peft",
    "evaluate",
    "sentencepiece",
    "gradio"
]

print("üîç Checking installations...\n")

# Read each version from the installed distribution metadata instead of
# importing the package: the check stays fast, has no import side effects
# (framework start-up logging, GPU initialisation), and one package that is
# installed but fails at import time cannot abort the whole loop.
for pkg in packages:
    try:
        version = importlib.metadata.version(pkg)
        print(f"‚úÖ {pkg} installed ‚Äî version: {version}")
    except importlib.metadata.PackageNotFoundError:
        print(f"‚ùå {pkg} not installed!")


üîç Checking installations...

‚úÖ transformers installed ‚Äî version: 4.57.1
‚úÖ datasets installed ‚Äî version: 4.1.1
‚úÖ accelerate installed ‚Äî version: 1.11.0
‚úÖ bitsandbytes installed ‚Äî version: 0.48.3.dev0


2025-11-03 15:11:11.045559: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762182671.068291     722 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762182671.075232     722 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


‚úÖ peft installed ‚Äî version: 0.17.1
‚úÖ evaluate installed ‚Äî version: 0.4.6
‚úÖ sentencepiece installed ‚Äî version: 0.2.0
‚úÖ gradio installed ‚Äî version: 5.38.1


In [3]:
!pip install -q datasets
!pip install -q evaluate

In [4]:
!pip install rouge_score --quiet


In [5]:
import torch

# Report whether a CUDA device is visible. The device name is only queried when
# CUDA is available, because torch.cuda.get_device_name(0) raises on a CPU-only
# runtime and would stop the notebook at this cell.
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
if cuda_available:
    print("GPU name:", torch.cuda.get_device_name(0))
else:
    print("GPU name: none detected (running on CPU)")


CUDA available: True
GPU name: Tesla P100-PCIE-16GB


In [6]:
# =========================================================
# Fine-tuning T5-small (CNN/DailyMail) on Kaggle — fp16 + LoRA
# =========================================================

import torch
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
import evaluate
import numpy as np
import os

# =========================================================
# Load dataset from Kaggle input directory
# =========================================================
# CSV splits as mounted under the Kaggle input directory.
train_file = "/kaggle/input/cnn_dailymail/train.csv"
val_file = "/kaggle/input/cnn_dailymail/validation.csv"
csv_splits = {"train": train_file, "validation": val_file}

print("üì¶ Loading dataset...")
dataset = load_dataset("csv", data_files=csv_splits)

print(f"‚úÖ Loaded {len(dataset['train'])} training and {len(dataset['validation'])} validation samples")

# Fine-tune on the leading rows only so a full run stays short:
# the first 2000 training rows and the first 200 validation rows.
train_subset_size, val_subset_size = 2000, 200
small_train = dataset["train"].select(range(train_subset_size))
small_val = dataset["validation"].select(range(val_subset_size))

# =========================================================
# Tokenizer and model setup (fp16 mode)
# =========================================================
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("‚öôÔ∏è Loading model in fp16 mode...")
# `dtype` replaces the deprecated `torch_dtype` keyword (the installed
# transformers release prints a deprecation warning for the old name).
# device_map="auto" lets the library decide where to place the weights.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    dtype=torch.float16,
    device_map="auto"
)

# =========================================================
# Add LoRA adapter
# =========================================================
# Low-rank adapters are attached to the modules named "q" and "v"; only the
# adapter weights are trainable, which print_trainable_parameters() reports.
lora_config = LoraConfig(
    r=16,               # adapter rank
    lora_alpha=32,      # adapter scaling factor
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",        # bias terms are not trained
    task_type=TaskType.SEQ_2_SEQ_LM
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# =========================================================
# Preprocessing
# =========================================================
max_input_len = 512   # token budget for the prefixed article
max_target_len = 128  # token budget for the reference summary

def preprocess_fn(batch):
    """Tokenize one batch of articles and their reference summaries.

    Args:
        batch: Mapping with an "article" column and a "highlights" column.

    Returns:
        The tokenizer output for the "summarize: "-prefixed articles
        (truncated to ``max_input_len``), with the token ids of the
        highlights (truncated to ``max_target_len``) stored under "labels".
    """
    prompts = []
    for article in batch["article"]:
        prompts.append("summarize: " + article)

    encoded = tokenizer(prompts, truncation=True, max_length=max_input_len)
    target_encoding = tokenizer(
        batch["highlights"], truncation=True, max_length=max_target_len
    )
    encoded["labels"] = target_encoding["input_ids"]
    return encoded

print("üîÑ Tokenizing data...")
tokenized_train = small_train.map(preprocess_fn, batched=True, remove_columns=small_train.column_names)
tokenized_val = small_val.map(preprocess_fn, batched=True, remove_columns=small_val.column_names)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
rouge = evaluate.load("rouge")

# =========================================================
# Evaluation metric (with overflow fix)
# =========================================================
def postprocess_text(preds, labels):
    """Strip leading and trailing whitespace from every prediction and reference.

    Args:
        preds: Iterable of decoded prediction strings.
        labels: Iterable of decoded reference strings.

    Returns:
        A ``(preds, labels)`` tuple of two new lists of stripped strings.
    """
    def _stripped(texts):
        return [text.strip() for text in texts]

    return _stripped(preds), _stripped(labels)

def compute_metrics(eval_preds):
    """Decode predictions and references and score them with ROUGE.

    Args:
        eval_preds: ``(predictions, labels)`` pair of token-id arrays;
            ``predictions`` may itself be a tuple, in which case its first
            element is used.

    Returns:
        Dict of ROUGE scores scaled to percentages and rounded to two decimals.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    # -100 is a padding/ignore marker, not a vocabulary id, and the tokenizer
    # cannot decode it. Labels were already sanitised this way; predictions
    # need the same treatment because they may carry the same padding value
    # once batches of different lengths are gathered. Casting to int32 alone
    # does not remove it.
    pad_id = tokenizer.pad_token_id
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    preds = np.where(preds != -100, preds, pad_id).astype(np.int32)
    labels = np.where(labels != -100, labels, pad_id).astype(np.int32)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    # Scores may be plain floats or dict-like entries with an "fmeasure" field,
    # depending on the metric implementation; handle both.
    result = {k: round(v["fmeasure"] * 100, 2) if isinstance(v, dict) else round(v * 100, 2) for k, v in result.items()}
    return result

# =========================================================
# Training setup (no early eval to prevent decode crash)
# =========================================================
# Evaluation during training is disabled; metrics are computed once by the
# explicit trainer.evaluate() call after training.
training_args = Seq2SeqTrainingArguments(
    output_dir="./t5_small_cnn_fp16",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step per device
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    predict_with_generate=True,  # evaluate on generated sequences so ROUGE can be computed
    eval_strategy="no",  # no in-training evaluation (original note: avoids OverflowError)
    save_strategy="epoch",
    save_total_limit=1,
    logging_steps=50,
    report_to="none",
    generation_max_length=64,  # shorter eval generation
    generation_num_beams=4
)

# `processing_class` replaces the deprecated `tokenizer` keyword of the trainer;
# passing the tokenizer under the old name emits a deprecation warning.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# =========================================================
# Train and Save
# =========================================================
print("üöÄ Starting fine-tuning (fp16 + LoRA)...")
trainer.train()

print("üíæ Saving model...")
trainer.save_model("./t5_small_cnn_fp16")
tokenizer.save_pretrained("./t5_small_cnn_fp16")

print("‚úÖ Training complete and model saved successfully!")

# =========================================================
# Optional: Evaluate after training
# =========================================================
print("üìä Running final evaluation...")
metrics = trainer.evaluate()
print("Final Evaluation Metrics:", metrics)


üì¶ Loading dataset...
‚úÖ Loaded 287113 training and 13368 validation samples


`torch_dtype` is deprecated! Use `dtype` instead!


‚öôÔ∏è Loading model in fp16 mode...
trainable params: 589,824 || all params: 61,096,448 || trainable%: 0.9654
üîÑ Tokenizing data...


  trainer = Seq2SeqTrainer(
The model is already on multiple devices. Skipping the move to device specified in `args`.


üöÄ Starting fine-tuning (fp16 + LoRA)...


Step,Training Loss
50,2.4228
100,2.1662
150,2.161
200,2.0976
250,2.1073
300,2.1282
350,2.0733


üíæ Saving model...
‚úÖ Training complete and model saved successfully!
üìä Running final evaluation...


Final Evaluation Metrics: {'eval_loss': 1.8787391185760498, 'eval_rouge1': 39.57, 'eval_rouge2': 18.03, 'eval_rougeL': 28.63, 'eval_rougeLsum': 28.58, 'eval_runtime': 58.3706, 'eval_samples_per_second': 3.426, 'eval_steps_per_second': 0.857, 'epoch': 3.0}


In [7]:

# Zip the saved model directory
import os  # imported here so this cell does not rely on an earlier cell's import
import shutil

model_dir = "./t5_small_cnn_fp16"
zip_path = "./t5_small_cnn_fp16.zip"

# Remove existing zip if any
if os.path.exists(zip_path):
    os.remove(zip_path)

# Create zip archive. make_archive appends ".zip" to base_name itself, so the
# base is derived from zip_path; the file that was removed above, the file
# that is created, and the path reported below therefore always match.
archive_base, _ = os.path.splitext(zip_path)
shutil.make_archive(base_name=archive_base, format='zip', root_dir=model_dir)

print(f"‚úÖ Model zipped successfully at: {zip_path}")


‚úÖ Model zipped successfully at: ./t5_small_cnn_fp16.zip


In [9]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel

# =========================================================
# Load the fine-tuned model
# =========================================================
model_path = "./t5_small_cnn_fp16"  # Update this path if needed

print("Loading tokenizer and model...")
# The tokenizer files were saved next to the adapter in model_path.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load base model first.
# `dtype` replaces the deprecated `torch_dtype` keyword.
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    "t5-small",
    dtype=torch.float16,
    device_map="auto"
)

# Load LoRA adapter on top of the base weights and switch to inference mode
model = PeftModel.from_pretrained(base_model, model_path)
model.eval()
print("Model loaded successfully!")

# =========================================================
# Summarization function
# =========================================================
def summarize_text(article):
    """Generate a summary for the given article.

    Args:
        article: Raw article text. ``None`` and whitespace-only input are
            both treated as empty.

    Returns:
        The decoded summary string, or a short prompt asking for input when
        ``article`` is empty.
    """
    # Guard against None as well as blank text; calling .strip() on None would
    # raise AttributeError instead of returning the friendly message.
    if article is None or not article.strip():
        return "Please enter some text to summarize."

    # Prepare input: same "summarize: " prefix and 512-token truncation that
    # preprocess_fn applies during fine-tuning.
    input_text = "summarize: " + article
    inputs = tokenizer(
        input_text,
        max_length=512,
        truncation=True,
        return_tensors="pt"
    ).to(model.device)

    # Generate summary with beam search; no gradients are needed for inference.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=128,
            num_beams=4,
            early_stopping=True
        )

    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

# =========================================================
# Example articles for demo
# =========================================================
# Sample articles offered in the demo UI. Each entry is a one-element list
# because gr.Examples is given a single input component (inputs=[input_text]).
examples = [
    ["""The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower. Constructed from 1887 to 1889 as the entrance to the 1889 World's Fair, it was initially criticized by some of France's leading artists and intellectuals for its design, but it has become a global cultural icon of France and one of the most recognizable structures in the world."""],
    ["""Climate change is causing significant impacts on Earth's weather patterns. Rising global temperatures are leading to more frequent and severe heat waves, droughts, and storms. Scientists warn that without immediate action to reduce greenhouse gas emissions, these effects will continue to worsen, threatening ecosystems, food security, and human populations worldwide."""]
]

# =========================================================
# Gradio Interface
# =========================================================
# Build the demo UI: a title, a row with two columns (article input plus the
# submit button in the first, summary output in the second), a block of
# clickable examples, and the click handler that runs summarize_text.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# T5 Article Summarizer")
    gr.Markdown("Fine-tuned T5-small model for text summarization")
    
    with gr.Row():
        # First column: where the user pastes the article and triggers the run.
        with gr.Column():
            input_text = gr.Textbox(
                label="Article Text",
                placeholder="Paste your article here...",
                lines=12
            )
            submit_btn = gr.Button("Summarize", variant="primary", size="lg")
        
        # Second column: receives the generated summary.
        with gr.Column():
            output_text = gr.Textbox(
                label="Summary",
                lines=12
            )
    
    # Examples are wired to the article textbox only; they fill the input and
    # are not connected to a function here.
    gr.Examples(
        examples=examples,
        inputs=[input_text],
        label="Try an example"
    )
    
    # Clicking the button passes the article text to summarize_text and writes
    # its return value into the summary textbox.
    submit_btn.click(
        fn=summarize_text,
        inputs=input_text,
        outputs=output_text
    )

# =========================================================
# Launch the app
# =========================================================
# NOTE(review): share=True requests a public URL in addition to the local one
# (the recorded output shows a *.gradio.live link), so anyone with the link can
# reach this model while the cell is running — confirm that is intended.
if __name__ == "__main__":
    demo.launch(share=True)

Loading tokenizer and model...
Model loaded successfully!
* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://9b1adbc5c33445fd01.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
