In [None]:
# Column names for the input text and its summary.
# NOTE(review): neither constant is referenced in the cells visible below.
CLEAN_TEXT_COLUMN = 'article'
SUMMARY_COLUMN = 'highlights'

In [None]:
!pip install tensorboard
!pip install tensorboard-data-server
!pip install google-cloud-storage
!pip install tbparse matplotlib seaborn pandas numpy



In [None]:
# ============================================================================
# GOOGLE CLOUD STORAGE (GCS) SETUP
# ============================================================================

# Authenticates the Colab user, then installs gcsfuse so that a Google Cloud
# Storage bucket can be mounted as a local directory for model storage.
# ============================================================================

from google.colab import auth
# Authenticate the current Colab user (presumably this is what grants the
# runtime access to the user's GCS buckets — confirm).
auth.authenticate_user()

# Install gcsfuse
# Step 1: register Google's apt repository for gcsfuse.
# NOTE(review): the repository name is hard-coded to "gcsfuse-bionic" and the
# URL uses plain http — confirm both still match the runtime's OS release.
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
# Step 2: trust the repository signing key.
# NOTE(review): apt-key is deprecated on recent Debian/Ubuntu releases — verify
# this still works on the target image.
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Step 3: refresh the package index and install gcsfuse (-qq keeps apt quiet).
!apt -qq update
!apt -qq install gcsfuse



In [None]:
!mkdir pegasus_model
!gcsfuse --implicit-dirs pegasus_2k_model pegasus_model

In [None]:
# Load every fine-tuned model variant up front and serve them from one UI (intended for an A100 GPU).

import gradio as gr
import torch
import time
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    PegasusTokenizer,
    PegasusForConditionalGeneration,
)

# ==================================================
# A100 OPTIMIZATIONS
# ==================================================
# Permit TF32 in CUDA matmul and in cuDNN kernels.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# Every model is moved to this device; stop right away when no GPU is visible.
device = "cuda"
assert torch.cuda.is_available(), "GPU is required"
print(f"Using GPU: {torch.cuda.get_device_name(0)}")

# ==================================================
# MODEL CONFIG
# ==================================================
# MODEL_CONFIG maps a display name to:
#   "type"       - "t5" or "pegasus"; selects the tokenizer/model classes
#   "base_model" - identifier the tokenizer is loaded from
#   "variants"   - variant label -> fine-tuned checkpoint directory
# "checkpoint-XXX" is kept verbatim; it looks like a placeholder that must be
# replaced with real checkpoint directory names — confirm before running.
def _variant_paths(folder):
    """Build the variant -> checkpoint-directory mapping for one model folder."""
    return {
        size: f"/content/models/{folder}/{size}/checkpoint-XXX"
        for size in ("2k", "10k", "50k")
    }


MODEL_CONFIG = {
    display_name: {
        "type": family,
        "base_model": base,
        "variants": _variant_paths(folder),
    }
    for display_name, family, base, folder in (
        ("T5 Base", "t5", "t5-base", "t5-base"),
        ("T5 Large", "t5", "t5-large", "t5-large"),
        ("Pegasus Base", "pegasus", "google/pegasus-arxiv", "pegasus-base"),
        ("Pegasus CNN", "pegasus", "google/pegasus-cnn_dailymail", "pegasus-cnn"),
    )
}

# ==================================================
# LOAD MODELS (FP16)
# ==================================================
# Fills two dicts keyed by "<model name> (<variant>)":
#   loaded_models     - the model, moved to `device` and put in eval mode
#   loaded_tokenizers - the tokenizer to use with that model
# NOTE(review): the Hugging Face Pegasus documentation lists FP16 as
# unsupported — verify the Pegasus summaries, or consider bfloat16 on A100.
loaded_models = {}
loaded_tokenizers = {}

print("Loading models (FP16)...")

for model_name, cfg in MODEL_CONFIG.items():
    # Any type other than "t5" is handled as Pegasus.
    if cfg["type"] == "t5":
        tokenizer_cls, model_cls = T5Tokenizer, T5ForConditionalGeneration
    else:
        tokenizer_cls, model_cls = PegasusTokenizer, PegasusForConditionalGeneration

    # The tokenizer is read from the base model, not from the checkpoint, so
    # it is the same for every variant: load it once per model family.
    tokenizer = tokenizer_cls.from_pretrained(cfg["base_model"])

    for variant, path in cfg["variants"].items():
        key = f"{model_name} ({variant})"
        print(f"\nLoading {key}")
        start = time.time()

        model = model_cls.from_pretrained(path, torch_dtype=torch.float16)
        model.to(device)
        model.eval()

        loaded_models[key] = model
        loaded_tokenizers[key] = tokenizer

        print(f"{key} loaded in {time.time() - start:.2f}s")

print("\n✅ All models loaded in FP16")

# ==================================================
# SUMMARIZATION (FAST PATH)
# ==================================================
def summarize(text, model_key):
    """Summarize ``text`` with the model variant registered under ``model_key``.

    Args:
        text: Document to summarize. ``None``, empty, or whitespace-only
            input returns an empty summary without running the model.
        model_key: Key into ``loaded_models`` / ``loaded_tokenizers``,
            e.g. ``"T5 Base (2k)"``.

    Returns:
        The decoded summary with special tokens stripped.

    Raises:
        KeyError: If ``model_key`` is not a loaded model variant.
    """
    # Nothing to summarize: skip generation instead of letting the model
    # produce text from an empty prompt (or from the bare T5 prefix).
    if text is None or not text.strip():
        return ""

    model = loaded_models[model_key]
    tokenizer = loaded_tokenizers[model_key]

    # T5 variants get the "summarize: " task prefix; they are recognised by
    # their key, which starts with "T5".
    if model_key.startswith("T5"):
        text = "summarize: " + text

    inputs = tokenizer(
        text,
        truncation=True,
        padding="longest",
        return_tensors="pt"
    ).to(device)

    with torch.inference_mode():
        summary_ids = model.generate(
            **inputs,
            max_length=128,
            num_beams=4,          # good quality/speed balance
            early_stopping=True,
            use_cache=True        # decoder cache (important)
        )

    # A single input was passed, so only the first sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# ==================================================
# UI
# ==================================================
# One dropdown entry per loaded model variant; the first one is preselected.
MODEL_CHOICES = list(loaded_models.keys())

with gr.Blocks() as demo:
    gr.Markdown("## 🚀 A100-Optimized Multi-Model Summarization")

    model_selector = gr.Dropdown(
        choices=MODEL_CHOICES,
        value=MODEL_CHOICES[0],
        label="Select Model Variant"
    )

    input_text = gr.Textbox(
        lines=10,
        label="Input text"
    )

    summarize_btn = gr.Button("Summarize")

    output_text = gr.Textbox(
        lines=5,
        label="Summary"
    )

    # Clicking the button calls summarize(input_text, model_selector) and
    # writes the returned string into the output box.
    summarize_btn.click(
        fn=summarize,
        inputs=[input_text, model_selector],
        outputs=output_text
    )

# NOTE(review): share=True presumably exposes the app through a public Gradio
# link — confirm that is intended.
demo.launch(share=True)



In [None]:
# =============================================================
# Imports
# =============================================================
import gradio as gr
import torch
import time
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    PegasusTokenizer,
    PegasusForConditionalGeneration,
)

# =============================================================
# GPU & A100-specific optimizations
# =============================================================
# Permit TF32 in CUDA matmul and in cuDNN kernels.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# A GPU is mandatory for this notebook: check for it first, then bind the
# CUDA device unconditionally so that no CPU fallback is implied.
assert torch.cuda.is_available(), "GPU is required for this notebook"
device = torch.device("cuda")
print("Using GPU:", torch.cuda.get_device_name(0))


In [None]:
# =============================================================
# Model configuration
# Maps each display name to its type ("t5" / "pegasus"), its base model
# identifier, and one fine-tuned checkpoint directory per variant label.
# =============================================================
_CHECKPOINT_ROOT = "/content/models"
_VARIANT_SIZES = ("2k", "10k", "50k")

# (display name, type, base model, checkpoint directory stem)
_FAMILY_SPECS = (
    ("T5 Base", "t5", "t5-base", "t5_base"),
    ("T5 Large", "t5", "t5-large", "t5_large"),
    ("Pegasus Base", "pegasus", "google/pegasus-arxiv", "pegasus_base"),
    ("Pegasus CNN", "pegasus", "google/pegasus-cnn_dailymail", "pegasus_cnn_daily_mail"),
)

MODEL_CONFIG = {
    name: {
        "type": kind,
        "base_model": base,
        # Checkpoint directories follow "<root>/<stem>_<variant label>".
        "variants": {
            size: f"{_CHECKPOINT_ROOT}/{stem}_{size}" for size in _VARIANT_SIZES
        },
    }
    for name, kind, base, stem in _FAMILY_SPECS
}


In [None]:
# =============================================================
# Load models and tokenizers (FP16, inference mode)
# =============================================================
# Fills two dicts keyed by "<model name> (<variant>)":
#   loaded_models     - the model, moved to `device` and put in eval mode
#   loaded_tokenizers - the tokenizer loaded from the same checkpoint
# NOTE(review): the Hugging Face Pegasus documentation lists FP16 as
# unsupported — verify the Pegasus summaries, or consider bfloat16 on A100.
loaded_models = {}
loaded_tokenizers = {}

print("Loading models in FP16 mode...")

for model_name, cfg in MODEL_CONFIG.items():
    # Any type other than "t5" is handled as Pegasus.
    if cfg["type"] == "t5":
        tokenizer_cls, model_cls = T5Tokenizer, T5ForConditionalGeneration
    else:
        tokenizer_cls, model_cls = PegasusTokenizer, PegasusForConditionalGeneration

    for variant, checkpoint_path in cfg["variants"].items():
        model_key = f"{model_name} ({variant})"
        print(f"\nLoading {model_key}")
        start_time = time.time()

        # Tokenizer and weights both come from the fine-tuned checkpoint.
        tokenizer = tokenizer_cls.from_pretrained(checkpoint_path)
        model = model_cls.from_pretrained(
            checkpoint_path,
            torch_dtype=torch.float16,
        )

        model.to(device)
        model.eval()

        loaded_models[model_key] = model
        loaded_tokenizers[model_key] = tokenizer

        print(f"{model_key} loaded in {time.time() - start_time:.2f}s")

print("\n✅ All models loaded successfully")


In [None]:
# =============================================================
# Fast summarization function (inference-only)
# =============================================================
def summarize(text, model_key):
    """
    Generate a summary using the selected model variant.

    Args:
        text: Document to summarize. ``None``, empty, or whitespace-only
            input returns an empty summary without running the model.
        model_key: Key into ``loaded_models`` / ``loaded_tokenizers``,
            e.g. ``"T5 Base (2k)"``.

    Returns:
        The decoded summary with special tokens stripped.

    Raises:
        KeyError: If ``model_key`` is not a loaded model variant.
    """
    # Nothing to summarize: skip generation instead of letting the model
    # produce text from an empty prompt (or from the bare T5 prefix).
    if text is None or not text.strip():
        return ""

    model = loaded_models[model_key]
    tokenizer = loaded_tokenizers[model_key]

    # T5 requires a task prefix; T5 variants are recognised by their key,
    # which starts with "T5".
    if model_key.startswith("T5"):
        text = "summarize: " + text

    inputs = tokenizer(
        text,
        truncation=True,
        padding="longest",
        return_tensors="pt",
    ).to(device)

    # Beam search (4 beams) with at most 128 generated tokens.
    with torch.inference_mode():
        summary_ids = model.generate(
            **inputs,
            max_length=128,
            num_beams=4,
            early_stopping=True,
            use_cache=True,
        )

    # A single input was passed, so only the first sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [None]:
# =============================================================
# Gradio UI
# Builds the Blocks app: model picker, input box, button, output box.
# =============================================================
MODEL_CHOICES = list(loaded_models)

with gr.Blocks() as demo:
    gr.Markdown("## A100-Optimized Multi-Model News Summarization")

    # The first loaded variant is preselected.
    model_selector = gr.Dropdown(
        label="Select Model Variant",
        choices=MODEL_CHOICES,
        value=MODEL_CHOICES[0],
    )

    input_text = gr.Textbox(label="Input Text", lines=10)

    summarize_button = gr.Button("Summarize")

    output_text = gr.Textbox(label="Generated Summary", lines=5)

    # Button click -> summarize(input_text, model_selector) -> output box.
    summarize_button.click(
        fn=summarize,
        inputs=[input_text, model_selector],
        outputs=output_text,
    )


In [None]:
# =============================================================
# Launch Gradio app
# =============================================================
# Starts the `demo` Blocks app.
# NOTE(review): share=True presumably exposes the app through a public Gradio
# link — confirm that is intended before sharing the notebook.
demo.launch(share=True)
