# 🔍 LLM Text Summarization: Top 10 Colab Questions
This notebook covers the most frequently asked coding questions about using Large Language Models (LLMs) for text summarization.

Each section includes code, best practices, and comments for easy understanding.

In [1]:
!pip install -q google-cloud-aiplatform

In [4]:
# Install additional packages if they're not installed.
!pip install google-generativeai
!pip install protobuf

from google.colab import auth
from google.auth import default

auth.authenticate_user()
creds, _ = default()

# Provide system-level instruction
import google.generativeai as genai
genai.configure(api_key=creds.token)



MessageError: Error: credential propagation was unsuccessful

In [2]:
from google.colab import chat

# Provide system-level instruction
chat.set_system_instruction("You are a clinical assistant generating post-op care notes.")

# Define your few-shot prompt
prompt = """
Example 1:
Patient: John Doe, Procedure: Laparoscopic Appendectomy, Date: 04/10/2025. Vitals stable, ambulating, tolerating diet.
Post-Op Note:
Patient John Doe underwent laparoscopic appendectomy on 04/10/2025. He is currently stable, ambulating independently, and tolerating oral intake. No signs of infection or complication. Continue standard post-op care and reassess in 24 hours.

Example 2:
Patient: Jane Smith, Procedure: Total Knee Replacement, Date: 04/09/2025. Mild pain, using walker, PT initiated.
Post-Op Note:
Patient Jane Smith is post-op day 2 following total knee replacement. She reports mild pain managed with oral analgesics. Mobilizing with walker, and physical therapy has been initiated. No concerning findings.

Case:
Patient: Alex Kim, Procedure: Hernia Repair, Date: 04/11/2025. Awake, mild nausea, dressing clean.
Post-Op Note:
"""

# Send the prompt to Gemini
response = chat.chat(prompt)
print(response)

ImportError: cannot import name 'chat' from 'google.colab' (/usr/local/lib/python3.11/dist-packages/google/colab/__init__.py)

In [None]:
# 📦 Install required packages (for Google Colab)
!pip install transformers datasets rouge-score fastapi uvicorn[standard] bitsandbytes accelerate --quiet


## 1. Summarize Text with Hugging Face BART

In [None]:
# ✅ Load a BART model pre-trained for summarization
from transformers import pipeline

# Create a summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Input text
text = "Long article text goes here..."

# Generate the summary
summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
print("📝 Summary:", summary[0]['summary_text'])


## 2. Fine-tune BART on CNN/DailyMail

In [None]:
# ✅ Fine-tune BART using Hugging Face `Trainer` API
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer

# Load a small portion of the dataset for demonstration
dataset = load_dataset("cnn_dailymail", "3.0.0", split="train[:1%]")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")

# Preprocess function for summarization
def preprocess(examples):
    inputs = tokenizer(examples["article"], truncation=True, padding="max_length", max_length=512)
    targets = tokenizer(examples["highlights"], truncation=True, padding="max_length", max_length=128)
    inputs["labels"] = targets["input_ids"]
    return inputs

# Tokenize the dataset
tokenized_dataset = dataset.map(preprocess, batched=True)

# Load the model
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")

# Training configuration
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    num_train_epochs=1,
    logging_steps=10
)

# Fine-tune the model
trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_dataset)
trainer.train()


## 3. Extractive vs. Abstractive Summarization

In [None]:
# ✅ Extractive summarization with spaCy (highlights original sentences)
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from heapq import nlargest

text = "Long article text here..."
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# Calculate word frequencies
word_freq = {}
for word in doc:
    if word.text.lower() not in STOP_WORDS and word.is_alpha:
        word_freq[word.text.lower()] = word_freq.get(word.text.lower(), 0) + 1

# Score sentences based on word frequency
sentence_scores = {}
for sent in doc.sents:
    for word in sent:
        if word.text.lower() in word_freq:
            sentence_scores[sent] = sentence_scores.get(sent, 0) + word_freq[word.text.lower()]

# Extract top 3 sentences
summary_sentences = nlargest(3, sentence_scores, key=sentence_scores.get)
summary = " ".join([sent.text for sent in summary_sentences])
print("📝 Extractive Summary:", summary)


## 4. Summarize Long Documents

In [None]:
# ✅ Handle long documents using chunking
def split_text(text, chunk_size=400):
    words = text.split()
    for i in range(0, len(words), chunk_size):
        yield " ".join(words[i:i + chunk_size])

# Example input
long_text = "Very long document text..."

chunks = list(split_text(long_text))

from transformers import pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Summarize each chunk
summary_parts = [summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]['summary_text'] for chunk in chunks]
full_summary = " ".join(summary_parts)
print("📝 Full Summary:", full_summary)


## 5. ROUGE Evaluation

In [None]:
# ✅ Evaluate summarization quality using ROUGE metric
from datasets import load_metric

rouge = load_metric("rouge")

# Example prediction and reference
predictions = ["The company posted strong revenue growth and plans expansion."]
references = ["The company reported revenue increase and future expansion."]

# Compute ROUGE scores
results = rouge.compute(predictions=predictions, references=references)
print("📊 ROUGE Scores:", results)


## 6. Prompt-based Summarization (Chat Models)

In [None]:
# ✅ Summarize using chat/instruction-tuned LLMs
from transformers import AutoTokenizer, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

prompt = "Summarize this article:\n" + "Long article..." + "\nSummary:"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

output_ids = model.generate(input_ids, max_new_tokens=150)
summary = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("📝 Prompt-based Summary:", summary)


## 7. Batch Summarization from CSV

In [None]:
# ✅ Load and summarize articles from CSV
import pandas as pd
from transformers import pipeline

df = pd.read_csv("articles.csv")  # Assume column: 'content'
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Generate summaries for each row
df["summary"] = df["content"].apply(lambda x: summarizer(x, max_length=130, min_length=30, do_sample=False)[0]['summary_text'])
df.to_csv("summaries.csv", index=False)
print("✅ Summaries saved to summaries.csv")


## 8. REST API with FastAPI

In [None]:
# ✅ Build a summarization REST API with FastAPI
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

app = FastAPI()
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

class TextRequest(BaseModel):
    text: str

@app.post("/summarize")
def summarize(req: TextRequest):
    result = summarizer(req.text, max_length=130, min_length=30, do_sample=False)
    return {"summary": result[0]['summary_text']}

# ➤ To run: save as app.py and run `uvicorn app:app --reload`


## 9. Quantized Summarization (4-bit LLM)

In [None]:
# ✅ Use quantized LLMs for memory-efficient summarization
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_use_double_quant=True)
model = AutoModelForCausalLM.from_pretrained("TheBloke/LLaMA-2-7B-GGML", quantization_config=bnb_config, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("TheBloke/LLaMA-2-7B-GGML")

# Inference would proceed as usual using tokenizer and model


## 10. Multilingual Summarization (mBART)

In [None]:
# ✅ Summarize multilingual text using mBART
from transformers import MBartTokenizer, MBartForConditionalGeneration

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")

text = "Texte en français ici..."  # French input
tokenizer.src_lang = "fr_XX"

input_ids = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).input_ids
summary_ids = model.generate(input_ids, max_length=100)
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("📝 French Summary:", summary)
