In [9]:
%pip install --upgrade --quiet tiktoken langchain langgraph beautifulsoup4 langchain langchain-google-genai langchain-huggingface rouge-score datasets

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone


In [10]:
from google.colab import userdata
import os
from datasets import load_dataset
from rouge_score import rouge_scorer
from langchain_google_genai import ChatGoogleGenerativeAI

In [11]:
# Copy the GOOGLE_API_KEY secret from Colab's secret store (google.colab.userdata)
# into this process's environment variables.
# NOTE(review): presumably ChatGoogleGenerativeAI picks the key up from the
# environment — confirm against the langchain-google-genai documentation.
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

In [12]:
def load_llm(model="gemini-1.5-pro"):
  """Create a Gemini chat client for one of the supported model names.

  Args:
    model: Model identifier. Only "gemini-1.5-pro" and "gemini-1.5-flash"
      are accepted.

  Returns:
    A ChatGoogleGenerativeAI instance built with temperature 0, no explicit
    token or timeout limit (both None) and max_retries=2.

  Raises:
    ValueError: If `model` is not one of the supported names.
  """
  supported_models = ("gemini-1.5-pro", "gemini-1.5-flash")

  # Guard clause: anything outside the allow-list is rejected up front.
  if model not in supported_models:
    raise ValueError("Invalid model name")

  # Every supported model is constructed with the same client settings;
  # only the model name differs.
  client_settings = {
      "temperature": 0,
      "max_tokens": None,
      "timeout": None,
      "max_retries": 2,
  }
  return ChatGoogleGenerativeAI(model=model, **client_settings)

In [26]:
from langchain_core.prompts import ChatPromptTemplate


def get_prompt_template():
  """Build the chat prompt used to request a summary.

  The template has two messages:
    * a system message carrying the instruction, with a `{num_words}`
      placeholder for the requested summary length;
    * a human message holding the text to summarize via `{context}`.

  Returns:
    The object returned by ChatPromptTemplate.from_messages for those
    two messages.
  """
  # "\n\n" must be real newlines: the previous "\\n\\n" put literal
  # backslash-n characters into the instruction text sent to the model.
  system_instruction = (
      "Write a concise summary of the following in {num_words} words:\n\n"
  )

  prompt = ChatPromptTemplate.from_messages(
      [
          ("system", system_instruction),
          ("human", "{context}"),
      ]
  )

  return prompt


def summarize_text(text, num_words=50, model="gemini-1.5-pro"):
  """Summarize `text` with the chosen Gemini model.

  Args:
    text: The text to summarize; substituted for `{context}` in the prompt.
    num_words: Requested summary length in words; substituted for
      `{num_words}` in the prompt.
    model: Model name forwarded to `load_llm`.

  Returns:
    The `content` attribute of the model's response.
  """
  # The model client is created first, then the prompt is piped into it.
  chat_model = load_llm(model)
  pipeline = get_prompt_template() | chat_model

  prompt_inputs = {"context": text, "num_words": num_words}
  response = pipeline.invoke(prompt_inputs)
  return response.content

In [14]:
# Load the "test" split of the cnn_dailymail dataset (config "3.0.0").
dataset = load_dataset("cnn_dailymail", "3.0.0", split="test")
# Evaluation sample: rows 6-10 (five examples) of the 'article' column are the inputs to summarize.
texts = dataset['article'][6:11]
# The 'highlights' column for the same rows; passed to the ROUGE/BLEU helpers as reference summaries.
references = dataset['highlights'][6:11]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/15.6k [00:00<?, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [15]:
def calculate_rouge(summaries, references):
    """Score summaries against references with ROUGE and average the F-measures.

    Summaries and references are paired positionally with `zip`, so any
    surplus items in the longer input are ignored. Each pair's ROUGE-1,
    ROUGE-2 and ROUGE-L F-measures are printed, then averaged.

    Args:
        summaries: Iterable of generated summary strings.
        references: Iterable of reference summary strings, aligned with
            `summaries`.

    Returns:
        Dict with keys 'rouge1', 'rouge2' and 'rougeL' mapping to the mean
        F-measure over the pairs that were scored. All values are 0.0 when
        no pair was scored (empty input).
    """
    metric_names = ['rouge1', 'rouge2', 'rougeL']
    scorer = rouge_scorer.RougeScorer(metric_names, use_stemmer=True)
    totals = {name: 0.0 for name in metric_names}

    # Count the pairs actually scored: dividing by len(summaries) would skew
    # the average whenever `summaries` is longer than `references`, and it
    # would fail for iterables that have no len().
    num_scored = 0
    for num_scored, (summary, reference) in enumerate(zip(summaries, references), start=1):
        score = scorer.score(reference, summary)
        for name in metric_names:
            fmeasure = score[name].fmeasure
            print(f'{name} score of summary{num_scored}:{fmeasure}')
            totals[name] += fmeasure

    # Nothing scored: return zeros rather than dividing by zero.
    if num_scored == 0:
        return totals

    # Average the ROUGE scores
    return {name: total / num_scored for name, total in totals.items()}

In [16]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Download NLTK's 'punkt' tokenizer data.
# NOTE(review): calculate_bleu in this cell tokenizes with str.split(), not an
# NLTK tokenizer — confirm whether this download is still needed.
nltk.download('punkt')

def calculate_bleu(summaries, references):
    """Compute the average sentence-level BLEU of summaries against references.

    Summaries and references are paired positionally with `zip`. Both texts
    are tokenized by whitespace (`str.split()`), each pair is scored with
    `sentence_bleu` using smoothing method 4, and each score is printed.

    Args:
        summaries: Iterable of generated summary strings.
        references: Iterable of reference summary strings, aligned with
            `summaries`.

    Returns:
        Mean BLEU score over the scored pairs, or 0.0 when no pair was
        scored (empty input).
    """
    smoothie = SmoothingFunction().method4
    bleu_scores = []

    for i, (summary, reference) in enumerate(zip(summaries, references), start=1):
        # sentence_bleu expects a list of tokenized references per hypothesis.
        reference_tokens = [reference.split()]
        summary_tokens = summary.split()

        score = sentence_bleu(reference_tokens, summary_tokens, smoothing_function=smoothie)
        bleu_scores.append(score)
        print(f"BLEU score of summary{i}: {score}")

    # Nothing scored: return 0.0 rather than dividing by zero.
    if not bleu_scores:
        return 0.0

    return sum(bleu_scores) / len(bleu_scores)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [23]:
def run_summarization(text, model_name, num_words, reference_summaries=None):
  """Summarize the given texts with one model and print ROUGE/BLEU scores.

  Args:
    text: The texts to summarize: an iterable of strings, or a single
      string (treated as a one-element list).
    model_name: Model name forwarded to `summarize_text`.
    num_words: Requested summary length in words.
    reference_summaries: Reference summaries aligned with `text`. When
      omitted, the notebook-level `references` variable is used, so it
      must then line up with the texts passed in.

  Returns:
    None. The generated summaries, per-summary scores and averages are
    printed.
  """
  # Use the argument that was passed in. Previously the comprehension
  # iterated over the global `texts` and silently ignored this parameter.
  articles = [text] if isinstance(text, str) else list(text)

  if reference_summaries is None:
    reference_summaries = references

  summaries = [summarize_text(article, num_words, model_name) for article in articles]

  for i, summary in enumerate(summaries):
    print(f"\nGenerated Summary {i+1} ({model_name}):\n", summary)

  rouge_scores = calculate_rouge(summaries, reference_summaries)
  print(f"\nROUGE Scores for {model_name}:", rouge_scores)
  bleu_score = calculate_bleu(summaries, reference_summaries)
  print(f"Average BLEU score: {bleu_score}")

In [27]:
run_summarization(texts,'gemini-1.5-flash', 50)


Generated Summary 1 (gemini-1.5-flash):
 Amnesty International's report highlights a global trend of using the death penalty for terrorism, despite a decrease in executions. While some regions show progress towards abolition, the number of death sentences increased in 2014, driven by mass sentencing in countries like Egypt and Nigeria. The report criticizes judicial processes in many countries, emphasizing the need for fair trials and abolition of the death penalty. 


Generated Summary 2 (gemini-1.5-flash):
 Andrew Getty, heir to the Getty oil fortune, died at 47. The coroner's preliminary assessment indicates natural causes, with no foul play suspected. Getty had health issues and medication was found at his home. His family confirmed his death and requested privacy. Getty had recently filed for a restraining order against an ex-girlfriend, citing a serious medical condition. 


Generated Summary 3 (gemini-1.5-flash):


Generated Summary 4 (gemini-1.5-flash):
 Bob Barker, the legend

In [29]:
run_summarization(texts,'gemini-1.5-pro', 50)




Generated Summary 1 (gemini-1.5-pro):
 Amnesty International's 2014 death penalty report reveals a global decrease in executions but a surge in death sentences.  Governments, particularly in China and Pakistan, increasingly justify capital punishment using terrorism threats.  While positive trends exist, flawed judicial processes remain a concern in many countries still applying the death penalty.


Generated Summary 2 (gemini-1.5-pro):
 Andrew Getty, 47, heir to the Getty oil fortune, was found dead in his Los Angeles home.  Preliminary findings suggest natural causes, possibly related to pre-existing health issues.  No foul play is suspected, though a full autopsy is pending.


Generated Summary 3 (gemini-1.5-pro):


Generated Summary 4 (gemini-1.5-pro):
 Bob Barker, 91, made a surprise return to "The Price is Right" on April 1st, 2015.  He hosted the first game, "Lucky Seven," marking his first appearance since retiring in 2007.  Drew Carey then resumed hosting duties.


Generated 