## ROUGE SCORE

In [3]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py): started
  Building wheel for rouge_score (setup.py): finished with status 'done'
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24972 sha256=94d740716cf8cbb06ab679ca6988d941829d0128118b77add0d2e0e9475c1508
  Stored in directory: c:\users\sheth\appdata\local\pip\cache\wheels\85\9d\af\01feefbe7d55ef5468796f0c68225b6788e85d9d0a281e7a70
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [13]:
import pandas as pd
from rouge_score import rouge_scorer
import numpy as np

# Read the movie table from a CSV given by a relative path
movies_csv_path = "movies_sub.csv"
df = pd.read_csv(movies_csv_path)


In [9]:

# Keep only the rows that have an "IMDb Summary" value, then show the frame
rows_with_reference = df["IMDb Summary"].notna()
df = df.loc[rows_with_reference]

df

Unnamed: 0,Movie_ID,Movie_Name,Subtitle Transcript,IMDb Summary,Llama Summary Zero Shot,Llama Summary Few Shot,Completed
0,1,Toy Story (1995),sergeant yes sir establish a recon post downst...,A little boy named Andy loves to be in his roo...,"In a world where toys come to life, a showdown...","In a world where toys come to life, a showdown...",True
1,2,GoldenEye (1995),when the world is the target 72 hours ago a se...,When a deadly satellite weapon system falls in...,"In a world on the brink of destruction, a secr...","When a secret weapon system, GoldenEye, is det...",True
2,3,Four Rooms (1995),this year Miramax films takes great pride in e...,This movie features the collaborative director...,"At the mysterious Mon Senor hotel, a lone bell...","At the mysterious Mon Senor hotel, a young bel...",True
3,4,Get Shorty (1995),[Music] in a town known for fame [Music] wealt...,"Some guys get all the luck, whether they like ...","In the town of Hollywood, where fame and wealt...","In the town of Hollywood, where fame and wealt...",True
4,5,Copycat (1995),what turns on a killer is the suffering and de...,"In San Francisco, the criminal psychologist He...","In the gripping thriller Copycat, a cunning se...","In a chilling game of cat and mouse, a cunning...",True
...,...,...,...,...,...,...,...
1629,1677,Sweet Nothing (1995),a living legend to you all in the legend of a ...,Angel celebrates the birth of his daughter by ...,"In ""Sweet Nothing,"" Angelo Gazzetta, a 29-year...","Angelo Gazzetta, a 29-year-old husband and fat...",True
1630,1678,Mat' i syn (1997),International Film circuit is proud to present...,A slow and poignant story of love and patience...,"In ""Mat' i syn,"" a breathtaking new film, a po...","In the internationally-acclaimed film ""Mat' i ...",True
1631,1679,B. Monkey (1998),this next one's for all the romantics out ther...,Alan is a schoolteacher in London who also moo...,"In the streets of London, a mysterious woman k...","In the streets of London, a mysterious woman k...",True
1632,1681,You So Crazy (1994),give us the fild boy yo y'all ain't getting my...,"Martin Lawrence delivers raw, unfiltered stand...","In ""You So Crazy,"" a fiery comedian takes a st...","In a defiant stand, a charismatic artist refus...",True


In [19]:

# Pull the reference column and the two generated columns out as lists of str.
# NOTE(review): astype(str) presumably renders a missing value as the text
# "nan", which would then be scored like a real summary — confirm the Llama
# columns contain no missing entries.
summary_columns = ("IMDb Summary", "Llama Summary Zero Shot", "Llama Summary Few Shot")
imdb_summaries, llama_zero_shot, llama_few_shot = (
    df[column].astype(str).tolist() for column in summary_columns
)

# Shared ROUGE scorer (ROUGE-1, ROUGE-2, ROUGE-L) with stemming switched on
rouge_variants = ['rouge1', 'rouge2', 'rougeL']
scorer = rouge_scorer.RougeScorer(rouge_variants, use_stemmer=True)

# Function to compute ROUGE scores
def compute_rouge_scores(reference_texts, generated_texts, custom_scorer=None):
    """Average ROUGE-1, ROUGE-2 and ROUGE-L precision/recall/F1 over text pairs.

    Parameters
    ----------
    reference_texts : iterable of str
        Reference summaries; each is passed as the first argument to ``score``.
    generated_texts : iterable of str
        Generated summaries, aligned position-by-position with
        ``reference_texts``; each is passed as the second argument to ``score``.
    custom_scorer : object, optional
        Object whose ``score(reference, generated)`` returns a mapping with
        ``"rouge1"``, ``"rouge2"`` and ``"rougeL"`` entries, each exposing
        ``precision``, ``recall`` and ``fmeasure``. When omitted, the
        notebook-level ``scorer`` is used.

    Returns
    -------
    dict
        Keys ``R1_P, R1_R, R1_F1, R2_P, R2_R, R2_F1, RL_P, RL_R, RL_F1`` (in
        that order), each mapped to the mean over all pairs rounded to three
        decimals. Every value is NaN when there are no pairs.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of texts.
    """
    references = list(reference_texts)
    candidates = list(generated_texts)

    # zip() would silently drop the unmatched tail and skew the averages.
    if len(references) != len(candidates):
        raise ValueError(
            "reference_texts and generated_texts must have the same length "
            f"(got {len(references)} and {len(candidates)})"
        )

    # The global is only looked up when no scorer was supplied explicitly.
    active_scorer = scorer if custom_scorer is None else custom_scorer

    # (key in the scorer output, prefix of the result key)
    rouge_prefixes = (("rouge1", "R1"), ("rouge2", "R2"), ("rougeL", "RL"))
    # (attribute on a score entry, suffix of the result key)
    stat_suffixes = (("precision", "P"), ("recall", "R"), ("fmeasure", "F1"))

    # Insertion order matters: callers line these values up with table columns.
    results = {
        f"{prefix}_{suffix}": []
        for _, prefix in rouge_prefixes
        for _, suffix in stat_suffixes
    }

    for ref, gen in zip(references, candidates):
        scores = active_scorer.score(ref, gen)
        for rouge_type, prefix in rouge_prefixes:
            for attribute, suffix in stat_suffixes:
                results[f"{prefix}_{suffix}"].append(getattr(scores[rouge_type], attribute))

    # np.mean([]) would emit a RuntimeWarning; return NaN explicitly instead.
    if not references:
        return {key: float("nan") for key in results}

    # Compute mean scores
    return {key: round(float(np.mean(values)), 3) for key, values in results.items()}

# Average ROUGE per prompting setup, scored against the IMDb summaries
rouge_zero_shot_avg = compute_rouge_scores(imdb_summaries, llama_zero_shot)
rouge_few_shot_avg = compute_rouge_scores(imdb_summaries, llama_few_shot)

# One row per setup; the metric values follow the order of the returned dict,
# which the column labels below mirror.
rouge_metric_columns = ["R1-P", "R1-R", "R1-F1", "R2-P", "R2-R", "R2-F1", "RL-P", "RL-R", "RL-F1"]
rouge_rows = [
    ["LLaMA", setup_name, *setup_averages.values()]
    for setup_name, setup_averages in (("Zero", rouge_zero_shot_avg), ("Few", rouge_few_shot_avg))
]
result_df = pd.DataFrame(rouge_rows, columns=["Model", "Setup", *rouge_metric_columns])

# Persist the table without the index column
result_df.to_csv("rouge_summary_results.csv", index=False)

print(result_df)


   Model Setup   R1-P   R1-R  R1-F1   R2-P   R2-R  R2-F1   RL-P   RL-R  RL-F1
0  LLaMA  Zero  0.287  0.315  0.277  0.041  0.046  0.040  0.162  0.186  0.159
1  LLaMA   Few  0.299  0.298  0.275  0.042  0.043  0.038  0.171  0.177  0.159


## BERT SCORE

In [13]:
!pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting torch>=1.0.0 (from bert_score)
  Using cached torch-2.6.0-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting sympy==1.13.1 (from torch>=1.0.0->bert_score)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
   ---------------------------------------- 0.0/61.1 kB ? eta -:--:--
   ------ --------------------------------- 10.2/61.1 kB ? eta -:--:--
   -------------------------- ------------- 41.0/61.1 kB 653.6 kB/s eta 0:00:01
   ---------------------------------------- 61.1/61.1 kB 652.5 kB/s eta 0:00:00
Downloading torch-2.6.0-cp312-cp312-win_amd64.whl (204.1 MB)
   ---------------------------------------- 0.0/204.1 MB ? eta -:--:--
   ---------------------------------------- 0.1/204.1 MB ? eta -:--:--
   ---------------------------------------- 0.1/204.1 MB 1.3 MB/s eta 0:02:35
   ---------------------------------------- 

In [15]:
import pandas as pd
import torch
from bert_score import score
import numpy as np

# Load the dataset and drop rows where "IMDb Summary" is missing
df = pd.read_csv("movies_sub.csv").dropna(subset=["IMDb Summary"])

# Extract the reference column and both generated columns as lists of str.
# NOTE(review): astype(str) presumably renders a missing value as the text
# "nan" — confirm the Llama columns contain no missing entries.
imdb_summaries, llama_zero_shot, llama_few_shot = (
    df[column_name].astype(str).tolist()
    for column_name in ("IMDb Summary", "Llama Summary Zero Shot", "Llama Summary Few Shot")
)

# Compute BERTScore
def compute_bertscore(reference_texts, generated_texts, model_type="microsoft/deberta-xlarge-mnli"):
    """Return the mean BERTScore precision, recall and F1 for the given texts.

    Parameters
    ----------
    reference_texts
        Reference texts; forwarded as the second positional argument of ``score``.
    generated_texts
        Generated texts; forwarded as the first positional argument of ``score``.
    model_type : str
        Model identifier forwarded to ``score``.

    Returns
    -------
    dict
        ``{"P": ..., "R": ..., "F1": ...}``, each the ``np.mean`` of the
        corresponding value returned by ``score``.
    """
    # Run on the GPU when torch reports one, otherwise on the CPU.
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    precision, recall, f1 = score(
        generated_texts,
        reference_texts,
        model_type=model_type,
        lang="en",
        device=target_device,
    )

    labelled_results = (("P", precision), ("R", recall), ("F1", f1))
    return {label: np.mean(values.numpy()) for label, values in labelled_results}

# BERTScore per prompting setup, scored against the IMDb summaries
bertscore_zero_shot = compute_bertscore(imdb_summaries, llama_zero_shot)
bertscore_few_shot = compute_bertscore(imdb_summaries, llama_few_shot)

# One row per setup: model, setup label, then P / R / F1
bert_rows = [
    ["LLaMA", setup_name, *(setup_scores[metric] for metric in ("P", "R", "F1"))]
    for setup_name, setup_scores in (("Zero", bertscore_zero_shot), ("Few", bertscore_few_shot))
]
result_df = pd.DataFrame(bert_rows, columns=["Model", "Setup", "BERT-P", "BERT-R", "BERT-F1"])

# Persist the table without the index column
result_df.to_csv("bertscore_summary_results.csv", index=False)

print(result_df)


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.04G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.04G [00:00<?, ?B/s]

model.safetensors:  34%|###4      | 1.04G/3.04G [00:00<?, ?B/s]

   Model Setup    BERT-P    BERT-R   BERT-F1
0  LLaMA  Zero  0.579694  0.577413  0.577760
1  LLaMA   Few  0.587101  0.574420  0.579865
