In [None]:
pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=c94303b5f480a072dcd12d688956c8a1baea473c9c1fd4cfb984ecc8e2b35958
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
import json
import numpy as np
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer

In [None]:
def calculate_average_scores(json_file, bleu_smoothing=None):
    """
    Averages sentence-level BLEU and ROUGE F1 scores over a JSON-lines file.

    Every non-blank line is parsed as a JSON object. Its "pred" and "label"
    string values are stripped of surrounding square brackets and spaces,
    then compared: BLEU on whitespace tokens, ROUGE-1/2/L (with stemming)
    on the stripped strings.

    Parameters:
    - json_file: Path to the JSON-lines file to score.
    - bleu_smoothing: Optional smoothing function forwarded to NLTK's
      sentence_bleu as `smoothing_function`. The default (None) keeps
      NLTK's unsmoothed behaviour.

    Returns:
    - Dict with the keys "Average BLEU", "Average ROUGE-1",
      "Average ROUGE-2" and "Average ROUGE-L".

    Raises:
    - ValueError: If the file contains no non-blank lines (previously this
      surfaced as a ZeroDivisionError).
    """
    # Blank lines (e.g. a stray trailing newline) are skipped instead of
    # being handed to json.loads, which would raise on them.
    with open(json_file, 'r', encoding='utf-8') as file:
        records = [json.loads(line) for line in file if line.strip()]

    if not records:
        raise ValueError(f"No records found in {json_file}; cannot compute average scores.")

    rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    total_bleu = 0
    total_rouge1 = 0
    total_rouge2 = 0
    total_rougeL = 0

    for data in records:
        pred = data.get("pred", "").strip("[] ")
        label = data.get("label", "").strip("[] ")

        # BLEU works on token lists; a plain whitespace split is the tokenizer here.
        pred_tokens = pred.split()
        label_tokens = label.split()
        total_bleu += sentence_bleu(
            [label_tokens], pred_tokens, smoothing_function=bleu_smoothing
        )

        # RougeScorer.score takes (target, prediction) and tokenizes internally.
        rouge_score = rouge.score(label, pred)
        total_rouge1 += rouge_score['rouge1'].fmeasure
        total_rouge2 += rouge_score['rouge2'].fmeasure
        total_rougeL += rouge_score['rougeL'].fmeasure

    count = len(records)
    return {
        "Average BLEU": total_bleu / count,
        "Average ROUGE-1": total_rouge1 / count,
        "Average ROUGE-2": total_rouge2 / count,
        "Average ROUGE-L": total_rougeL / count
    }


In [None]:
# Model 1: evaluate the predictions stored in the JSONL file below.
json_file_path = "fb_lora_m1_test_fireball_inputs_preds_labels.jsonl"
average_scores = calculate_average_scores(json_file_path)

# A single print call with a newline separator emits the same five lines
# as five separate print calls would.
print(
    "Average Scores:",
    f"  BLEU: {average_scores['Average BLEU']:.4f}",
    f"  ROUGE-1 F1: {average_scores['Average ROUGE-1']:.4f}",
    f"  ROUGE-2 F1: {average_scores['Average ROUGE-2']:.4f}",
    f"  ROUGE-L F1: {average_scores['Average ROUGE-L']:.4f}",
    sep="\n",
)

Average Scores:
  BLEU: 0.0243
  ROUGE-1 F1: 0.4540
  ROUGE-2 F1: 0.1381
  ROUGE-L F1: 0.4514


In [None]:
# Model 2: evaluate the predictions stored in the JSONL file below.
json_file_path = "fb_lora_m2_test_fireball_inputs_preds_labels.jsonl"
average_scores = calculate_average_scores(json_file_path)

# A single print call with a newline separator emits the same five lines
# as five separate print calls would.
print(
    "Average Scores:",
    f"  BLEU: {average_scores['Average BLEU']:.4f}",
    f"  ROUGE-1 F1: {average_scores['Average ROUGE-1']:.4f}",
    f"  ROUGE-2 F1: {average_scores['Average ROUGE-2']:.4f}",
    f"  ROUGE-L F1: {average_scores['Average ROUGE-L']:.4f}",
    sep="\n",
)

Average Scores:
  BLEU: 0.0505
  ROUGE-1 F1: 0.5374
  ROUGE-2 F1: 0.2323
  ROUGE-L F1: 0.5337


In [None]:
# Model 3: evaluate the predictions stored in the JSONL file below.
json_file_path = "fb_lora_m3_test_fireball_inputs_preds_labels.jsonl"
average_scores = calculate_average_scores(json_file_path)

# A single print call with a newline separator emits the same five lines
# as five separate print calls would.
print(
    "Average Scores:",
    f"  BLEU: {average_scores['Average BLEU']:.4f}",
    f"  ROUGE-1 F1: {average_scores['Average ROUGE-1']:.4f}",
    f"  ROUGE-2 F1: {average_scores['Average ROUGE-2']:.4f}",
    f"  ROUGE-L F1: {average_scores['Average ROUGE-L']:.4f}",
    sep="\n",
)

Average Scores:
  BLEU: 0.0106
  ROUGE-1 F1: 0.4907
  ROUGE-2 F1: 0.1738
  ROUGE-L F1: 0.4852


In [None]:
# Model 4: evaluate the predictions stored in the JSONL file below.
json_file_path = "fb_lora_m4_test_fireball_inputs_preds_labels.jsonl"
average_scores = calculate_average_scores(json_file_path)

# A single print call with a newline separator emits the same five lines
# as five separate print calls would.
print(
    "Average Scores:",
    f"  BLEU: {average_scores['Average BLEU']:.4f}",
    f"  ROUGE-1 F1: {average_scores['Average ROUGE-1']:.4f}",
    f"  ROUGE-2 F1: {average_scores['Average ROUGE-2']:.4f}",
    f"  ROUGE-L F1: {average_scores['Average ROUGE-L']:.4f}",
    sep="\n",
)

Average Scores:
  BLEU: 0.0035
  ROUGE-1 F1: 0.4048
  ROUGE-2 F1: 0.0628
  ROUGE-L F1: 0.4029


In [None]:
# Model 5: evaluate the predictions stored in the JSONL file below.
json_file_path = "fb_lora_m5_test_fireball_inputs_preds_labels.jsonl"
average_scores = calculate_average_scores(json_file_path)

# A single print call with a newline separator emits the same five lines
# as five separate print calls would.
print(
    "Average Scores:",
    f"  BLEU: {average_scores['Average BLEU']:.4f}",
    f"  ROUGE-1 F1: {average_scores['Average ROUGE-1']:.4f}",
    f"  ROUGE-2 F1: {average_scores['Average ROUGE-2']:.4f}",
    f"  ROUGE-L F1: {average_scores['Average ROUGE-L']:.4f}",
    sep="\n",
)

Average Scores:
  BLEU: 0.0315
  ROUGE-1 F1: 0.4269
  ROUGE-2 F1: 0.1217
  ROUGE-L F1: 0.4230


**Bar Graphs for ROUGE & BLEU**

In [54]:
# Bar graph for ROUGE-1 scores
import matplotlib.pyplot as plt
import seaborn as sns

def plot_rouge1_scores(models, descriptions, rouge1_scores, output_file):
    """
    Draws one bar per model showing its ROUGE-1 score and writes the chart to a PNG file.

    Parameters:
    - models: List of model names, used as the x-axis categories.
    - descriptions: List of description strings, indexed in parallel with
      `models`; shown as "<model>: <description>" lines in a boxed note at
      the top of the axes.
    - rouge1_scores: List of ROUGE-1 scores, one per model (bar heights).
    - output_file: Path the PNG image is saved to.

    The figure is closed after saving and a confirmation line is printed.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # One "coolwarm" colour per bar.
    palette = sns.color_palette("coolwarm", len(models))
    ax.bar(models, rouge1_scores, color=palette, edgecolor='black', linewidth=1.2)

    ax.set_title('ROUGE-1 Scores of Models', fontsize=16)
    ax.set_xlabel('Models', fontsize=14)
    ax.set_ylabel('ROUGE-1 Score', fontsize=14)
    ax.set_ylim(0, 1)

    # Print each value slightly above its bar; categorical bars sit at x = 0, 1, 2, ...
    for position, value in enumerate(rouge1_scores):
        ax.text(position, value + 0.02, f"{value:.2f}", ha='center', fontsize=12)

    # Boxed note anchored in axes coordinates (top centre).
    note_lines = [f"{name}: {descriptions[idx]}" for idx, name in enumerate(models)]
    ax.text(
        0.5, 0.97,
        '\n'.join(note_lines),
        ha='center', va='top', fontsize=12,
        transform=ax.transAxes,
        bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5')
    )

    fig.savefig(output_file, format='png', dpi=300, bbox_inches='tight')
    plt.close(fig)
    print(f"Bar graph saved as {output_file}")

# Inputs for the ROUGE-1 chart.
models = ["Model 1", "Model 2", "Model 3", "Model 4", "Model 5"]
descriptions = [
    "utter_cactor_bcombat",
    "utter_cactor",
    "bcombat_cactor",
    "cactor",
    "utter"
]
# Average ROUGE-1 F1 per model, rounded to two decimals from the evaluation
# cells above (0.4540, 0.5374, 0.4907, 0.4048, 0.4269). The first two entries
# were previously swapped, which attributed Model 2's score to Model 1.
rouge1_scores = [0.45, 0.54, 0.49, 0.40, 0.43]
output_file = "rouge1_scores.png"

plot_rouge1_scores(models, descriptions, rouge1_scores, output_file)


Bar graph saved as rouge1_scores.png


In [55]:
# Bar graph for ROUGE-2 scores
import matplotlib.pyplot as plt
import seaborn as sns

def plot_rouge2_scores(models, descriptions, rouge2_scores, output_file):
    """
    Draws one bar per model showing its ROUGE-2 score and writes the chart to a PNG file.

    Parameters:
    - models: List of model names, used as the x-axis categories.
    - descriptions: List of description strings, indexed in parallel with
      `models`; shown as "<model>: <description>" lines in a boxed note at
      the top of the axes.
    - rouge2_scores: List of ROUGE-2 scores, one per model (bar heights).
    - output_file: Path the PNG image is saved to.

    The figure is closed after saving and a confirmation line is printed.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # One "coolwarm" colour per bar.
    palette = sns.color_palette("coolwarm", len(models))
    ax.bar(models, rouge2_scores, color=palette, edgecolor='black', linewidth=1.2)

    ax.set_title('ROUGE-2 Scores of Models', fontsize=16)
    ax.set_xlabel('Models', fontsize=14)
    ax.set_ylabel('ROUGE-2 Score', fontsize=14)
    ax.set_ylim(0, 1)

    # Print each value slightly above its bar; categorical bars sit at x = 0, 1, 2, ...
    for position, value in enumerate(rouge2_scores):
        ax.text(position, value + 0.02, f"{value:.2f}", ha='center', fontsize=12)

    # Boxed note anchored in axes coordinates (top centre).
    note_lines = [f"{name}: {descriptions[idx]}" for idx, name in enumerate(models)]
    ax.text(
        0.5, 0.97,
        '\n'.join(note_lines),
        ha='center', va='top', fontsize=12,
        transform=ax.transAxes,
        bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5')
    )

    fig.savefig(output_file, format='png', dpi=300, bbox_inches='tight')
    plt.close(fig)
    print(f"Bar graph saved as {output_file}")


# Inputs for the ROUGE-2 chart: model names, their descriptions, and the
# average ROUGE-2 F1 values from the evaluation cells rounded to two decimals.
models = [
    "Model 1",
    "Model 2",
    "Model 3",
    "Model 4",
    "Model 5",
]
descriptions = ["utter_cactor_bcombat", "utter_cactor", "bcombat_cactor", "cactor", "utter"]
rouge2_scores = [0.14, 0.23, 0.17, 0.06, 0.12]
output_file = "rouge2_scores.png"

plot_rouge2_scores(models, descriptions, rouge2_scores, output_file)


Bar graph saved as rouge2_scores.png


In [63]:
# Bar graph for ROUGE-L scores
import matplotlib.pyplot as plt
import seaborn as sns

def plot_rougeL_scores(models, descriptions, rougeL_scores, output_file):
    """
    Draws one bar per model showing its ROUGE-L score and writes the chart to a PNG file.

    Parameters:
    - models: List of model names, used as the x-axis categories.
    - descriptions: List of description strings, indexed in parallel with
      `models`; shown as "<model>: <description>" lines in a boxed note at
      the top of the axes.
    - rougeL_scores: List of ROUGE-L scores, one per model (bar heights).
    - output_file: Path the PNG image is saved to.

    The figure is closed after saving and a confirmation line is printed.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # One "coolwarm" colour per bar.
    palette = sns.color_palette("coolwarm", len(models))
    ax.bar(models, rougeL_scores, color=palette, edgecolor='black', linewidth=1.2)

    ax.set_title('ROUGE-L Scores of Models', fontsize=16)
    ax.set_xlabel('Models', fontsize=14)
    ax.set_ylabel('ROUGE-L Score', fontsize=14)
    ax.set_ylim(0, 1)

    # Print each value slightly above its bar; categorical bars sit at x = 0, 1, 2, ...
    for position, value in enumerate(rougeL_scores):
        ax.text(position, value + 0.02, f"{value:.2f}", ha='center', fontsize=12)

    # Boxed note anchored in axes coordinates (top centre).
    note_lines = [f"{name}: {descriptions[idx]}" for idx, name in enumerate(models)]
    ax.text(
        0.5, 0.97,
        '\n'.join(note_lines),
        ha='center', va='top', fontsize=12,
        transform=ax.transAxes,
        bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5')
    )

    fig.savefig(output_file, format='png', dpi=300, bbox_inches='tight')
    plt.close(fig)
    print(f"Bar graph saved as {output_file}")


# Inputs for the ROUGE-L chart: model names, their descriptions, and the
# average ROUGE-L F1 values from the evaluation cells rounded to two decimals.
models = [
    "Model 1",
    "Model 2",
    "Model 3",
    "Model 4",
    "Model 5",
]
descriptions = ["utter_cactor_bcombat", "utter_cactor", "bcombat_cactor", "cactor", "utter"]
rougeL_scores = [0.45, 0.53, 0.49, 0.40, 0.42]
output_file = "rougeL_scores.png"

plot_rougeL_scores(models, descriptions, rougeL_scores, output_file)


Bar graph saved as rougeL_scores.png


**BLEU Bar Graph**

In [62]:
# Bar graph for BLEU scores

import matplotlib.pyplot as plt
import seaborn as sns

def plot_bleu(models, descriptions, bleu_scores, output_file):
    """
    Draws one bar per model showing its BLEU score and writes the chart to a PNG file.

    Parameters:
    - models: List of model names, used as the x-axis categories.
    - descriptions: List of description strings, indexed in parallel with
      `models`; shown as "<model>: <description>" lines in a boxed note at
      the top of the axes.
    - bleu_scores: List of BLEU scores, one per model (bar heights).
    - output_file: Path the PNG image is saved to.

    The figure is closed after saving and a confirmation line is printed.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # One "coolwarm" colour per bar.
    palette = sns.color_palette("coolwarm", len(models))
    ax.bar(models, bleu_scores, color=palette, edgecolor='black', linewidth=1.2)

    ax.set_title('BLEU Scores of Models', fontsize=16)
    ax.set_xlabel('Models', fontsize=14)
    ax.set_ylabel('BLEU Scores', fontsize=14)
    ax.set_ylim(0, 0.5)

    # Label each bar with four decimals: two-decimal labels collapse small
    # BLEU values (e.g. 0.0035 would be shown as "0.00").
    for position, value in enumerate(bleu_scores):
        ax.text(position, value + 0.02, f"{value:.4f}", ha='center', fontsize=12)

    # Boxed note anchored in axes coordinates (top centre).
    note_lines = [f"{name}: {descriptions[idx]}" for idx, name in enumerate(models)]
    ax.text(
        0.5, 0.97,
        '\n'.join(note_lines),
        ha='center', va='top', fontsize=12,
        transform=ax.transAxes,
        bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5')
    )

    fig.savefig(output_file, format='png', dpi=300, bbox_inches='tight')
    plt.close(fig)
    print(f"Bar graph saved as {output_file}")


# Inputs for the BLEU chart.
models = ["Model 1", "Model 2", "Model 3", "Model 4", "Model 5"]
descriptions = [
    "utter_cactor_bcombat",
    "utter_cactor",
    "bcombat_cactor",
    "cactor",
    "utter"
]
# Average BLEU per model as printed by the evaluation cells above.
# Model 3 was previously entered as 0.0105; the computed value is 0.0106.
bleu_scores = [0.0243, 0.0505, 0.0106, 0.0035, 0.0315]
output_file = "bleu_scores.png"

plot_bleu(models, descriptions, bleu_scores, output_file)


Bar graph saved as bleu_scores.png


**Cosine Similarity**

In [None]:
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

json_file_path = "fb_lora_m5_test_fireball_inputs_preds_labels.jsonl"

# Keep only non-blank lines so a stray empty line cannot crash json.loads below.
with open(json_file_path, 'r', encoding='utf-8') as file:
    lines = [line for line in file if line.strip()]


def get_cosine_similarity(sentence1, sentence2):
    """
    Returns the cosine similarity between the TF-IDF vectors of two sentences.

    The vectorizer is fitted on just these two sentences. Returns 0.0 when
    neither sentence yields any token for the vectorizer.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary, i.e. when
        # neither text contains a token; score such a pair as no similarity.
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
    return cosine_sim[0][0]


ans = []
for line in lines:
    data = json.loads(line)
    # strip("") removes nothing; strip the same bracket/space characters
    # that the BLEU/ROUGE evaluation strips.
    sentence1 = data.get("pred", "").strip("[] ")
    sentence2 = data.get("label", "").strip("[] ")
    similarity = get_cosine_similarity(sentence1, sentence2)
    ans.append(similarity)

if not ans:
    raise ValueError(f"No records found in {json_file_path}; cannot average cosine similarity.")
avg = sum(ans) / len(ans)
print(avg)

0.2628606223214234


In [None]:
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

json_file_path = "/content/fb_lora_m1_test_fireball_inputs_preds_labels.jsonl"

# Keep only non-blank lines so a stray empty line cannot crash json.loads below.
with open(json_file_path, 'r', encoding='utf-8') as file:
    lines = [line for line in file if line.strip()]


def get_cosine_similarity(sentence1, sentence2):
    """
    Returns the cosine similarity between the TF-IDF vectors of two sentences.

    The vectorizer is fitted on just these two sentences. Returns 0.0 when
    neither sentence yields any token for the vectorizer.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary, i.e. when
        # neither text contains a token; score such a pair as no similarity.
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
    return cosine_sim[0][0]


ans = []
for line in lines:
    data = json.loads(line)
    # strip("") removes nothing; strip the same bracket/space characters
    # that the BLEU/ROUGE evaluation strips.
    sentence1 = data.get("pred", "").strip("[] ")
    sentence2 = data.get("label", "").strip("[] ")
    similarity = get_cosine_similarity(sentence1, sentence2)
    ans.append(similarity)

if not ans:
    raise ValueError(f"No records found in {json_file_path}; cannot average cosine similarity.")
avg1 = sum(ans) / len(ans)
print(avg1)

0.3035742386182895


In [None]:
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

json_file_path = "/content/fb_lora_m2_test_fireball_inputs_preds_labels.jsonl"

# Keep only non-blank lines so a stray empty line cannot crash json.loads below.
with open(json_file_path, 'r', encoding='utf-8') as file:
    lines = [line for line in file if line.strip()]


def get_cosine_similarity(sentence1, sentence2):
    """
    Returns the cosine similarity between the TF-IDF vectors of two sentences.

    The vectorizer is fitted on just these two sentences. Returns 0.0 when
    neither sentence yields any token for the vectorizer.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary, i.e. when
        # neither text contains a token; score such a pair as no similarity.
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
    return cosine_sim[0][0]


ans = []
for line in lines:
    data = json.loads(line)
    # strip("") removes nothing; strip the same bracket/space characters
    # that the BLEU/ROUGE evaluation strips.
    sentence1 = data.get("pred", "").strip("[] ")
    sentence2 = data.get("label", "").strip("[] ")
    similarity = get_cosine_similarity(sentence1, sentence2)
    ans.append(similarity)

if not ans:
    raise ValueError(f"No records found in {json_file_path}; cannot average cosine similarity.")
avg2 = sum(ans) / len(ans)
print(avg2)

0.3731405640502444


In [None]:
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

json_file_path = "/content/fb_lora_m3_test_fireball_inputs_preds_labels.jsonl"

# Keep only non-blank lines so a stray empty line cannot crash json.loads below.
with open(json_file_path, 'r', encoding='utf-8') as file:
    lines = [line for line in file if line.strip()]


def get_cosine_similarity(sentence1, sentence2):
    """
    Returns the cosine similarity between the TF-IDF vectors of two sentences.

    The vectorizer is fitted on just these two sentences. Returns 0.0 when
    neither sentence yields any token for the vectorizer.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary, i.e. when
        # neither text contains a token; score such a pair as no similarity.
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
    return cosine_sim[0][0]


ans = []
for line in lines:
    data = json.loads(line)
    # strip("") removes nothing; strip the same bracket/space characters
    # that the BLEU/ROUGE evaluation strips.
    sentence1 = data.get("pred", "").strip("[] ")
    sentence2 = data.get("label", "").strip("[] ")
    similarity = get_cosine_similarity(sentence1, sentence2)
    ans.append(similarity)

if not ans:
    raise ValueError(f"No records found in {json_file_path}; cannot average cosine similarity.")
avg3 = sum(ans) / len(ans)
print(avg3)

0.30996187343732035


In [None]:
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

json_file_path = "/content/fb_lora_m4_test_fireball_inputs_preds_labels.jsonl"

# Keep only non-blank lines so a stray empty line cannot crash json.loads below.
with open(json_file_path, 'r', encoding='utf-8') as file:
    lines = [line for line in file if line.strip()]


def get_cosine_similarity(sentence1, sentence2):
    """
    Returns the cosine similarity between the TF-IDF vectors of two sentences.

    The vectorizer is fitted on just these two sentences. Returns 0.0 when
    neither sentence yields any token for the vectorizer.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary, i.e. when
        # neither text contains a token; score such a pair as no similarity.
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
    return cosine_sim[0][0]


ans = []
for line in lines:
    data = json.loads(line)
    # strip("") removes nothing; strip the same bracket/space characters
    # that the BLEU/ROUGE evaluation strips.
    sentence1 = data.get("pred", "").strip("[] ")
    sentence2 = data.get("label", "").strip("[] ")
    similarity = get_cosine_similarity(sentence1, sentence2)
    ans.append(similarity)

if not ans:
    raise ValueError(f"No records found in {json_file_path}; cannot average cosine similarity.")
avg4 = sum(ans) / len(ans)
print(avg4)

0.23226209984763513


In [None]:
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

json_file_path = "/content/fb_lora_m5_test_fireball_inputs_preds_labels.jsonl"

# Keep only non-blank lines so a stray empty line cannot crash json.loads below.
with open(json_file_path, 'r', encoding='utf-8') as file:
    lines = [line for line in file if line.strip()]


def get_cosine_similarity(sentence1, sentence2):
    """
    Returns the cosine similarity between the TF-IDF vectors of two sentences.

    The vectorizer is fitted on just these two sentences. Returns 0.0 when
    neither sentence yields any token for the vectorizer.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary, i.e. when
        # neither text contains a token; score such a pair as no similarity.
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
    return cosine_sim[0][0]


ans = []
for line in lines:
    data = json.loads(line)
    # strip("") removes nothing; strip the same bracket/space characters
    # that the BLEU/ROUGE evaluation strips.
    sentence1 = data.get("pred", "").strip("[] ")
    sentence2 = data.get("label", "").strip("[] ")
    similarity = get_cosine_similarity(sentence1, sentence2)
    ans.append(similarity)

if not ans:
    raise ValueError(f"No records found in {json_file_path}; cannot average cosine similarity.")
avg5 = sum(ans) / len(ans)
print(avg5)

0.2628606223214234


In [57]:
# Bar graph for cosine similarity

import matplotlib.pyplot as plt
import seaborn as sns

def plot_cosine_sim(models, descriptions, rouge1_scores, output_file):
    """
    Generates a bar graph of cosine-similarity scores for the given models,
    with boxed descriptions at the top. Saves the graph as an image.

    Parameters:
    - models: List of model names (x-axis labels).
    - descriptions: List of descriptions for the models to display in a box.
    - rouge1_scores: List of cosine-similarity scores corresponding to the
      models. The parameter name is kept unchanged for backward
      compatibility even though the values are cosine similarities.
    - output_file: Path to save the bar graph image.
    """
    # Plot the values that were passed in. The body previously ignored this
    # argument and read a notebook-level global instead.
    scores = rouge1_scores

    # Set up the figure
    fig, ax = plt.subplots(figsize=(8, 6))

    # Generate the color palette
    colors = sns.color_palette("coolwarm", len(models))

    # Create the bar graph with custom colors
    ax.bar(models, scores, color=colors, edgecolor='black', linewidth=1.2)

    # Add titles and labels
    ax.set_title('Cosine Similarity of Models', fontsize=16)
    ax.set_xlabel('Models', fontsize=14)
    ax.set_ylabel('Cosine Similarity', fontsize=14)
    ax.set_ylim(0, 1)

    # Add values on top of bars
    for i, score in enumerate(scores):
        ax.text(i, score + 0.02, f"{score:.2f}", ha='center', fontsize=12)

    # Add model descriptions inside a text box
    description_text = '\n'.join([f"{models[i]}: {descriptions[i]}" for i in range(len(models))])
    ax.text(
        0.5, 0.97,  # Axes coordinates: horizontally centred, just below the top edge
        description_text, ha='center', va='top', fontsize=12,
        transform=ax.transAxes,
        bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5')
    )

    # Save the figure
    fig.savefig(output_file, format='png', dpi=300, bbox_inches='tight')
    plt.close(fig)
    print(f"Bar graph saved as {output_file}")


models = ["Model 1", "Model 2", "Model 3", "Model 4", "Model 5"]
descriptions = [
    "utter_cactor_bcombat",
    "utter_cactor",
    "bcombat_cactor",
    "cactor",
    "utter"
]
# Average TF-IDF cosine similarity per model, rounded from the values
# printed by the cosine-similarity cells above.
cosine_sim_scores = [0.3, 0.37, 0.31, 0.23, 0.26]
output_file = "cosine_sim.png"

# Pass the cosine scores explicitly. The call previously passed the ROUGE-1
# list left over from an earlier cell.
plot_cosine_sim(models, descriptions, cosine_sim_scores, output_file)


Bar graph saved as cosine_sim.png
