# BLEU and ROUGE

In [None]:
from google.colab import files

# Open the Colab upload widget; the result maps each file name to its content.
uploaded = files.upload()

# Echo back the name and size of every file that was received.
for fn, payload in uploaded.items():
  message = 'User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(payload))
  print(message)

In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [None]:
# Helper functions
def load_jsonl(file_path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file_path: Path of the ``.jsonl`` file; each non-blank line must hold
            one complete JSON value.

    Returns:
        A list with one decoded Python object per non-blank line, in file
        order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not rely on the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterate lazily and skip blank lines (e.g. a trailing newline at the
        # end of the file), which json.loads would otherwise reject.
        return [json.loads(line) for line in file if line.strip()]

def calculate_bleu_scores_with_smoothing(gpt4_descs, llava_descs):
    """Return the mean smoothed sentence-level BLEU over paired descriptions.

    Each GPT-4 description is used as the single reference for the LLaVA
    description at the same position. Both texts are tokenised by splitting
    on whitespace.

    Args:
        gpt4_descs: Iterable of reference description strings.
        llava_descs: Iterable of candidate description strings.

    Returns:
        The arithmetic mean of the per-pair BLEU scores, or ``0.0`` when
        there is no pair to score.
    """
    # zip() stops at the shorter input, so only positions present in both
    # sequences are scored and counted in the mean.
    pairs = list(zip(gpt4_descs, llava_descs))
    if not pairs:
        # Nothing to average; avoid dividing by zero.
        return 0.0

    smoothie = SmoothingFunction().method4
    bleu_scores = [
        sentence_bleu(
            [gpt4_text.split()],  # BLEU expects a list of tokenised references
            llava_text.split(),
            weights=(0.25, 0.25, 0.25, 0.25),
            smoothing_function=smoothie,
        )
        for gpt4_text, llava_text in pairs
    ]
    return sum(bleu_scores) / len(bleu_scores)

def lcs(x, y):
    """Return the length of the longest common subsequence of words.

    Both strings are split on whitespace and each word is lower-cased before
    comparison, so matching is case-insensitive and word-based.

    Args:
        x: First text.
        y: Second text.

    Returns:
        The number of words in the longest common (not necessarily
        contiguous) subsequence shared by ``x`` and ``y``.
    """
    first = [word.lower() for word in x.split()]
    second = [word.lower() for word in y.split()]

    # Only the previous row of the dynamic-programming table is ever read, so
    # keep a rolling pair of rows rather than the full table.
    prev_row = [0] * (len(second) + 1)
    for left_word in first:
        row = [0]
        for col, right_word in enumerate(second, start=1):
            if left_word == right_word:
                row.append(prev_row[col - 1] + 1)
            else:
                row.append(max(row[col - 1], prev_row[col]))
        prev_row = row
    return prev_row[-1]

def rouge_l(reference, hypothesis):
    """Compute the ROUGE-L F1 score between a reference and a hypothesis.

    Precision is the LCS length divided by the number of whitespace-separated
    words in the hypothesis; recall divides by the number of words in the
    reference. The result is their harmonic mean.

    Args:
        reference: Reference text.
        hypothesis: Candidate text being scored.

    Returns:
        The F1 score, or ``0`` when the two texts share no words.
    """
    overlap = lcs(reference, hypothesis)
    if not overlap:
        # No shared words: precision and recall are both zero, so bail out
        # before the harmonic mean divides by zero.
        return 0
    precision = overlap / len(hypothesis.split())
    recall = overlap / len(reference.split())
    return 2 * precision * recall / (precision + recall)

In [None]:
# Load data
gpt4_data = load_jsonl('gpt4_response.jsonl')
llava_data = load_jsonl('llava_response.jsonl')

# Each GPT-4 record is stripped as a string.
gpt4_descriptions = [text.strip() for text in gpt4_data]

# Each LLaVA record is a mapping whose keys parse as integers (dataset sizes).
# Flatten all records into (key, value) pairs once, then split them into two
# parallel lists.
# NOTE(review): each value is presumably a list of descriptions aligned with
# gpt4_descriptions - confirm against the file that produces it.
_llava_items = [item for record in llava_data for item in record.items()]
llava_dataset_sizes = [int(size_key) for size_key, _ in _llava_items]
llava_descriptions = [descs for _, descs in _llava_items]

In [None]:
# Calculate metrics
def _mean_rouge_l(references, candidates):
    """Return the mean ROUGE-L F1 over position-aligned text pairs."""
    # zip() stops at the shorter input, so average over the pairs actually
    # scored (as the BLEU helper does) rather than over len(candidates).
    pair_scores = [rouge_l(ref, cand) for ref, cand in zip(references, candidates)]
    return sum(pair_scores) / len(pair_scores) if pair_scores else 0.0


# One aggregate score per entry in llava_descriptions; each entry is scored
# against the same GPT-4 references.
bleu_scores = [calculate_bleu_scores_with_smoothing(gpt4_descriptions, desc) for desc in llava_descriptions]
rouge_l_scores = [_mean_rouge_l(gpt4_descriptions, desc) for desc in llava_descriptions]

# Create DataFrame
metrics_data = {
    'Dataset Size': llava_dataset_sizes,
    'BLEU Score': bleu_scores,
    'ROUGE-L Score': rouge_l_scores
}
metrics_df = pd.DataFrame(metrics_data)

In [None]:
# Bare expression as the last statement of the cell: in a notebook this
# displays the metrics table as the cell output.
metrics_df

In [None]:
# Plot metrics - BLEU
# One line chart of the BLEU score for each dataset size, with an x tick
# placed at every dataset size.
bleu_fig, bleu_ax = plt.subplots(figsize=(10, 5))
bleu_ax.plot(
    llava_dataset_sizes,
    bleu_scores,
    marker='o',
    linestyle='-',
    color='b',
    label='BLEU Score',
)
bleu_ax.set_title('BLEU Scores by Dataset Size')
bleu_ax.set_xlabel('Dataset Size')
bleu_ax.set_ylabel('Score')
bleu_ax.legend()
bleu_ax.grid(True)
bleu_ax.set_xticks(llava_dataset_sizes)
plt.show()

In [None]:
# Plot metrics - ROUGE-L
# One line chart of the ROUGE-L score for each dataset size, with an x tick
# placed at every dataset size.
rouge_fig, rouge_ax = plt.subplots(figsize=(10, 5))
rouge_ax.plot(
    llava_dataset_sizes,
    rouge_l_scores,
    marker='o',
    linestyle='-',
    color='r',
    label='ROUGE-L Score',
)
rouge_ax.set_title('ROUGE-L Scores by Dataset Size')
rouge_ax.set_xlabel('Dataset Size')
rouge_ax.set_ylabel('Score')
rouge_ax.legend()
rouge_ax.grid(True)
rouge_ax.set_xticks(llava_dataset_sizes)
plt.show()

In [None]:
# Save the DataFrame to a pickle file
# The file is written to a relative path, i.e. the current working directory.
# NOTE: pickle files can execute code when loaded; only read this file back
# from a trusted location.
pickle_path = 'metrics_scores.pkl'
metrics_df.to_pickle(pickle_path)