In [None]:
from framework.utilities.metrics import calculate_bleu, calculate_rouge
import numpy as np

In [None]:
import pickle
import pandas as pd

# Read the pickled model outputs from disk and wrap them in a DataFrame.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
with open('data/input_output_train', 'rb') as pickle_handle:
    combined_outputs = pickle.load(pickle_handle)

df_combined_outputs = pd.DataFrame(combined_outputs)
# Preview the first rows as the cell's displayed output.
df_combined_outputs.head()

In [None]:
# Running BLEU totals for each model over every entry of the 'wmt14' column.
# `num_rows` ends up as the number of entries visited; the averaging cell
# divides the totals by it.
total_bleu_13b = total_bleu_7b = total_bleu_tiny = 0
num_rows = 0

for num_rows, example in enumerate(df_combined_outputs['wmt14'], start=1):
    # Each entry holds the three model outputs plus the reference under 'label'.
    total_bleu_13b += calculate_bleu(example['13b'], example['label'])
    total_bleu_7b += calculate_bleu(example['7b'], example['label'])
    total_bleu_tiny += calculate_bleu(example['tiny'], example['label'])

In [None]:
# Turn the accumulated BLEU totals into per-row averages.
avg_bleu_13b, avg_bleu_7b, avg_bleu_tiny = (
    total_bleu_13b / num_rows,
    total_bleu_7b / num_rows,
    total_bleu_tiny / num_rows,
)

# Report the three averages, largest model first.
for model_label, model_avg in (
    ("13B", avg_bleu_13b),
    ("7B", avg_bleu_7b),
    ("Tiny", avg_bleu_tiny),
):
    print(f"Avg BLEU for {model_label}: {model_avg}")

In [None]:
# Target average score; later cells solve for the 13b/tiny mixing
# probabilities whose expected score equals this value.
h_tilde = 0.51

In [None]:
# Ratio of the target score to two hard-coded reference scores.
# NOTE(review): 0.551 and 0.491 are unexplained literals -- presumably
# hand-copied average scores for 13b and tiny; confirm. Both names are
# reassigned by a later cell.
reference_score_13b, reference_score_tiny = 0.551, 0.491
p_t_13b = h_tilde / reference_score_13b
p_t_tiny = h_tilde / reference_score_tiny

In [None]:
# Show both values, one per line.
print(p_t_13b, p_t_tiny, sep="\n")

In [None]:
# Solve p * avg_bleu_13b + (1 - p) * avg_bleu_tiny == h_tilde for p, the
# probability of routing a row to the 13b model; tiny gets the remainder.
bleu_gap = avg_bleu_13b - avg_bleu_tiny
p_t_13b = (h_tilde - avg_bleu_tiny) / bleu_gap
p_t_tiny = 1 - p_t_13b

In [None]:
# Show the mixing probabilities, 13b first and then tiny.
for mixing_probability in (p_t_13b, p_t_tiny):
    print(mixing_probability)

In [None]:
def sample_model(p_13b, p_tiny, rng=None):
    """Randomly pick which model handles a row: '13b' or 'tiny'.

    Args:
        p_13b: Probability of selecting '13b'.
        p_tiny: Probability of selecting 'tiny'. Together with ``p_13b`` it
            must form a valid distribution (non-negative, summing to 1);
            otherwise NumPy raises ``ValueError``.
        rng: Optional random source exposing a ``choice`` method, e.g.
            ``np.random.default_rng(seed)``, for reproducible sampling.
            When omitted, the global ``np.random`` state is used, exactly
            as before this parameter existed.

    Returns:
        The selected model key ('13b' or 'tiny') as a NumPy string scalar.
    """
    random_source = np.random if rng is None else rng
    return random_source.choice(['13b', 'tiny'], p=[p_13b, p_tiny])

In [None]:
# Simulate routing each WMT14 row to either the 13b or the tiny model with
# the mixing probabilities computed above, and average the resulting BLEU.
total_sampled_bleu = 0
selected_model_counts = {'13b': 0, 'tiny': 0}

# Take the row count from this column directly instead of reading the global
# `num_rows`: that name is reset and recounted for the ROUGE section further
# down, so re-running this cell out of order would otherwise divide by
# another dataset's row count.
wmt14_rows = df_combined_outputs['wmt14']
num_wmt14_rows = len(wmt14_rows)

for row in wmt14_rows:
    # Only sample rows where both candidate outputs and the reference are non-empty.
    if row['13b'] and row['tiny'] and row['label']:
        selected_model = sample_model(p_t_13b, p_t_tiny)
        selected_model_counts[selected_model] += 1

        bleu_score = calculate_bleu(row[selected_model], row['label'])
        total_sampled_bleu += bleu_score

# The denominator is the full row count, so skipped rows contribute a score of 0
# to the average (same as before).
avg_sampled_bleu = total_sampled_bleu / num_wmt14_rows

print(f"Average BLEU score for selected models: {avg_sampled_bleu}")
print(f"Selected model counts: {selected_model_counts}")

In [None]:
# Scratch arithmetic displayed as the cell output.
# NOTE(review): 938, 527.870 and 3000 are unexplained literals -- presumably a
# 13b selection count, a per-row cost/latency figure and the total row count;
# confirm and replace with named values.
(938 * 527.870)/3000

In [None]:
# Scratch arithmetic displayed as the cell output.
# NOTE(review): 2060, 44.639 and 3000 are unexplained literals -- presumably a
# tiny selection count, a per-row cost/latency figure and the total row count;
# confirm and replace with named values.
(2060 * 44.639)/3000

In [None]:
# Running ROUGE-1 totals for each model over every entry of the
# 'cnn_dailymail' column. `num_rows` is reset here and ends up as the number
# of entries visited in this column.
total_rouge_13b = total_rouge_7b = total_rouge_tiny = 0
num_rows = 0

for num_rows, example in enumerate(df_combined_outputs['cnn_dailymail'], start=1):
    # Only the 'rouge1' entry of each calculate_rouge result is accumulated.
    total_rouge_13b += calculate_rouge(example['13b'], example['label'])['rouge1']
    total_rouge_7b += calculate_rouge(example['7b'], example['label'])['rouge1']
    total_rouge_tiny += calculate_rouge(example['tiny'], example['label'])['rouge1']

In [None]:
# Turn the accumulated ROUGE-1 totals into per-row averages.
avg_rouge_13b, avg_rouge_7b, avg_rouge_tiny = (
    total_rouge_13b / num_rows,
    total_rouge_7b / num_rows,
    total_rouge_tiny / num_rows,
)

# Report the three averages, largest model first.
for model_label, model_avg in (
    ("13B", avg_rouge_13b),
    ("7B", avg_rouge_7b),
    ("Tiny", avg_rouge_tiny),
):
    print(f"Avg ROUGE for {model_label}: {model_avg}")

In [None]:
# Target average score for the ROUGE section; the next cell solves for the
# 13b/tiny mixing probabilities whose expected score equals this value.
h_tilde = 0.3125

In [None]:
# Solve p * avg_rouge_13b + (1 - p) * avg_rouge_tiny == h_tilde for p, the
# probability of routing a row to the 13b model; tiny gets the remainder.
rouge_gap = avg_rouge_13b - avg_rouge_tiny
p_t_13b = (h_tilde - avg_rouge_tiny) / rouge_gap
p_t_tiny = 1 - p_t_13b
# Show both probabilities, one per line.
print(p_t_13b, p_t_tiny, sep="\n")

In [None]:
# Simulate routing each CNN/DailyMail row to either the 13b or the tiny model
# with the mixing probabilities computed above, and average the resulting
# ROUGE-1 score.
total_sampled_rouge = 0
selected_model_counts = {'13b': 0, 'tiny': 0}

# Take the row count from this column directly instead of reading the global
# `num_rows`: that name is also used for the WMT14 section, so re-running
# cells out of order would otherwise divide by another dataset's row count.
cnn_dailymail_rows = df_combined_outputs['cnn_dailymail']
num_cnn_dailymail_rows = len(cnn_dailymail_rows)

for row in cnn_dailymail_rows:
    # Only sample rows where both candidate outputs and the reference are non-empty.
    if row['13b'] and row['tiny'] and row['label']:
        selected_model = sample_model(p_t_13b, p_t_tiny)
        selected_model_counts[selected_model] += 1

        rouge_score = calculate_rouge(row[selected_model], row['label'])
        total_sampled_rouge += rouge_score['rouge1']

# The denominator is the full row count, so skipped rows contribute a score of 0
# to the average (same as before).
avg_sampled_rouge = total_sampled_rouge / num_cnn_dailymail_rows

print(f"Average ROUGE score for selected models: {avg_sampled_rouge}")
print(f"Selected model counts: {selected_model_counts}")

In [None]:
# Scratch arithmetic displayed as the cell output.
# NOTE(review): 739, 750.285 and 3000 are unexplained literals -- presumably a
# 13b selection count, a per-row cost/latency figure and the total row count;
# confirm and replace with named values.
(739 * 750.285)/3000

In [None]:
# Scratch arithmetic displayed as the cell output.
# NOTE(review): 2220, 142.08 and 3000 are unexplained literals -- presumably a
# tiny selection count, a per-row cost/latency figure and the total row count;
# confirm and replace with named values.
(2220 * 142.08)/3000