# Replication of "Time Travel in LLMs"

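Download the GitHub repository of the paper and extract it next to this notebook as `time-travel-in-llms-main/` (the cells below read the result CSVs from `time-travel-in-llms-main/results/`): https://github.com/shahriargolchin/time-travel-in-llms/tree/main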

In [13]:
import pandas as pd
import os
import evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [15]:
def calculate_bleurt(preds, refs):
    """Score predictions against references with the BLEURT-20 checkpoint.

    Args:
        preds: Passed to the metric as ``predictions``.
        refs: Passed to the metric as ``references``.

    Returns:
        The value returned by the loaded metric's ``compute`` call.

    Note:
        The metric is loaded anew on every call.
    """
    metric = evaluate.load("bleurt", module_type="metric", checkpoint="BLEURT-20")
    return metric.compute(predictions=preds, references=refs)

In [16]:
def calculate_rouge(preds, refs):
    """Score predictions against references with the ROUGE metric.

    Args:
        preds: Passed to the metric as ``predictions``.
        refs: Passed to the metric as ``references``.

    Returns:
        The value returned by the loaded metric's ``compute`` call.

    Note:
        The metric is loaded anew on every call.
    """
    metric = evaluate.load("rouge")
    return metric.compute(predictions=preds, references=refs)

In [17]:
def collect_csv_paths(root):
    """Return every ``.csv`` file found under ``root``, recursively.

    Args:
        root: Directory to walk. A directory that does not exist yields an
            empty list, because ``os.walk`` ignores the error by default.

    Returns:
        A sorted list of paths, each built by joining the walked directory
        with the file name. Sorting keeps the processing order the same
        across runs and machines; ``os.walk`` alone gives no such guarantee.
    """
    return sorted(
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(root)
        for filename in filenames
        if filename.endswith(".csv")
    )


# Location of the result CSVs, relative to the notebook's working directory.
results_dir = "time-travel-in-llms-main/results/"
paths = collect_csv_paths(results_dir)

In [None]:
# Re-score every results CSV and print our metrics beside the values stored
# in the same file, so the two can be compared.
for file_path in paths:
    df = pd.read_csv(file_path)

    # Convert each column once; all four metric calls share the references.
    references = df["second_piece"].tolist()
    guided_completions = df["generated_guided_completion"].tolist()
    unguided_completions = df["generated_general_completion"].tolist()

    bleurt_score_guided = calculate_bleurt(preds=guided_completions, refs=references)
    rouge_score_guided = calculate_rouge(preds=guided_completions, refs=references)

    bleurt_score_unguided = calculate_bleurt(preds=unguided_completions, refs=references)
    # Keep the unguided ROUGE result in its own variable. It was previously
    # assigned to `bleurt_score_guided`, which overwrote the guided BLEURT
    # score and left the unguided ROUGE line printing the guided value.
    rouge_score_unguided = calculate_rouge(preds=unguided_completions, refs=references)

    print(f"File: {file_path}")
    print(f"Our BLEURT Score Guided: {bleurt_score_guided}, theirs: {df['bleurt_score_for_guided_completion'].tolist()}")
    print(f"Our ROUGE Score Guided: {rouge_score_guided}, theirs: {df['rouge_score_for_guided_completion'].tolist()}")
    print(f"Our BLEURT Score Unguided: {bleurt_score_unguided}, theirs: {df['bleurt_score_for_general_completion'].tolist()}")
    print(f"Our ROUGE Score Unguided: {rouge_score_unguided}, theirs: {df['rouge_score_for_general_completion'].tolist()}")


Downloading builder script: 100%|██████████| 5.20k/5.20k [00:00<00:00, 4.78MB/s]


ImportError: To be able to use evaluate-metric/bleurt, you need to install the following dependencies['bleurt'] using 'pip install git+https://github.com/google-research/bleurt.git' for instance'