In [51]:
import pandas as pd
import json
from glob import glob
import os

import numpy as np
import re

from tgb.linkproppred.evaluate import Evaluator
from tgb.linkproppred.dataset import LinkPropPredDataset

In [52]:
# Short dataset name; it is also part of the exported CSV file name below.
data = "uci"
# Dataset identifier handed to the TGB dataset loader and evaluator.
dataset = f"tgbl-{data}"
# Model whose outputs are analysed by this notebook.
model = "gpt-4o-mini-2024-07-18"   # alternative: "gpt-4.1-mini-2025-04-14"
prompt_version = "base_v1_cot"

# Folder that is searched for the model's *.jsonl result files.
result_folder = f"./result/{dataset}/{model}/{prompt_version}/output"

# Destination folder and file for the exported explanations CSV.
output_folder = f"./explanations/{model}"
output_file = f"{output_folder}/explanations_{data}.csv"

# Location of the answer-key parquet file that is read with pd.read_parquet.
folder = f"./output/{dataset}/{model}/{prompt_version}"
answer_key = f"{folder}/answer_key.parquet"

# Create the export folder up front; this is a no-op when it already exists.
os.makedirs(output_folder, exist_ok=True)

In [53]:
# Load the TGB link-prediction dataset and read the metric name it declares.
link_prop_dataset = LinkPropPredDataset(name=dataset, root="datasets", preprocess=True)
metric = link_prop_dataset.eval_metric

# Built once: the original cell constructed an identical Evaluator twice.
evaluator = Evaluator(name=dataset)

raw file found, skipping download
Dataset directory is  /Users/zacharyyang/anaconda3/lib/python3.10/site-packages/tgb/datasets/tgbl_uci
loading processed file


In [54]:
# Collect one row per successfully parsed model answer; count the rest.
result = []
error_count = 0

# Sorted so the row order of `result` does not depend on the order in which
# the filesystem happens to list the files.
for file in sorted(glob(f"{result_folder}/*.jsonl")):
    with open(file, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) are not records.
                continue

            try:
                # Named `record` / `task_id` so the loop neither overwrites the
                # notebook-level `data` setting nor shadows the builtin `id`.
                record = json.loads(line)
                task_id = record.get("custom_id")

                # The envelope lookup lives inside the try block so that one
                # malformed record is counted and skipped like any other
                # parse failure instead of aborting the whole run.
                model_answer = json.loads(
                    record["response"]["body"]["choices"][0]["message"]["content"]
                )
                destination_node = model_answer["destination_node"]
                explanation = "\n".join(
                    step["explanation"] for step in model_answer["steps"]
                )
            except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
                # json.JSONDecodeError is a ValueError; the other types cover
                # missing keys, empty `choices`, and null / non-dict payloads.
                print(f"Error parsing JSON: {e}")
                error_count += 1
                continue

            result.append({"task_id": task_id,
                           "explanation": explanation,
                           "destination_node": destination_node})

print(f"Total errors: {error_count}")
print(f"Total results: {len(result)}")

Error parsing JSON: Unterminated string starting at: line 1 column 12 (char 11)
Error parsing JSON: Expecting ',' delimiter: line 42 column 216 (char 118373)
Total errors: 2
Total results: 8974


In [55]:
# Columns are named explicitly so that an empty `result` still yields a frame
# with a `task_id` column for the later merge instead of a column-less one.
explanation_df = pd.DataFrame(result, columns=["task_id", "explanation", "destination_node"])


In [56]:
# `answer_key` starts out as the parquet path and is rebound to the loaded
# frame, so only read when it is not already a DataFrame. Without the guard,
# re-running this cell would pass a DataFrame to read_parquet and fail.
if not isinstance(answer_key, pd.DataFrame):
    answer_key = pd.read_parquet(answer_key)

In [57]:
# Merge from the parsed columns only, so re-running this cell after an earlier
# merge (or after `score` was added) rebuilds the same frame instead of
# producing suffixed duplicate columns.
explanation_df = explanation_df[["task_id", "explanation", "destination_node"]].merge(
    answer_key, on="task_id"
)

In [58]:
def predict_link(query_dst: np.ndarray, llm_dst: int) -> np.ndarray:
    r"""
    turn the LLM's predicted destination into a per-candidate score vector

    Parameters:
        query_dst: candidate destination nodes
        llm_dst: destination node predicted by the LLM
    Returns:
        float array of the same length as query_dst, 1.0 at every position
        whose candidate equals llm_dst and 0.0 elsewhere
    """
    hits = np.zeros(len(query_dst))
    for position, candidate in enumerate(query_dst):
        if candidate == llm_dst:
            hits[position] = 1.0
    return hits

def get_score(query_dst: np.ndarray, 
              llm_dst: int,
              evaluator,
              metric: str) -> float:
    r"""
    score a single LLM prediction with the given evaluator

    Parameters:
        query_dst: candidate destination nodes; the entry at index 0 is passed
            to the evaluator as the positive, the remaining ones as negatives
        llm_dst: destination node predicted by the LLM
        evaluator: object whose eval(input_dict) returns a mapping keyed by metric name
        metric: name of the metric to request and return
    Returns:
        the value stored under `metric` in the evaluator's result
    """
    indicator = predict_link(query_dst, llm_dst)

    # Split the indicator vector into the positive slot and the negative slots.
    positive_score = np.array([indicator[0]])
    negative_scores = np.array(indicator[1:])

    scores = evaluator.eval(
        {
            "y_pred_pos": positive_score,
            "y_pred_neg": negative_scores,
            "eval_metric": [metric],
        }
    )
    return scores[metric]
   

In [59]:
def _score_row(row):
    r"""
    score one merged row with get_score
    """
    # NOTE(review): `query_dst` looks nested, with the candidate array stored
    # at index 0 — confirm against the answer-key schema.
    candidates = row["query_dst"][0]
    return get_score(candidates, row["destination_node"], evaluator, metric)


# Row-wise scoring; the result is stored alongside the parsed answers.
explanation_df["score"] = explanation_df.apply(_score_row, axis=1)

In [60]:
# Report the mean of the per-task scores, rounded to three decimals.
print(f"Average Score for {dataset} {prompt_version} is: {explanation_df['score'].mean():.3f}")

Average Score for tgbl-uci base_v1_cot is: 0.057


In [61]:
# Export the scored explanations, one row per task, without the index column.
explanation_df.loc[
    :, ["task_id", "destination_node", "score", "explanation"]
].to_csv(output_file, index=False)