In [452]:
import pandas as pd
import json
from glob import glob

In [453]:
# Run configuration: selects which dataset / model / prompt version is analysed.
# All paths below are derived from these three values.
dataset = "explanations"
model = "gpt-4.1-mini-2025-04-14" # alternative model id: "gpt-4o-mini-2024-07-18"
prompt_version = "wiki"

# Directory derived from the run configuration (not referenced again in the visible cells).
folder = f"./output/{dataset}/{model}/{prompt_version}"
# Directory scanned for *.jsonl result files; the summary CSVs are also written here.
result_folder = f"./result/{dataset}/{model}/{prompt_version}/output"

# CSV loaded later into prev_explanation_df; its task_id, score and explanation columns are used.
prev_file = f"./explanations/{model}/explanations_{prompt_version}.csv"

In [454]:
# Parse every *.jsonl file in result_folder into one row per successfully
# classified explanation.
#
# Each non-blank line is expected to be a JSON object with a "custom_id" and a
# chat-completion style response whose message content is itself a JSON string
# holding "category", "proposed_category" and "justification".
# NOTE(review): this looks like OpenAI Batch API output - confirm.
#
# Produces:
#   result      - DataFrame with columns task_id, category, proposed_category,
#                 justification (one row per parsed line)
#   error_count - number of lines that could not be parsed
records = []
error_count = 0

# sorted() makes the read order deterministic across runs and platforms.
for file in sorted(glob(f"{result_folder}/*.jsonl")):
    with open(file, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                # A blank line (e.g. a trailing newline) is not a record.
                continue

            # Used in the error message until the real custom_id is known.
            task_id = f"{file}:{line_no}"
            try:
                # Decoding happens inside the try so that one malformed line is
                # counted as an error instead of aborting the whole cell.
                data = json.loads(line)
                task_id = data.get("custom_id")

                # The model's answer is a JSON document embedded as a string.
                content = data["response"]["body"]["choices"][0]["message"]["content"]
                model_answer = json.loads(content)

                records.append(
                    {
                        "task_id": task_id,
                        "category": model_answer["category"],
                        "proposed_category": model_answer["proposed_category"],
                        "justification": model_answer["justification"],
                    }
                )
            except Exception as e:
                # Best effort: report and count the bad record, then keep going.
                error_count += 1
                print(f"Error for {task_id}: {e}")

# Explicit columns keep the schema stable even when no record was parsed.
result = pd.DataFrame(
    records,
    columns=["task_id", "category", "proposed_category", "justification"],
)
print(f"Error count: {error_count}")



Error count: 0


In [455]:
# Number of successfully parsed rows.
result.shape[0]

5000

In [456]:
def get_id(task_id):
    """Return the integer stored in the third hyphen-separated field of ``task_id``.

    For example ``"wiki-explanation-12"`` and ``"tgbl-wiki-12"`` both give ``12``.
    Raises ``IndexError`` when there are fewer than three fields and
    ``ValueError`` when the third field is not an integer.
    """
    fields = task_id.split("-")
    numeric_field = fields[2]
    return int(numeric_field)
# Add the numeric id parsed from each task_id and order the rows by it.
result = (
    result
    .assign(id=result["task_id"].apply(get_id))
    .sort_values(by=["id"])
)
result

Unnamed: 0,task_id,category,proposed_category,justification,id
0,wiki-explanation-0,Repeated / Consistent Interaction Pattern,,The explanation predicts the next destination ...,0
1,wiki-explanation-1,Repeated / Consistent Interaction Pattern,,The explanation focuses on the source node rep...,1
2,wiki-explanation-2,Repeated / Consistent Interaction Pattern,,The explanation notes that the source node has...,2
3,wiki-explanation-3,Repeated / Consistent Interaction Pattern,,The explanation highlights repeated interactio...,3
4,wiki-explanation-4,Lack of Data or Default / Fallback Prediction,,The explanation explicitly states the absence ...,4
...,...,...,...,...,...
4995,wiki-explanation-4995,Repeated / Consistent Interaction Pattern,,The explanation highlights a repeated pattern ...,4995
4996,wiki-explanation-4996,Most Recent Interaction Heuristic,,The explanation chooses the next destination b...,4996
4997,wiki-explanation-4997,Repeated / Consistent Interaction Pattern,,The explanation identifies multiple repeated i...,4997
4998,wiki-explanation-4998,Most Recent Interaction Heuristic,,The explanation bases the prediction on the de...,4998


In [457]:
# Load the previously generated explanations, add the numeric id parsed from
# task_id, and order the rows by that id.
prev_explanation_df = (
    pd.read_csv(prev_file)
    .assign(id=lambda frame: frame["task_id"].apply(get_id))
    .sort_values(by=["id"])
)
prev_explanation_df

Unnamed: 0,task_id,destination_node,score,explanation,id
0,tgbl-wiki-0,8267,1.000000,Identify the past interactions of source node ...,0
1,tgbl-wiki-1,9180,0.001996,Review the past interactions of Source Node 73...,1
2,tgbl-wiki-2,9180,0.001996,The source node 7325 has past interactions onl...,2
3,tgbl-wiki-3,8988,1.000000,"Looking at the temporal graph, we observe two ...",3
4,tgbl-wiki-4,-1,0.001998,Source Node 5622 is not present in the provide...,4
...,...,...,...,...,...
4995,tgbl-wiki-4995,9180,0.001996,The source node 3374 has two past interactions...,4995
4996,tgbl-wiki-4996,9210,1.000000,The past interactions provided for Source Node...,4996
4997,tgbl-wiki-4997,9146,1.000000,The given past interactions of Source Node 579...,4997
4998,tgbl-wiki-4998,9210,1.000000,Examining the given past interactions for Sour...,4998


In [458]:
# Row counts of the two frames, shown side by side for comparison.
prev_explanation_df.shape[0], result.shape[0]

(5000, 5000)

In [459]:
# Pair every explanation's score with the category assigned to it.
#
# The two frames are joined on the numeric "id" instead of by row position:
# if any line failed to parse, `result` has fewer rows than
# `prev_explanation_df`, and a positional pairing would silently attach
# categories to the wrong scores from the first missing row onwards.
explanation_score_df = (
    prev_explanation_df[:5000][["id", "score"]]  # same 5000-row cap as before
    .merge(
        result[:5000][["id", "category"]],
        on="id",
        how="inner",            # keep only tasks present in both frames
        validate="one_to_one",  # fail loudly if an id is duplicated on either side
    )
    [["score", "category"]]
)

In [460]:
# Attach each task's original explanation text to its classification row.
#
# The join is on the numeric "id" rather than on row position, so rows stay
# correctly paired even when some tasks are missing from `result`. Tasks with
# no matching explanation get a missing value. The merge returns a new frame
# with a fresh 0..n-1 index.
result = (
    result[:5000]  # same 5000-row cap as before
    # Dropping an earlier "explanation" column keeps this cell safe to re-run.
    .drop(columns=["explanation"], errors="ignore")
    .merge(
        prev_explanation_df[:5000][["id", "explanation"]],
        on="id",
        how="left",
        validate="one_to_one",  # fail loudly if an id is duplicated on either side
    )
)

In [461]:
# Canonical category labels, in the order used for every summary table.
category_order = [
    "Most Recent Interaction Heuristic",
    "Repeated / Consistent Interaction Pattern",
    "Pattern Continuation or Extrapolation",
    "Lack of Data or Default / Fallback Prediction",
    "New Node or Unseen Interaction",
    "Sequence or Alternation Logic",
    "Most Frequent Past Destination",
    "Ambiguous or Multiple Candidates",
    "Default or Most Common Node",
    "Others"
]

# File-name-safe variants of the labels: " / " is spelled out as " or " because
# a slash inside a label would be treated as a path separator when the label is
# used as a CSV file name. Derived from category_order so the two lists always
# have the same length and order and cannot drift apart.
category_name = [label.replace(" / ", " or ") for label in category_order]

In [462]:
# Count explanations per category and report them in the fixed category_order.
category_counts = result['category'][:5000].value_counts()

# The reindex below keeps only labels listed in category_order. Report anything
# else so the table total cannot silently fall short of the number of rows.
unexpected_categories = [
    label for label in category_counts.index if label not in category_order
]
if unexpected_categories:
    print(f"Categories not in category_order (excluded from the table): {unexpected_categories}")

# fill_value=0 gives absent categories a count of 0 while keeping the integer
# dtype; reindex().fillna(0) would turn every count into a float.
ordered_counts = category_counts.reindex(category_order, fill_value=0)
ordered_counts

category
Most Recent Interaction Heuristic                 447
Repeated / Consistent Interaction Pattern        3618
Pattern Continuation or Extrapolation              50
Lack of Data or Default / Fallback Prediction     569
New Node or Unseen Interaction                     19
Sequence or Alternation Logic                       2
Most Frequent Past Destination                    253
Ambiguous or Multiple Candidates                    6
Default or Most Common Node                        23
Others                                             13
Name: count, dtype: int64

In [463]:
# Save the per-category counts into result_folder, keeping the category label as the index column.
counts_csv_path = f"{result_folder}/category_counts.csv"
ordered_counts.to_csv(counts_csv_path, index=True)

In [464]:
# Export up to five example rows per category, one CSV file per category.
# category_name supplies the file-name-safe label for each category_order entry,
# so the two lists must line up one to one (zip would silently drop the extras).
assert len(category_name) == len(category_order), "category_name and category_order differ in length"

for name, category in zip(category_name, category_order):
    print(category)
    temp_df = result[result['category'] == category]

    # Categories with fewer than five rows are exported in full.
    sample_count = min(5, len(temp_df))
    # Fixed seed so that re-running the notebook exports the same examples.
    temp_df.sample(sample_count, random_state=42).to_csv(f"{result_folder}/{name}.csv", index=False)

Most Recent Interaction Heuristic
Repeated / Consistent Interaction Pattern
Pattern Continuation or Extrapolation
Lack of Data or Default / Fallback Prediction
New Node or Unseen Interaction
Sequence or Alternation Logic
Most Frequent Past Destination
Ambiguous or Multiple Candidates
Default or Most Common Node
Others


In [465]:
# Average score per category, listed in the fixed category_order
# (a category with no rows shows up as NaN).
mean_scores = (
    explanation_score_df
    .groupby("category")["score"]
    .mean()
    .reindex(category_order)
)
mean_scores

category
Most Recent Interaction Heuristic                0.718683
Repeated / Consistent Interaction Pattern        0.248325
Pattern Continuation or Extrapolation            0.301397
Lack of Data or Default / Fallback Prediction    0.017783
New Node or Unseen Interaction                   0.001996
Sequence or Alternation Logic                    0.001996
Most Frequent Past Destination                   0.814600
Ambiguous or Multiple Candidates                 0.500998
Default or Most Common Node                      0.001996
Others                                           0.001996
Name: score, dtype: float64

In [466]:
# Save the per-category mean scores into result_folder, keeping the category label as the index column.
mean_scores_csv_path = f"{result_folder}/category_mean_scores.csv"
mean_scores.to_csv(mean_scores_csv_path, index=True)