## Metric parser

This notebook outputs a JSON file that can be used as input to the `metrics.ipynb` notebook.

Search for "#SETUP" to find the cells that must be modified (to set the directory paths and the output file name).

In [None]:
import os
import json
import pandas as pd
from dateutil import parser
# Show the directory the notebook starts in; the relative paths configured
# in the "#SETUP" cells are resolved against the working directory.
starting_dir = os.getcwd()
print(starting_dir)

In [None]:
# English month names, indexed by (month number - 1). They are used to spell
# out a birthday from the numeric month stored in the human annotations.
months = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]

# Prompt tags searched for in the LLM output file names: "prompt_0" .. "prompt_5".
prompts = [f"prompt_{i}" for i in range(6)]

# Model tags searched for in the LLM output file names.
# NOTE: "deepseek-r1" is a substring of every distilled variant below. The
# file-name matching keeps the last tag that matches, so the plain
# "deepseek-r1" entry must stay ahead of the more specific names.
models = [
    "deepseek-r1",
    "deepseek-r1-llama-8B",
    "deepseek-r1-qwen-7B",
    "deepseek-r1-qwen-14B",
    "deepseek-r1-qwen-32B",
    "llama3-3",
]

In [None]:

def extract_year(date_str):
    """Return the year found in a free-form date string, or None.

    The text is handed to ``dateutil.parser.parse`` with ``fuzzy=True`` so
    that surrounding words are ignored.

    Args:
        date_str: Text that is expected to contain a date.

    Returns:
        The ``year`` of the parsed datetime, or None when the value cannot
        be parsed.

    NOTE(review): dateutil fills fields that are absent from the text from
    its ``default`` datetime, so a string without any year presumably yields
    the current year rather than None -- confirm whether that matters here.
    """
    try:
        dt = parser.parse(date_str, fuzzy=True)
    except (ValueError, OverflowError, TypeError):
        # ValueError: no date could be recognised in the text.
        # OverflowError: the parsed numbers do not fit into a datetime.
        # TypeError: the value is not a string at all (e.g. a float NaN that
        # pandas inserted for a missing birthday).
        return None
    return dt.year

In [None]:
# Step up to the parent directory so the relative data paths configured in
# the "#SETUP" cells resolve from there; the final expression shows the new
# working directory as the cell output.
# NOTE: every re-run of this cell moves up one more level.
os.chdir(os.pardir)
os.getcwd()

## LLM output parser

In [None]:
#SETUP:
# Directory scanned by the LLM output parser below: every sub-folder is treated
# as one person and every "*.json" file inside it as one LLM output file.
# The path is resolved against the working directory set by the earlier
# os.chdir cell.
root_dir = "./minisubset03_LLM_output" #Relative path to root directory of LLM output

In [None]:

# Collect one record per image from every "<person>/<file>.json" under root_dir.
data = []
people = os.listdir(root_dir)  # listed once instead of once per iteration
for position, person in enumerate(people, start=1):
    person_path = os.path.join(root_dir, person)
    print(f"{position}/{len(people)}")  # progress indicator
    if not os.path.isdir(person_path):
        continue
    for filename in os.listdir(person_path):
        if not filename.endswith(".json"):
            continue

        # The model and the prompt are encoded in the file name, so they are
        # resolved once per file instead of once per image. The last matching
        # tag wins, which lets a more specific model name that appears later
        # in `models` override a shorter one contained in it.
        file_model = None
        for model in models:
            if model in filename:
                file_model = model
        file_prompt = None
        for prompt_index, prompt in enumerate(prompts):
            if prompt in filename:
                file_prompt = prompt_index

        file_path = os.path.join(person_path, filename)
        with open(file_path, "r", encoding="utf8") as f:
            json_data = json.load(f)

        for image in json_data:
            image["name"] = person
            # Drop the first two components of the stored path and re-root it at "./".
            image["path"] = "./" + "/".join(image["path"].split("/")[2:])
            # Photo-year bounds are only provided by the human annotations.
            image["left_photo_year"] = None
            image["right_photo_year"] = None
            # Leave the keys untouched when the file name carries no known tag.
            if file_model is not None:
                image["model"] = file_model
            if file_prompt is not None:
                image["prompt"] = file_prompt
        data.extend(json_data)

df = pd.DataFrame(data)
# pd.notna also filters the NaN that pandas inserts when a record has no
# "birthday" key; a plain `is not None` check would pass NaN on to the parser.
df["birth_year"] = [
    extract_year(birthday) if pd.notna(birthday) else None
    for birthday in df["birthday"]
]
df.head()

## Human annotation parser


In [None]:
#SETUP
# Directory scanned by the human annotation parser below: every sub-folder is
# treated as one person and is expected to contain an "annotation.json" file.
human_root_dir = "./minisubset03_annotated" # Enter the relative path to the human annotation output.

In [None]:
def _human_annotation_record(person, pic):
    """Convert one entry of a person's annotation.json into an output record.

    Args:
        person: Name of the person's folder; stored under "name".
        pic: One annotation entry (a dict loaded from annotation.json).

    Returns:
        A dict with the same keys as the LLM records, with "model" set to
        "human" and "prompt" set to -1.

    Raises:
        KeyError: A required annotation field is missing.
        ValueError: "birth_month" is not an integer in 1..12.
    """
    record = {"name": person}
    if pic["birth_day"] != "" and pic["birth_month"] != "" and pic["birth_year"] != "":
        month_number = int(pic["birth_month"])
        # Reject 0 and negative numbers explicitly: a negative list index
        # would otherwise silently select a month counted from the end.
        if not 1 <= month_number <= len(months):
            raise ValueError(f"birth_month out of range: {pic['birth_month']!r}")
        record["birthday"] = f"{pic['birth_day']} {months[month_number - 1]} {pic['birth_year']}"
        record["birth_year"] = pic["birth_year"]
    elif pic["birth_year"] != "":
        # Only the year is known.
        record["birthday"] = f"{pic['birth_year']}"
        record["birth_year"] = pic["birth_year"]
    else:
        record["birthday"] = None
        record["birth_year"] = None
    # Fields that only exist in the LLM output stay empty for human records.
    record["year_of_photo"] = None
    record["year_of_photo_int"] = None
    # Empty/falsy estimates are normalised to None.
    record["left_photo_year"] = pic["estimated_year_creation_left"] or None
    record["right_photo_year"] = pic["estimated_year_creation_right"] or None
    # Optional field: None when the annotation does not provide it.
    record["can_determine"] = pic.get("wiki_page_sufficient")
    record["caption"] = None
    # Drop the first two components of the stored path and re-root it at "./".
    record["path"] = "./" + "/".join(pic["path"].split("/")[2:])
    record["prompt"] = -1
    record["model"] = "human"
    return record


# Collect one record per annotated picture from every "<person>/annotation.json".
data_h = []
for person in os.listdir(human_root_dir):
    person_path = os.path.join(human_root_dir, person)
    if not os.path.isdir(person_path):
        continue
    file_path = os.path.join(person_path, "annotation.json")
    try:
        with open(file_path, "r", encoding="utf8") as f:
            json_data = json.load(f)
        person_all_pics = [_human_annotation_record(person, pic) for pic in json_data]
    except (OSError, ValueError, KeyError, IndexError, TypeError) as e:
        # Best effort: a person whose annotation file is missing or malformed
        # is skipped as a whole. The message relies only on values that are
        # always defined at this point.
        print(f"Skipping {file_path}: {type(e).__name__}: {e}")
        continue
    data_h.extend(person_all_pics)

df2 = pd.DataFrame(data_h)
df2.head()

## Generate output json for metrics.ipynb

In [None]:
#SETUP
# File that the combined LLM + human records are written to (JSON, one
# object per record).
output_file_name = 'final_annotation.json' # Change to the desired name of the output file

In [None]:
# Stack the LLM records on top of the human records with a fresh 0..n-1 index,
# replace missing values with None, and write one JSON object per row.
frames = [df, df2]
df_a = pd.concat(frames, ignore_index=True)
df_a = df_a.where(df_a.notna(), None)
df_a.to_json(output_file_name, orient="records")