## 1. Setting up


In [2]:
# ref: https://langfuse.com/docs/query-traces
import os
import json
from langfuse import Langfuse
import json
import os
from datetime import datetime
import pandas as pd

# SECURITY: API keys should never be committed inside a notebook. Every setting
# below is read from an environment variable of the same name; the literal is only
# a fallback so the notebook keeps running unchanged on an unconfigured machine.
# Treat the fallback keys as leaked: rotate them in Langfuse, export the new values
# as environment variables, and then delete the literals.

# Hosted Langfuse service.
LANGFUSE_SERVICE_PUBLIC_KEY = os.environ.get(
    "LANGFUSE_SERVICE_PUBLIC_KEY", "pk-lf-8b62fd88-54c8-4b3d-a5d9-791ba8a7e90a"
)
LANGFUSE_SERVICE_SECRET_KEY = os.environ.get(
    "LANGFUSE_SERVICE_SECRET_KEY", "sk-lf-f6a07b0f-1304-492a-aca6-322e264eb313"
)
LANGFUSE_SERVICE_HOST = os.environ.get(
    "LANGFUSE_SERVICE_HOST", "https://cloud.langfuse.com"
)

# Local Langfuse instance.
LANGFUSE_LOCAL_PUBLIC_KEY = os.environ.get(
    "LANGFUSE_LOCAL_PUBLIC_KEY", "pk-lf-f24eaab4-afd5-4895-8d52-580a242b99a4"
)
LANGFUSE_LOCAL_SECRET_KEY = os.environ.get(
    "LANGFUSE_LOCAL_SECRET_KEY", "sk-lf-c6b7cebb-6877-4b71-8d3f-f1be40a046b4"
)
LANGFUSE_LOCAL_HOST = os.environ.get("LANGFUSE_LOCAL_HOST", "http://localhost:3000")

# Lower-case aliases of the local-instance settings. The original literals were
# exact copies of the LOCAL values, so they are derived instead of repeated.
langfuse_secret_key = LANGFUSE_LOCAL_SECRET_KEY
langfuse_public_key = LANGFUSE_LOCAL_PUBLIC_KEY
langfuse_host = LANGFUSE_LOCAL_HOST

"""Define paths"""
# print(os.getcwd())
# Every directory used by the notebook hangs off the parent of the current
# working directory, so the notebook must be started from a sub-folder of it.
parent_dir = os.path.dirname(os.getcwd())

# `date` is the name of that parent folder (e.g. "03.18"). The previous hard-coded
# `date = "03.18"` was overwritten immediately and has been removed.
date = os.path.basename(parent_dir)

tex_dir = os.path.join(parent_dir, "tex")
processed_data_dir = os.path.join(parent_dir, "processed_data")
raw_export_dir = os.path.join(parent_dir, "raw_export")
ipynb_dir = os.path.join(parent_dir, "ipynb")

"""Define session_id"""
# session_id="qwen2.5-coder_f4d4_dp_batch"
# Sessions to export and trim. Un-comment the entries you need.
# Every entry ends with a comma on purpose: without it, enabling two adjacent
# lines makes Python silently concatenate them into one bogus session ID.
session_id_list = [
    # "qwen2.5-coder:14b_1bb2_mc_batch",
    # "qwen2.5-coder:14b_1bb2_dp_batch",
    # "deepseek-r1:14b_60e0_mc_batch",
    # "phi4_6fca_sg_batch",
    # "llama3.1_da8e_sg_batch",
    # "qwen2.5-coder:14b_b154_sg_batch",
    # "llama3.1_02c0_sg_batch",
    # "qwen2.5-coder:14b_da7d_sg_batch",
    "deepseek-r1:14b_7a02_sg_batch",
    # "deepseek-r1:14b_8757_sg_batch",
]

## 1.9 Pretty print `fetch_traces_response`


## 2.1 Export raw data

Langfuse limits API usage to 20 invocations per minute (see https://langfuse.com/faq/all/api-limits), so the export below pauses for a few seconds before every request.


In [18]:
# ALTERNATIVE TO 2.
import os
import json
from time import sleep
from langfuse import Langfuse
from datetime import datetime
from tqdm import tqdm


# Client used by the export helpers in this cell; it is configured with the
# LANGFUSE_SERVICE_* settings from the setup cell.
langfuse = Langfuse(
    host=LANGFUSE_SERVICE_HOST,
    public_key=LANGFUSE_SERVICE_PUBLIC_KEY,
    secret_key=LANGFUSE_SERVICE_SECRET_KEY,
)

# Module-level counter that the fetch helpers in this cell read and update through
# `global API_invok_count`.
API_invok_count = 0

# NOTE(review): not referenced by any code visible in this part of the notebook.
query_range_num_run = dict(start=0, end=1)


class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder for objects that the standard encoder cannot serialise.

    Conversion rules, applied in order:
      * ``datetime`` values become ISO-8601 strings.
      * Any object exposing ``__dict__`` becomes a shallow copy of that dict.
        If the copy has an ``"observations"`` entry, every element of it is
        replaced by the result of ``fetch_observation_data(element)``.
      * Everything else is passed to ``json.JSONEncoder.default``, which raises
        ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``."""
        if isinstance(obj, datetime):
            return obj.isoformat()

        if not hasattr(obj, "__dict__"):
            # Unknown type: let the base class report the error.
            return super().default(obj)

        # Work on a copy so the encoded object itself is never modified.
        payload = obj.__dict__.copy()
        if "observations" in payload:
            expanded = []
            for observation_ref in payload["observations"]:
                expanded.append(fetch_observation_data(observation_ref))
            payload["observations"] = expanded
        return payload


def fetch_observation_data(observation_id, delay_seconds=3):
    """
    Fetch one observation from Langfuse and return it as a dictionary.

    The call is throttled by pausing ``delay_seconds`` before every request.
    The previous implementation guarded the pause with
    ``if API_invok_count >= 0``, which is always true, so the pause already
    happened on every call; it is now unconditional and configurable.

    Parameters:
        observation_id: Identifier passed to ``langfuse.fetch_observation``.
        delay_seconds (int): Whole seconds to wait before the request.
            ``0`` (or a negative value) disables the pause.

    Returns:
        dict: ``observation_response.data.dict()`` for the fetched observation.

    Side effects:
        Prints progress messages and increments the module-level
        ``API_invok_count`` by one per request.
    """
    global API_invok_count

    print(f"Fetching observation data for {observation_id}...")
    if delay_seconds > 0:
        print(f"Waiting for {delay_seconds} seconds to fetch observation data...")
        # One tick per second so the progress bar reflects the remaining wait.
        for _ in tqdm(range(delay_seconds), desc="Progress", unit="s"):
            sleep(1)

    observation_response = langfuse.fetch_observation(observation_id)
    API_invok_count += 1

    return observation_response.data.dict()


def fetch_and_save_complete_data(session_id_list, raw_export_dir, delay_seconds=4):
    """
    Fetch the traces of every session and save each result as a raw JSON file.

    For each session ID the function pauses ``delay_seconds``, calls
    ``langfuse.fetch_traces(session_id=...)``, serialises the response with
    ``CustomJSONEncoder`` and writes it to ``raw_<session_id>.json`` inside
    ``raw_export_dir``.

    Parameters:
        session_id_list (list): Session IDs to process.
        raw_export_dir (str): Directory for the raw JSON files; created if missing.
        delay_seconds (int): Whole seconds to wait before each trace request.
            ``0`` (or a negative value) disables the pause.

    Side effects:
        Prints progress messages, writes one file per session and increments the
        module-level ``API_invok_count`` by one per trace request.
    """
    global API_invok_count

    os.makedirs(raw_export_dir, exist_ok=True)

    for session_id in session_id_list:
        print(f"Fetching traces for session {session_id}...")
        if delay_seconds > 0:
            # The old `if API_invok_count >= 0` guard was always true, so this
            # pause already ran before every request; it is now explicit.
            print(f"Waiting for {delay_seconds} seconds to fetch traces...")
            for _ in tqdm(range(delay_seconds), desc="Progress", unit="s"):
                sleep(1)

        # NOTE(review): no page/limit arguments are passed; if the endpoint is
        # paginated only the first page is exported. Confirm against the SDK docs.
        fetch_traces_response = langfuse.fetch_traces(session_id=session_id)
        API_invok_count += 1

        # Serialise completely before touching the file. CustomJSONEncoder issues
        # further API calls while encoding, and streaming straight into an open
        # file would leave truncated, invalid JSON behind if one of them failed.
        serialized = json.dumps(fetch_traces_response, cls=CustomJSONEncoder, indent=2)

        raw_path = os.path.join(raw_export_dir, f"raw_{session_id}.json")
        with open(raw_path, "w") as f:
            f.write(serialized)

        print(f"Raw JSON saved to: {raw_path}")


# Export every session listed in the setup cell into the raw-export directory.
fetch_and_save_complete_data(
    session_id_list=session_id_list,
    raw_export_dir=raw_export_dir,
)

Waiting for 4 seconds to fetch traces...


Progress: 100%|██████████| 4/4 [00:04<00:00,  1.00s/s]


Fetching traces for session deepseek-r1:14b_8757_sg_batch...
Fetching observation data for time-12-51-33-396457_chatcmpl-74d9ac94-2bfa-4fa3-b652-a590fea7c3fb...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.02s/s]


Fetching observation data for time-12-51-48-646627_chatcmpl-f963c295-e9c4-4cd2-99ad-bf0c4a822da4...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 911b014e-909e-4efe-9346-592a226300fa...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for feb89d73-2932-452c-a9a1-df2d694c8dcb...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-47-31-598396_chatcmpl-e3e9c943-0390-4c3e-b2c6-fa59d2ea1d76...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-47-48-034353_chatcmpl-ec572aa6-ac19-4da6-b6e3-cef6d6a1dcb9...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-48-15-638716_chatcmpl-a8b817a0-3259-401f-ba9b-cbf9334ed2d4...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-48-48-057530_chatcmpl-58d833f0-3fda-4dd2-bf66-872a53cf3b51...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 54e334fe-fd74-4b59-81bf-3827ac90c83d...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-49-11-931462_chatcmpl-3f9f5a16-37d9-4418-8201-ec365a1a2cdd...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-50-20-370258_chatcmpl-a994244e-c618-4f0d-a45b-bcb9ba9b038d...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 934a044c-f8a0-43a4-9da0-0d92e739a203...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-43-39-783949_chatcmpl-47305f86-0bb1-4a42-aa76-e51a5ad5be9f...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-43-57-561324_chatcmpl-d1762bf7-01ca-4946-aae6-4fb7b263b794...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-44-19-725357_chatcmpl-6aa78478-9c34-4536-8557-c87408b2122c...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-45-45-995619_chatcmpl-bc050d00-cfed-4fe8-af2f-bb78a229f8a1...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-46-35-901762_chatcmpl-801385ac-852b-431e-b580-003199ad6543...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-47-03-596513_chatcmpl-1deec0ec-6f4a-4cd8-855a-1851043da394...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 2b753d22-f12a-4c33-b887-a8ed04820574...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 9a7d9895-e00f-4f58-9a0c-2dc70fdfaba1...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-38-26-959351_chatcmpl-c927e901-dc9c-46f4-8b48-1ac892c46fb2...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-38-49-451728_chatcmpl-ada30d32-3604-4431-8ca9-1b855d275a00...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-39-20-968985_chatcmpl-f299d09a-762e-421d-9031-dd6cdf428cb2...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-40-28-731567_chatcmpl-96c8ca6c-15a7-4f28-8dd9-587c11ab477b...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-41-51-023820_chatcmpl-52619c76-33f5-4f35-a506-3b88ecb88f09...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-42-57-475762_chatcmpl-b6e91939-1e83-464d-aea7-ad42445b26be...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 6f847382-1218-4f0d-81b9-b531402effbe...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 8de3528e-8b59-45fb-aff3-9e626715fe63...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-32-15-067086_chatcmpl-5bb35198-7313-4cf7-9d01-4de82d8b6f4a...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-32-34-787945_chatcmpl-5a5fea73-2902-4471-96af-50911d9af229...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-34-06-238986_chatcmpl-b23a09cd-ef57-49b9-af10-72b2cb473b90...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-35-16-914957_chatcmpl-5e71afc0-b9b4-4179-83ae-0fab22ade9f8...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-36-51-486850_chatcmpl-73dc8244-028a-4d82-b351-6f8a122fff02...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 4583ef72-eb2e-4b83-88fd-f7b0ec834682...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-37-35-900060_chatcmpl-2da0d8eb-60eb-4a93-88f7-ef1c856936d0...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 9af68743-9fca-4843-b70e-5fbd8b463f05...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-29-07-229367_chatcmpl-7fe416f4-f680-4bb7-9875-4906d73ad189...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-29-27-967439_chatcmpl-c6812f84-935b-464b-8d6b-3d0c2f7fe960...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-30-40-939037_chatcmpl-76a77a5f-cc00-4298-a1df-31f2886b3f10...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-31-06-771444_chatcmpl-9a8ccdb9-871e-435f-8f02-c761d904aaa4...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for 7a9a5c01-284e-4606-a35c-dbcd6c85fdbc...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for c3966557-b33e-4157-9c47-c6d5739d43a6...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-24-30-435258_chatcmpl-bba038de-cdd9-4a91-971e-a3e5540e7491...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-24-44-628976_chatcmpl-431b1bc5-ba34-43e2-aa59-ec3394efa5b9...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-25-43-914058_chatcmpl-5e1f782e-e8cc-4f63-89fa-73796f7d27c8...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-26-39-872843_chatcmpl-c297213f-53c5-41aa-828b-e1b453985398...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-27-27-351786_chatcmpl-9225f627-91b8-43e5-b4f9-f9f8ace69fec...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-28-39-965392_chatcmpl-1cc6cc74-978f-4a70-ba9f-4e946a3a955e...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for d674e8e8-07d6-47aa-a979-dbc83cad2176...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for dbd3bdcd-4112-42b7-a008-8ddde4990ec5...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-22-34-602577_chatcmpl-49250c53-09d0-4d4e-9fea-78f9af541b9e...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-22-50-614107_chatcmpl-6e0839f9-0b1d-4372-a0d4-4e21eae198c9...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-23-13-722862_chatcmpl-a5b7481a-2520-4a2b-89bc-97cddf21eb35...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-23-45-272947_chatcmpl-9d340aff-11c4-4792-9fcf-be33396dbc01...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 518f3a96-047a-4627-aded-41dba31e7242...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for e8ec2b8e-672b-4e5e-8189-e14a67ab8b47...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-19-19-841299_chatcmpl-eddf7810-4487-4f16-9ac2-7451eb99197a...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-19-34-578270_chatcmpl-55e6316f-cf75-417f-92e5-abdf8239fec6...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-20-04-669724_chatcmpl-8440f382-1e67-4626-9bdf-ca207a63088e...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-20-43-870695_chatcmpl-1b327273-5be5-4b9c-9a3d-79d496d8af46...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-21-03-817227_chatcmpl-1ee658fe-40c7-4c4d-a85c-14895cc8cc86...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-21-42-280746_chatcmpl-b807a250-a786-4fca-9d33-b53cd131fa10...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 6513c819-03cb-44d2-80af-c1a068ff2c29...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for aa016fce-cec3-42e0-9c4b-149ed6573fa6...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-12-35-084421_chatcmpl-0703abe0-6fee-4137-98d0-fa23c2d38d4e...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-12-52-636891_chatcmpl-1b702348-6bde-437e-9fcf-99338a624aa6...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for a4244fb0-0fb5-4333-ad70-4b0b5d824c53...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-14-02-405742_chatcmpl-15c05211-551a-4b5c-a74e-a9369376a41b...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-15-33-807366_chatcmpl-8e104d47-9159-4479-a122-8644517bb57d...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-16-49-282268_chatcmpl-6ec4a2bf-85c3-416c-beb1-990556886af5...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-18-10-245945_chatcmpl-15ac7c4e-3363-44d8-b46f-c049684cb695...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for a89c62da-dd97-4bf7-9268-deb9e81fd297...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-06-12-292435_chatcmpl-e5daa48f-f723-4a39-8a1b-bcddb605d00f...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-06-26-099478_chatcmpl-27565516-6d11-482b-98d4-aaac7a873577...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-07-38-058029_chatcmpl-c89b9ab5-9926-4bf7-90cd-f2ad972039c7...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-08-15-029713_chatcmpl-d1d2e7f8-faab-4581-9fbf-25d426ee9717...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 55e5e86b-5d44-4ce4-9e03-9283c996d2e7...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-09-27-426504_chatcmpl-605bd201-4ac7-4d5b-88f3-bb3960728c74...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-10-58-116241_chatcmpl-8cee0b7e-d61e-4b06-be4a-7a2acd841731...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 691a7da6-1c5c-4856-bf11-b620021a2d82...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-11-59-13-377229_chatcmpl-c1712ce4-fd55-4f0a-9ffa-616298bcd432...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-11-59-32-411126_chatcmpl-d5c1d291-5462-4d44-8b1a-5293f4d86b7d...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-00-32-466281_chatcmpl-48de16a7-d3b6-4827-9f93-ae2f78a4b1dc...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-01-59-079042_chatcmpl-0247feb9-2f43-40d1-a733-8234f3d25770...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-12-03-43-561091_chatcmpl-ff4074e9-c9eb-473c-9d93-5ddd4d4d3001...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-12-04-51-656805_chatcmpl-14fdd613-07f4-4e6f-8f7e-a06709a70717...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 7ec4dc97-8195-42db-bec5-2000b7d81f18...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 86d380a2-dbfc-4bd2-888d-cffd6dbe8e6b...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for time-11-49-41-099308_chatcmpl-f7ef9b6b-fa06-46b2-ae22-2d28aa2c3327...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-11-54-56-445675_chatcmpl-edad35fd-7931-41fc-a79a-c2f66f749488...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-11-55-12-099251_chatcmpl-88912944-2ab9-4208-957d-0eb4062f2fa2...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-11-56-11-705870_chatcmpl-b1eb3d1e-89f3-410e-9762-abfebb34352a...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-11-57-07-698814_chatcmpl-362a4fde-872e-4590-8527-0a847eec7f99...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-11-57-53-694850_chatcmpl-88df37dd-c05c-4c92-8b5d-f039e3cb1b64...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for time-11-58-26-775151_chatcmpl-b53b0402-7834-4831-990c-d1d55d908608...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 04f47b66-6ff1-400f-89fb-9325d20463b9...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 941b4b99-2fbe-493e-8671-4dcb0510229b...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 42a8ccbf-6b35-4a3f-bfec-222fe73a6e88...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 53c6a7db-fb47-42ec-92e4-edfceab31a6d...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for c151e09e-9fb1-4229-9f17-d9017c7b6fdd...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.00s/s]


Fetching observation data for e234815c-ced5-4bd7-9e0d-db0560cda7e9...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 73a97cfe-c854-4d8e-9a2b-506d70eac2d1...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for ceac5af7-ccce-4965-b2fa-baa33f9cbdd8...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 0af2a3e5-06b2-4250-a5f1-e81d9df72474...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 8dc0493e-bb1a-4f45-aa0e-816a858a0d7d...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 44315765-9529-447b-b92b-22d4828a5567...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for c2621b16-e3c8-4d38-b0cd-6f2a1a4c9459...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 0427679e-fe94-4679-95f4-2976cba44630...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 3e1ba8ea-dacd-4995-908d-b8f471c47174...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 6f84001f-9330-4a45-8695-fccc477090e7...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 9de5fc4b-26eb-46e0-b0eb-eb5af2303bf9...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 9907fc71-30e7-45a1-b712-932927a3f51d...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 9fc59ad7-507f-410a-872c-c70491720021...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for 67920c13-617b-4a62-a828-2f71501c61eb...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.01s/s]


Fetching observation data for a553edcd-8aa3-422f-a97b-4497a6430446...
Waiting for 3 seconds to fetch observation data...


Progress: 100%|██████████| 3/3 [00:03<00:00,  1.02s/s]

Raw JSON saved to: /Users/hann/Projects/reference-benchmark-tinyml_llm/data_analysis/03.18/raw_export/raw_deepseek-r1:14b_8757_sg_batch.json





## 2.2 Trim data

This step also detects runs that ended with a fatal error and excludes them from the analysis.


In [19]:
import os
import json
from datetime import datetime


# Names of the traces that trim_data() dropped because one of their error
# observations reported a fatal error. Assigned at cell level, so it starts
# empty again every time this cell is re-run.
skipped_traces = []


def process_existing_observation(observation):
    """
    Return a copy of an observation dictionary without the bulky or unused fields.

    Anything that is not a dictionary is handed back unchanged. The input
    dictionary itself is never modified.
    """
    if not isinstance(observation, dict):
        return observation

    dropped_fields = {
        "completionStartTime",
        "metadata",
        "timeToFirstToken",
        "createdAt",
        "usageDetails",
        "usage",
        "projectId",
        "unit",
        "updatedAt",
        "version",
        "parentObservationId",
        "promptId",
        "promptName",
        "promptVersion",
        "modelId",
        "inputPrice",
        "outputPrice",
        "totalPrice",
        "modelParameters",
        "input",
        "output",
    }

    # Keep the remaining fields in their original order.
    kept = {}
    for field, field_value in observation.items():
        if field in dropped_fields:
            continue
        kept[field] = field_value
    return kept


# Keys removed from every dictionary that trim_data() visits.
_UNWANTED_TRACE_KEYS = frozenset(
    {
        "release",
        "version",
        "user_id",
        "public",
        "html_path",
        "scores",
        "bookmarked",
        "projectId",
        "externalId",
        "page",
        "limit",
        "total_pages",
    }
)


def _trace_has_fatal_error(trace):
    """
    Report whether a trace contains an error observation with a fatal error.

    An observation counts as an error observation when its ``name`` is a string
    starting with ``"error"``; it is fatal when its ``statusMessage`` contains
    ``"Fatal error"``. Observations with a missing or null ``name`` or
    ``statusMessage`` are tolerated (they used to raise and abort the session).
    The status of every error observation inspected is printed.
    """
    observations = trace.get("observations")
    if not isinstance(observations, list):
        return False

    for obs in observations:
        if not isinstance(obs, dict):
            continue
        ob_name = obs.get("name")
        if not (isinstance(ob_name, str) and ob_name.startswith("error")):
            continue

        # JSON null arrives as None; normalise it so the substring test is safe.
        status_message = obs.get("statusMessage") or ""
        print(f"SPAN {ob_name}: {status_message}")

        if "Fatal error" in str(status_message):
            print("Found Fatal error in SPAN observation, skipping trace")
            return True
    return False


def trim_data(data):
    """
    Recursively trim a decoded JSON structure.

    * Dictionaries lose the keys listed in ``_UNWANTED_TRACE_KEYS``; nested
      dictionaries and lists are trimmed the same way.
    * A dictionary with an ``"observations"`` key is treated as a trace: if one
      of its error observations is fatal, the trace name is appended to the
      module-level ``skipped_traces`` and ``None`` is returned. Otherwise each
      observation is passed through ``process_existing_observation``.
    * Lists are trimmed element by element, and elements that come back as
      ``None`` are dropped. This is how skipped traces disappear from the output.
    * Any other value is returned unchanged.

    Returns:
        The trimmed structure, or ``None`` when ``data`` is a trace that must be
        skipped.
    """
    if isinstance(data, list):
        trimmed_items = (trim_data(item) for item in data)
        return [item for item in trimmed_items if item is not None]

    if not isinstance(data, dict):
        return data

    if "observations" in data and _trace_has_fatal_error(data):
        # .get() so a trace without a name is still recorded instead of raising.
        skipped_traces.append(data.get("name"))
        return None  # Signal to skip this trace

    trimmed_data = {}
    for key, value in data.items():
        if key in _UNWANTED_TRACE_KEYS:
            continue
        if key == "observations" and isinstance(value, list):
            # Observations have their own, larger list of unwanted fields.
            trimmed_data[key] = [process_existing_observation(obs) for obs in value]
        elif isinstance(value, (dict, list)):
            trimmed_data[key] = trim_data(value)
        else:
            trimmed_data[key] = value
    return trimmed_data


def read_and_trim_data(session_id_list, raw_export_dir, trimmed_export_dir):
    """
    Trim the raw JSON export of each session and write the result to disk.

    For every id in ``session_id_list`` the file ``raw_<id>.json`` is loaded
    from ``raw_export_dir``, passed through ``trim_data`` and, unless
    ``trim_data`` returns ``None``, dumped to ``trimmed_<id>.json`` inside
    ``trimmed_export_dir`` (created if missing).

    Any exception raised while handling one session is printed and the loop
    moves on to the next session; nothing is raised to the caller.

    Args:
        session_id_list (list[str]): Session ids to process.
        raw_export_dir (str): Directory that holds the ``raw_*.json`` files.
        trimmed_export_dir (str): Directory that receives ``trimmed_*.json``.

    Returns:
        None
    """
    os.makedirs(trimmed_export_dir, exist_ok=True)

    for session_id in session_id_list:
        try:
            # Ids that start with "da0a" are looked up with a "phi4_" prefix.
            session_id = (
                "phi4_" + session_id if session_id.startswith("da0a") else session_id
            )

            # Load the raw export for this session.
            source_file = os.path.join(raw_export_dir, f"raw_{session_id}.json")
            with open(source_file, "r") as src:
                raw_payload = json.load(src)

            trimmed_payload = trim_data(raw_payload)

            if trimmed_payload is not None:
                # Persist the trimmed payload under the "trimmed_" prefix.
                target_file = os.path.join(
                    trimmed_export_dir, f"trimmed_{session_id}.json"
                )
                with open(target_file, "w") as dst:
                    json.dump(trimmed_payload, dst, indent=2)

                print(
                    f"Successfully processed and saved trimmed data for session {session_id}"
                )

                # Optional: Verify trimming worked
                # print(f"Verifying trimmed data for session {session_id}...")
                # verify_trimming(target_file)
            else:
                # trim_data signalled that nothing is left to save.
                print(
                    f"All traces in session {session_id} were filtered due to fatal errors"
                )

        except Exception as e:
            print(f"Error processing session {session_id}: {str(e)}")


def verify_trimming(trimmed_path):
    """
    Scan a trimmed JSON file and warn about every unwanted key still present.

    The file at ``trimmed_path`` is loaded and walked depth-first in document
    order. Each dictionary key that appears in the unwanted set produces one
    warning line on stdout; a final "Verification complete" line is always
    printed. Nothing is returned and nothing is modified.

    Args:
        trimmed_path (str): Path of the trimmed JSON file to inspect.

    Returns:
        None
    """
    unwanted_keys = frozenset(
        (
            "release",
            "version",
            "user_id",
            "public",
            "html_path",
            "scores",
            "bookmarked",
            "projectId",
            "externalId",
            "page",
            "limit",
            "total_pages",
            "completionStartTime",
            "metadata",
            "usageDetails",
            "timeToFirstToken",
            "createdAt",
            "completionTokens",
            "promptTokens",
            "unit",
            "updatedAt",
            # "statusMessage",
            "parentObservationId",
            "promptId",
            "promptName",
            "promptVersion",
            "modelId",
            "inputPrice",
            "outputPrice",
            "totalPrice",
            "calculatedInputCost",
            "calculatedOutputCost",
            "calculatedTotalCost",
        )
    )

    with open(trimmed_path, "r") as handle:
        pending = [json.load(handle)]

    # Explicit stack instead of recursion. Children are pushed in reverse so
    # they are popped (and reported) in the same order the file lists them.
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            # Report this dictionary's own keys before descending into values.
            for key in node:
                if key in unwanted_keys:
                    print(f"Warning: Found unwanted key '{key}' in trimmed data")
            pending.extend(reversed(list(node.values())))
        elif isinstance(node, list):
            pending.extend(reversed(node))

    print("Verification complete")


# Usage example: trim every configured session. The same directory is passed
# for input and output; raw and trimmed files differ only by their
# "raw_" / "trimmed_" filename prefix.
read_and_trim_data(
    session_id_list=session_id_list,
    raw_export_dir=raw_export_dir,
    trimmed_export_dir=raw_export_dir,
)
# skipped_traces is filled by trim_data with the names of traces it dropped.
print(f"Total {len(skipped_traces)} traces skipped. They are {skipped_traces}")

SPAN error_e5_sg_failure_signal_sketch_generator: Fatal error from Ollama server, this run terminated and should be excluded from the batch run:
 litellm.APIConnectionError: OllamaException - {"error":"llama runner process has terminated: CUDA error"}
Found Fatal error in SPAN observation, skipping trace
SPAN error_2a_sg_failure_signal_sketch_generator: Failed to generate valid code after the max 5 attempts, programme terminated.
SPAN error_ea_sg_failure_signal_sketch_generator: Failed to generate valid code after the max 5 attempts, programme terminated.
SPAN error_4f_sg_failure_signal_sketch_generator: Failed to generate valid code after the max 5 attempts, programme terminated.
SPAN error_6f_sg_failure_signal_sketch_generator: Failed to generate valid code after the max 5 attempts, programme terminated.
SPAN error_84_sg_failure_signal_sketch_generator: Fatal error from Ollama server, this run terminated and should be excluded from the batch run:
 litellm.APIConnectionError: OllamaEx

## 3. Generate CSV files from JSON


In [11]:
import traceback
import pandas as pd


def json_to_csv(session_id, raw_dir=None, processed_dir=None):
    """
    Convert JSON trace data to CSV format with aggregated metrics.

    Loads ``trimmed_<session_id>.json`` from ``raw_dir``, builds one row per
    trace (token, latency and cost totals over its GENERATION observations plus
    a status derived from its SPAN observation names) and writes the rows,
    sorted by ``num_run``, to
    ``<processed_dir>/<simple id>/clean_<session_id>.csv``, where the simple id
    is the session id without its last two ``_``-separated parts.

    All errors are reported with ``print`` and swallowed, so one failing session
    does not interrupt a loop over several sessions. A session whose ``data``
    list is empty is reported and produces no CSV.

    Args:
        session_id (str): Identifier for the session to process
        raw_dir (str | None): Directory containing the trimmed JSON file.
            Defaults to the notebook-level ``raw_export_dir``.
        processed_dir (str | None): Root directory for the CSV output.
            Defaults to the notebook-level ``processed_data_dir``.

    Returns:
        None
    """

    def extract_observation_details(observations, trace_id):
        """
        Extract and aggregate metrics from one trace's observations.

        Raises:
            ValueError: If more than one end/failure SPAN is present.
        """
        metrics = {
            "status": None,
            "latency": 0,
            "total_tokens": 0,
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_cost": 0,
            "input_cost": 0,
            "output_cost": 0,
        }

        # Process GENERATION observations
        for obs in (o for o in observations if o["type"] == "GENERATION"):
            metrics["total_tokens"] += obs["totalTokens"]
            metrics["prompt_tokens"] += obs["promptTokens"]
            metrics["completion_tokens"] += obs["completionTokens"]
            metrics["latency"] += obs["latency"]

            # Add costs if present (missing or null costs count as 0)
            for cost_type in ["Total", "Input", "Output"]:
                key = f"calculated{cost_type}Cost"
                metric_key = cost_type.lower() + "_cost"
                if obs.get(key) is not None:
                    metrics[metric_key] += obs[key]

        # Process SPAN observations for status; "start_" spans carry no outcome
        status_indicators = [
            obs["name"]
            for obs in observations
            if obs["type"] == "SPAN" and "start_" not in obs["name"]
        ]

        # Determine status
        success_signals = sum("end_" in name for name in status_indicators)
        failure_signals = sum("failure_signal" in name for name in status_indicators)

        # A trace is expected to finish at most once, either way.
        if success_signals + failure_signals > 1:
            raise ValueError(f"Multiple status indicators found in trace {trace_id}")

        metrics["status"] = (
            "success"
            if success_signals
            else "failure" if failure_signals else "unknown"
        )

        # Rename input/output costs; pop + re-insert moves them to the end,
        # which fixes the column order of the resulting CSV.
        metrics["prompt_cost"] = metrics.pop("input_cost")
        metrics["completion_cost"] = metrics.pop("output_cost")
        # NOTE(review): dividing by 1000 presumably converts ms to s - confirm
        # the unit of the per-observation "latency" field.
        metrics["latency"] = round(metrics["latency"] / 1000, 2)
        return metrics

    try:

        # Ids that start with "da0a" are looked up with a "phi4_" prefix.
        if session_id.startswith("da0a"):
            session_id = "phi4_" + session_id
        simple_session_id = session_id.rsplit("_", 2)[0]

        # Fall back to the notebook-level directories when none are given.
        if raw_dir is None:
            raw_dir = raw_export_dir
        if processed_dir is None:
            processed_dir = processed_data_dir

        # Load JSON data
        trimmed_path = os.path.join(raw_dir, f"trimmed_{session_id}.json")
        print(
            f"Processing session {session_id}, simple id {simple_session_id}. Look for {trimmed_path}"
        )
        with open(trimmed_path, "r") as file:
            traces = json.load(file)["data"]

        # Without rows the DataFrame has no "num_run" column and sorting would
        # fail with an unhelpful KeyError, so report the empty session instead.
        if not traces:
            print(f"No traces found for session {session_id}; no CSV written")
            return

        # Process traces
        rows = [
            {
                "num_run": trace["metadata"]["num_run"],
                "name": trace["name"],
                "trace_id": trace["id"],
                "batch_id": trace["session_id"],
                **extract_observation_details(
                    trace["observations"],
                    trace["id"],
                ),
                "tags": trace["tags"],
            }
            for trace in traces
        ]

        # Create and save DataFrame
        df = pd.DataFrame(rows).sort_values("num_run")

        output_dir = os.path.join(processed_dir, f"{simple_session_id}")
        os.makedirs(output_dir, exist_ok=True)

        output_path = os.path.join(output_dir, f"clean_{session_id}.csv")

        print(output_path)
        df.to_csv(output_path, index=False)
        print(f"Successfully saved CSV to: {output_path}")

    except FileNotFoundError as e:
        print(
            f"FileNotFoundError: For session {session_id} not found. Looked for {trimmed_path}\nError info: \n{e}\n\nTraceback: {traceback.format_exc()}"
        )
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in input file for session {session_id}")
    except Exception as e:
        print(f"Error processing session {session_id}: {str(e)}")


# Example usage: convert every session listed in session_id_list to CSV.
# json_to_csv prints its own errors instead of raising, so a failing session
# does not stop the loop. The loop variable session_id stays bound at notebook
# level after the loop finishes.
for session_id in session_id_list:
    json_to_csv(session_id)

Processing session deepseek-r1:14b_7a02_sg_batch, simple id deepseek-r1:14b_7a02. Look for /Users/hann/Projects/reference-benchmark-tinyml_llm/data_analysis/03.18/raw_export/trimmed_deepseek-r1:14b_7a02_sg_batch.json
<generator object json_to_csv.<locals>.<genexpr> at 0x13c44b610>
/Users/hann/Projects/reference-benchmark-tinyml_llm/data_analysis/03.18/processed_data/deepseek-r1:14b_7a02/clean_deepseek-r1:14b_7a02_sg_batch.csv
Successfully saved CSV to: /Users/hann/Projects/reference-benchmark-tinyml_llm/data_analysis/03.18/processed_data/deepseek-r1:14b_7a02/clean_deepseek-r1:14b_7a02_sg_batch.csv


## Code below is archived


In [12]:
# """Simply calculate success rate"""


# def cal_success_rate(session_id):

#     end_signal_count = 0
#     failure_signal_count = 0
#     # Function to print the name of each observation
#     with open(f"{raw_export_dir}/trimmed_{session_id}.json", "r") as file:
#         data = json.load(file)["data"]
#     for i in data:

#         observations = i["observations"]

#         for observation in observations:
#             # print(type(observation))
#             for key, value in observation.items():
#                 # print(f"{key}: {value}")
#                 for key, value in value.items():
#                     # print(f"{key}: {value}")
#                     if key == "name":
#                         if "end_" in value:

#                             end_signal_count += 1
#                         if "failure_signal" in value:

#                             failure_signal_count += 1

#     print(f"Session ID: {session_id}")
#     total_count = end_signal_count + failure_signal_count
#     if total_count > 0:
#         success_rate = end_signal_count / total_count
#         print(f"Success rate: {success_rate:.4f}")
#     else:
#         print("Success rate: N/A (no signals found)")
#     print(f"Passed:\t{end_signal_count}\nFailed:\t{failure_signal_count}")
#     print(
#         ""
#         if total_count == 30
#         else "Number of ending signals does not match the expected number!"
#     )
#     print("-" * 50)


# for session_id in session_id_list:
#     cal_success_rate(session_id)

In [13]:
# def cal_time(start_time, end_time):
#     time_diff = datetime.fromisoformat(
#         end_time.replace("Z", "+00:00")
#     ) - datetime.fromisoformat(start_time.replace("Z", "+00:00"))
#     seconds_diff = time_diff.total_seconds()
#     return seconds_diff


# print(cal_time("2025-01-15T03:31:56.150000+00:00", "2025-01-15T03:32:59.384Z"))

In [14]:
# """Print the complete structure of exported json file"""
# def print_keys(d, parent_key=''):
#     if isinstance(d, dict):
#         for key, value in d.items():
#             full_key = f"{parent_key}.{key}" if parent_key else key
#             print(full_key)
#             print_keys(value, full_key)
#     elif isinstance(d, list):
#         for i, item in enumerate(d):
#             full_key = f"{parent_key}[{i}]"
#             print_keys(item, full_key)

# # Load JSON data from a file
# with open('fetch_traces_response.json', 'r') as file:
#     data = json.load(file)['data'][0]
# # Print all keys
# print_keys(data)