## 1. Setting up


In [1]:
# ref: https://langfuse.com/docs/query-traces
import os
import json
from langfuse import Langfuse
import json
import os
from datetime import datetime
import pandas as pd


LOCAL_HOST = True

"""Define session_id"""
# session_id="qwen2.5-coder_f4d4_dp_batch"
session_id_list = [
    # "qwen2.5-coder:32b_4e11_tpsg_batch",
    # "qwen2.5-coder:32b_ae24_tpsg_batch"
    "qwen2.5-coder:14b_c83f_tpusg_batch",
    "qwen2.5-coder:14b_c83f_psg_batch"
]


"""Define paths"""
 
parent_dir = os.path.dirname(os.getcwd())

 
# date = os.path.basename(parent_dir)
tex_dir = os.path.join(parent_dir, "tex")
processed_data_dir = os.path.join(parent_dir, "processed_data")
raw_export_dir = os.path.join(parent_dir, "raw_export")
ipynb_dir = os.path.join(parent_dir, "ipynb")



## 2.1 Export raw data

Langfuse added a limit of 20 API invocations per minute. https://langfuse.com/faq/all/api-limits


In [2]:
# ALTERNATIVE TO 2.
import os
import json
from time import sleep
from langfuse import Langfuse
from datetime import datetime
from tqdm import tqdm

# LANGFUSE_SERVICE_PUBLIC_KEY = "pk-lf-559a2c0f-ee29-4c32-944c-bf73b5f0ce28"
# LANGFUSE_SERVICE_SECRET_KEY = "sk-lf-75f8bf7f-a5db-4756-b0dd-d758a2a292c8"
# LANGFUSE_SERVICE_HOST = "https://langfuse.hann.fi"


if LOCAL_HOST:
    langfuse = Langfuse(
        secret_key="sk-lf-75f8bf7f-a5db-4756-b0dd-d758a2a292c8",
        public_key="pk-lf-559a2c0f-ee29-4c32-944c-bf73b5f0ce28",
        host="https://langfuse.hann.fi",
    )
else:
    langfuse = Langfuse(
        secret_key=LANGFUSE_SERVICE_SECRET_KEY,
        public_key=LANGFUSE_SERVICE_PUBLIC_KEY,
        host=LANGFUSE_SERVICE_HOST,
    )

API_invok_count = 0
query_range_num_run = {"start": 0, "end": 1}


class CustomJSONEncoder(json.JSONEncoder):
    def __init__(self, *args, LOCAL_HOST=True, **kwargs):
        self.LOCAL_HOST = LOCAL_HOST
        super().__init__(*args, **kwargs)

    def default(self, obj):
        if isinstance(obj, datetime):
            return obj.isoformat()
        if hasattr(obj, "__dict__"):
            data = obj.__dict__.copy()
            if "observations" in data:
                data["observations"] = [
                    fetch_observation_data(obs, self.LOCAL_HOST)
                    for obs in data["observations"]
                ]

            return data
        return super().default(obj)


def fetch_observation_data(observation_id, LOCAL_HOST):
    """
    Fetches observation data from Langfuse and returns its dictionary representation.
    """
    print(f"Fetching observation data for {observation_id}...")
    global API_invok_count
    if API_invok_count >= 0 and not LOCAL_HOST:
        print("Waiting for 3 seconds to fetch observation data...")
        for _ in tqdm(range(3), desc="Progress", unit="s"):
            sleep(1)
        API_invok_count = 0

    observation_response = langfuse.fetch_observation(observation_id)
    API_invok_count += 1

    return observation_response.data.dict()


def fetch_and_save_complete_data(session_id_list, raw_export_dir, LOCAL_HOST):
    """
    Fetches complete trace data for each session ID and saves it to JSON files.

    Parameters:
        session_id_list (list): List of session IDs to process.
        raw_export_dir (str): Directory path to save raw JSON files.
    """

    def save_complete_data(session_id):
        global API_invok_count
        if API_invok_count >= 0 and not LOCAL_HOST:
            print("Waiting for 4 seconds to fetch traces...")
            for _ in tqdm(range(4), desc="Progress", unit="s"):
                sleep(1)
            API_invok_count = 0

        fetch_traces_response = langfuse.fetch_traces(session_id=session_id)
        API_invok_count += 1

        print(f"Fetching traces for session {session_id}...")
        # Create directories if they don't exist
        os.makedirs(raw_export_dir, exist_ok=True)

        # Save complete data to JSON file
        # if session_id.startswith("da0a"):
        #     session_id = "phi4_" + session_id
        if "tpsg" in session_id:
            session_id_ = session_id.replace("tpsg", "tpusg")
        else:
            session_id_ = session_id
            
        raw_path = os.path.join(raw_export_dir, f"raw_{session_id_}.json")
        with open(raw_path, "w") as f:
            json.dump(fetch_traces_response, f, cls=CustomJSONEncoder, indent=2)

        print(f"Raw JSON saved to: {raw_path}")

    for session_id in session_id_list:
        save_complete_data(session_id)


fetch_and_save_complete_data(session_id_list, raw_export_dir, LOCAL_HOST)

Fetching traces for session qwen2.5-coder:14b_c83f_tpusg_batch...
Fetching observation data for time-03-26-36-529470_chatcmpl-945c5f9f-cf3c-4c00-8e47-92e5c86705a3...
Fetching observation data for time-03-25-28-963478_chatcmpl-d66d74cf-5da8-4236-9026-634ba753a2c3...
Fetching observation data for time-03-24-16-173487_chatcmpl-9d39e85d-d152-4a1b-8373-2208f078cfb8...
Fetching observation data for time-03-23-08-606344_chatcmpl-2c5b1d3f-4e82-4790-be26-078db2b93be1...
Fetching observation data for time-03-21-11-028718_chatcmpl-ac3aab4a-80d7-46e3-bb0b-a7471d1d3edf...
Fetching observation data for time-03-21-32-275878_chatcmpl-52e16218-1bbc-4bf0-9caf-414fe9693373...
Fetching observation data for time-03-21-56-365651_chatcmpl-9ad29c36-875f-43ef-b71f-f96339959137...
Fetching observation data for time-03-19-58-261013_chatcmpl-89aa28c8-3a6e-4feb-a58e-c07bc163fca2...
Fetching observation data for time-03-14-59-718492_chatcmpl-c8074626-76dc-42e9-a75a-534ddd96fa90...
Fetching observation data for time

## 2.2 Trim data

Here also intercept the runs with fatal errors that need to be excluded from the analysis.


In [3]:
import os
import json
from datetime import datetime


skipped_traces = []


def process_existing_observation(observation):
    """
    Processes an existing observation dictionary by trimming unwanted keys.
    """
    unwanted_observation_keys = [
        "completionStartTime",
        "metadata",
        "timeToFirstToken",
        "createdAt",
        "usageDetails",
        "usage",
        "projectId",
        "unit",
        "updatedAt",
        "version",
        "parentObservationId",
        "promptId",
        "promptName",
        "promptVersion",
        "modelId",
        "inputPrice",
        "outputPrice",
        "totalPrice",
        # "modelParameters",
        "input",
        "output",
    ]

    # If observation is a dictionary containing observation data
    if isinstance(observation, dict):
        trimmed_observation = {
            k: v for k, v in observation.items() if k not in unwanted_observation_keys
        }
        return trimmed_observation
    return observation


def trim_data(data):
    """
    Recursively trims the data structure.
    """

    if isinstance(data, dict):
        # Process the current dictionary
        unwanted_trace_keys = [
            "release",
            "version",
            "user_id",
            "public",
            "html_path",
            "scores",
            "bookmarked",
            "projectId",
            "externalId",
            "page",
            "limit",
            "total_pages",
        ]

        # If this is a trace that contains observations, check for fatal errors
        if "observations" in data:
            # Check for SPAN observations with fatal errors before processing
            skip_trace = False
            for obs in data["observations"]:
                if isinstance(obs, dict) and obs.get("name").startswith("error"):
                    status_message = obs.get("statusMessage", "")
                    ob_name = obs.get("name")
                    print(f"SPAN {ob_name}: {status_message}")

                    if "Fatal error" in status_message:
                        print(f"Found Fatal error in SPAN observation, skipping trace")
                        skip_trace = True
                        skipped_traces.append(data["name"])
                        break

            if skip_trace:
                return None  # Signal to skip this trace

        # Create a new dictionary with wanted keys and recursively process values
        trimmed_data = {}
        for key, value in data.items():
            if key not in unwanted_trace_keys:
                if key == "observations":
                    # Special handling for observations
                    trimmed_data[key] = [
                        process_existing_observation(obs) for obs in value
                    ]
                elif isinstance(value, (dict, list)):
                    # Recursively process nested structures
                    trimmed_data[key] = trim_data(value)
                else:
                    trimmed_data[key] = value

        return trimmed_data

    elif isinstance(data, list):
        # Recursively process each item in the list
        processed_items = []
        for item in data:
            processed_item = trim_data(item)
            if processed_item is not None:  # Only add items that weren't filtered out
                processed_items.append(processed_item)
        return processed_items

    else:
        # Return non-dict, non-list values as is
        return data


def read_and_trim_data(session_id_list, raw_export_dir, trimmed_export_dir):
    """
    Reads complete data from JSON files, trims the data, and saves the trimmed data to new JSON files.
    """
    os.makedirs(trimmed_export_dir, exist_ok=True)

    for session_id in session_id_list:
        try:
            if session_id.startswith("da0a"):
                session_id = "phi4_" + session_id
            # Read raw data
            if "tpsg" in session_id:
                session_id_ = session_id.replace("tpsg", "tpusg")
            else:
                session_id_ = session_id
            raw_path = os.path.join(raw_export_dir, f"raw_{session_id_}.json")
            with open(raw_path, "r") as f:
                data = json.load(f)

            # Process and trim the data
            trimmed_data = trim_data(data)

            # If the entire data was filtered out (unlikely but possible)
            if trimmed_data is None:
                print(
                    f"All traces in session {session_id} were filtered due to fatal errors"
                )
                continue

            # Save trimmed data
            if "tpsg" in session_id:
                session_id_ = session_id.replace("tpsg", "tpusg")
            else:
                session_id_ = session_id
            trimmed_path = os.path.join(
                trimmed_export_dir, f"trimmed_{session_id_}.json"
            )
            with open(trimmed_path, "w") as f:
                json.dump(trimmed_data, f, indent=2)

            print(
                f"Successfully processed and saved trimmed data for session {session_id}"
            )

            # Optional: Verify trimming worked
            # print(f"Verifying trimmed data for session {session_id}...")
            # verify_trimming(trimmed_path)

        except Exception as e:
            print(f"Error processing session {session_id}: {str(e)}")


def verify_trimming(trimmed_path):
    """
    Verifies that the trimmed data doesn't contain unwanted keys.
    """
    with open(trimmed_path, "r") as f:
        trimmed_data = json.load(f)

    unwanted_keys = [
        "release",
        "version",
        "user_id",
        "public",
        "html_path",
        "scores",
        "bookmarked",
        "projectId",
        "externalId",
        "page",
        "limit",
        "total_pages",
        "completionStartTime",
        "metadata",
        "usageDetails",
        "timeToFirstToken",
        "createdAt",
        "completionTokens",
        "promptTokens",
        "projectId",
        "unit",
        "updatedAt",
        "version",
        # "statusMessage",
        "parentObservationId",
        "promptId",
        "promptName",
        "promptVersion",
        "modelId",
        "inputPrice",
        "outputPrice",
        "totalPrice",
        "calculatedInputCost",
        "calculatedOutputCost",
        "calculatedTotalCost",
    ]

    def check_keys(obj):
        if isinstance(obj, dict):
            for key in obj.keys():
                if key in unwanted_keys:
                    print(f"Warning: Found unwanted key '{key}' in trimmed data")
            for value in obj.values():
                check_keys(value)
        elif isinstance(obj, list):
            for item in obj:
                check_keys(item)

    check_keys(trimmed_data)
    print("Verification complete")


# Usage example:
read_and_trim_data(session_id_list, raw_export_dir, raw_export_dir)
print(f"Total {len(skipped_traces)} traces skipped. They are {skipped_traces}")

SPAN error_c8_tpusg_failure_signal_tpu_sketch_generator: Failed: Max retries reached with error. Last error: Traceback (most recent call last):
  File "script_a5bf77e9_1755647879.py", line 35, in <module>
    input_shape = input_details[0]['shape']
TypeError: string indices must be integers.
SPAN error_b3_tpusg_failure_signal_tpu_sketch_generator: Failed: Max retries reached with error. Last error: Traceback (most recent call last):
  File "script_77017dc4_1755646988.py", line 73, in <module>
    if scores[i] > confidence_threshold:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all().
Successfully processed and saved trimmed data for session qwen2.5-coder:14b_c83f_tpusg_batch
SPAN error_0c_psg_failure_signal_py_sketch_generator: Failed. Last error: Max retries reached with failure. Last error from execution: INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
Traceback (most recent call last):
  File "/home/wuguangh/Projects/tinym

## 3. Generate CSV files from JSON


In [4]:
import traceback
import pandas as pd
from datetime import datetime
from dateutil import parser


def json_to_csv(session_id):
    """
    Convert JSON trace data to CSV format with aggregated metrics.

    Args:
        session_id (str): Identifier for the session to process
    """

    def extract_observation_details(observations, trace_id):
        """Extract and aggregate metrics from observations"""
        metrics = {
            "status": None,
            "latency": 0,
            "total_tokens": 0,
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_cost": 0,
            "input_cost": 0,
            "output_cost": 0,
            "parameters": set(),
        }

        # Process GENERATION observations
        for obs in (o for o in observations if o["type"] == "GENERATION"):
            metrics["total_tokens"] += obs["totalTokens"]
            metrics["prompt_tokens"] += obs["promptTokens"]
            metrics["completion_tokens"] += obs["completionTokens"]
            metrics["latency"] += obs["latency"]
            for key, value in obs["modelParameters"].items():
                metrics["parameters"].add(key + ":" + value)

            # Add costs if present
            for cost_type in ["Total", "Input", "Output"]:
                key = f"calculated{cost_type}Cost"
                metric_key = cost_type.lower() + "_cost"
                if obs.get(key) is not None:
                    metrics[metric_key] += obs[key]
        if len(metrics["parameters"]) == 0:
            metrics["parameters"] = "N/A"
        # Process SPAN observations for status
        status_indicators = [
            obs["name"]
            for obs in observations
            if obs["type"] == "SPAN" and "start_" not in obs["name"]
        ]

        #  if later than 2025-05-19, use status_signal_from_output
        if datetime.now() > datetime(2025, 5, 19):
            pass
        else:
            # Determine status
            success_signals = sum("end_" in name for name in status_indicators)
            failure_signals = sum(
                "failure_signal" in name for name in status_indicators
            )

            if success_signals + failure_signals > 1:
                raise ValueError(
                    f"Multiple status indicators found in trace {trace_id}"
                )

            metrics["status"] = (
                "success"
                if success_signals
                else "failure" if failure_signals else "unknown"
            )

        metrics["prompt_cost"] = metrics.pop("input_cost")
        metrics["completion_cost"] = metrics.pop("output_cost")
        metrics["latency"] = round(metrics["latency"] / 1000, 2)
        return metrics

    def cal_time(trace):
        time_diff = datetime.fromisoformat(
            trace["updatedAt"].replace("Z", "+00:00")
        ) - datetime.fromisoformat(trace["createdAt"].replace("Z", "+00:00"))
        seconds_diff = time_diff.total_seconds()
        return seconds_diff

    try:

        if session_id.startswith("da0a"):
            session_id = "phi4_" + session_id
        simple_session_id = session_id.rsplit("_", 2)[0]

        
        # Load JSON data
        if "tpsg" in session_id:
                session_id_ = session_id.replace("tpsg", "tpusg")
        else:
                session_id_ = session_id
        trimmed_path = os.path.join(raw_export_dir, f"trimmed_{session_id_}.json")
        print(
            f"Processing session {session_id}, simple id {simple_session_id}. Look for {trimmed_path}"
        )
        with open(trimmed_path, "r") as file:
            traces = json.load(file)["data"]

        # Process traces
        rows = [
            {
                "num_run": trace["metadata"]["num_run"],
                "name": trace["name"],
                "trace_id": trace["id"],
                "batch_id": trace["session_id"],
                # "latency": cal_time(trace),
                # "latency": round(trace["latency"], 2),
                **extract_observation_details(
                    trace["observations"],
                    trace["id"],
                ),
                "status": (
                    "failure"
                    if trace["output"]["status"].lower() == "failed"
                    else "success"
                ),
                "tags": trace["tags"],
                "timestamp": int(parser.isoparse(trace["timestamp"]).timestamp()),
            }
            for trace in traces
        ]
        # print(rows)
        # print(rows)
        # Create and save DataFrame
        df = pd.DataFrame(rows).sort_values("num_run")

        output_dir = os.path.join(processed_data_dir, f"{simple_session_id}")
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        output_path = os.path.join(output_dir, f"clean_{session_id_}.csv")

        print(output_path)
        df.to_csv(output_path, index=False)
        print(f"Successfully saved CSV to: {output_path}")

    except FileNotFoundError as e:
        print(
            f"FileNotFoundError: For session {session_id} not found. Looked for {trimmed_path}\nError info: \n{e}\n\nTraceback: {traceback.format_exc()}"
        )
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in input file for session {session_id}")
    except Exception as e:
        print(f"Error processing session {session_id}: {str(e)}")


# Example usage
for session_id in session_id_list:
    json_to_csv(session_id)

Processing session qwen2.5-coder:14b_c83f_tpusg_batch, simple id qwen2.5-coder:14b_c83f. Look for /home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/raw_export/trimmed_qwen2.5-coder:14b_c83f_tpusg_batch.json
/home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/processed_data/qwen2.5-coder:14b_c83f/clean_qwen2.5-coder:14b_c83f_tpusg_batch.csv
Successfully saved CSV to: /home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/processed_data/qwen2.5-coder:14b_c83f/clean_qwen2.5-coder:14b_c83f_tpusg_batch.csv
Processing session qwen2.5-coder:14b_c83f_psg_batch, simple id qwen2.5-coder:14b_c83f. Look for /home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/raw_export/trimmed_qwen2.5-coder:14b_c83f_psg_batch.json
/home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/processed_data/qwen2.5-coder:14b_c83f/clean_qwen2.5-coder:14b_c83f_psg_batch.csv
Successfully saved CSV to: /home/han/Proj

## CSV Generation with Generation Counts

This section creates CSV files similar to the langfuse_export section 3, but adds a column for the number of generation attempts used for each trace.

In [5]:
import traceback
import pandas as pd
import json
import os
from datetime import datetime
from dateutil import parser

# Setup paths - same as langfuse_export
parent_dir = os.path.dirname(os.getcwd())
raw_export_dir = os.path.join(parent_dir, "raw_export")
processed_data_dir = os.path.join(parent_dir, "processed_data")


# Get session id list from data directory
session_id_list = []

for root, dirs, files in os.walk(raw_export_dir):
    for file in files:
        file_path = os.path.join(root, file)
        if "trimmed_" in file_path:
            session_id = file_path.split('trimmed_')[1].rstrip('.json')
            session_id_list.append(session_id)

print(f"Processing sessions: {session_id_list}")
print(f"Looking for raw files in: {raw_export_dir}")
print(f"Will save CSV files to: {processed_data_dir}")


def json_to_csv_weighted(session_id):
    """
    Convert JSON trace data to CSV format with aggregated metrics.
    Upgraded version that includes generation_count column.

    Args:
        session_id (str): Identifier for the session to process
    """

    def extract_observation_details(observations, trace_id):
        """Extract and aggregate metrics from observations"""
        metrics = {
            "status": None,
            "latency": 0,
            "total_tokens": 0,
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_cost": 0,
            "input_cost": 0,
            "output_cost": 0,
            "parameters": set(),
            "generation_count": 0,  # New field for generation count
        }

        # Count generations and process GENERATION observations
        for obs in (o for o in observations if o["type"] == "GENERATION"):
            metrics["generation_count"] += 1
            metrics["total_tokens"] += obs["totalTokens"]
            metrics["prompt_tokens"] += obs["promptTokens"]
            metrics["completion_tokens"] += obs["completionTokens"]
            metrics["latency"] += obs["latency"]
            for key, value in obs["modelParameters"].items():
                metrics["parameters"].add(key + ":" + value)

            # Add costs if present
            for cost_type in ["Total", "Input", "Output"]:
                key = f"calculated{cost_type}Cost"
                metric_key = cost_type.lower() + "_cost"
                if obs.get(key) is not None:
                    metrics[metric_key] += obs[key]
                    
        if len(metrics["parameters"]) == 0:
            metrics["parameters"] = "N/A"
            
        # Process SPAN observations for status
        status_indicators = [
            obs["name"]
            for obs in observations
            if obs["type"] == "SPAN" and "start_" not in obs["name"]
        ]

        #  if later than 2025-05-19, use status_signal_from_output
        if datetime.now() > datetime(2025, 5, 19):
            pass
        else:
            # Determine status
            success_signals = sum("end_" in name for name in status_indicators)
            failure_signals = sum(
                "failure_signal" in name for name in status_indicators
            )

            if success_signals + failure_signals > 1:
                raise ValueError(
                    f"Multiple status indicators found in trace {trace_id}"
                )

            metrics["status"] = (
                "success"
                if success_signals
                else "failure" if failure_signals else "unknown"
            )

        metrics["prompt_cost"] = metrics.pop("input_cost")
        metrics["completion_cost"] = metrics.pop("output_cost")
        metrics["latency"] = round(metrics["latency"] / 1000, 2)
        return metrics

    def cal_time(trace):
        time_diff = datetime.fromisoformat(
            trace["updatedAt"].replace("Z", "+00:00")
        ) - datetime.fromisoformat(trace["createdAt"].replace("Z", "+00:00"))
        seconds_diff = time_diff.total_seconds()
        return seconds_diff

    try:
        if session_id.startswith("da0a"):
            session_id = "phi4_" + session_id
        simple_session_id = session_id.rsplit("_", 2)[0]

        # Load JSON data
        if "tpsg" in session_id:
            session_id_ = session_id.replace("tpsg", "tpusg")
        else:
            session_id_ = session_id
        trimmed_path = os.path.join(raw_export_dir, f"trimmed_{session_id_}.json")
        print(
            f"Processing session {session_id}, simple id {simple_session_id}. Look for {trimmed_path}"
        )
        with open(trimmed_path, "r") as file:
            traces = json.load(file)["data"]

        # Process traces
        rows = [
            {
                "num_run": trace["metadata"]["num_run"],
                "name": trace["name"],
                "trace_id": trace["id"],
                "batch_id": trace["session_id"],
                # "latency": cal_time(trace),
                # "latency": round(trace["latency"], 2),
                **extract_observation_details(
                    trace["observations"],
                    trace["id"],
                ),
                "status": (
                    "failure"
                    if trace["output"]["status"].lower() == "failed"
                    else "success"
                ),
                "tags": trace["tags"],
                "timestamp": int(parser.isoparse(trace["timestamp"]).timestamp()),
            }
            for trace in traces
        ]
        
        # Create and save DataFrame
        df = pd.DataFrame(rows).sort_values("num_run")

        output_dir = os.path.join(processed_data_dir, f"{simple_session_id}")
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        output_path = os.path.join(output_dir, f"clean_{session_id_}.csv")

        print(output_path)
        df.to_csv(output_path, index=False)
        print(f"Successfully saved CSV to: {output_path}")

    except FileNotFoundError as e:
        print(
            f"FileNotFoundError: For session {session_id} not found. Looked for {trimmed_path}\nError info: \n{e}\n\nTraceback: {traceback.format_exc()}"
        )
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in input file for session {session_id}")
    except Exception as e:
        print(f"Error processing session {session_id}: {str(e)}")



# Example usage
for session_id in session_id_list:
    json_to_csv_weighted(session_id)

Processing sessions: ['qwen2.5-coder:14b_c83f_tpusg_batch', 'qwen2.5-coder:14b_c83f_psg_batch']
Looking for raw files in: /home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/raw_export
Will save CSV files to: /home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/processed_data
Processing session qwen2.5-coder:14b_c83f_tpusg_batch, simple id qwen2.5-coder:14b_c83f. Look for /home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/raw_export/trimmed_qwen2.5-coder:14b_c83f_tpusg_batch.json
/home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/processed_data/qwen2.5-coder:14b_c83f/clean_qwen2.5-coder:14b_c83f_tpusg_batch.csv
Successfully saved CSV to: /home/han/Projects/reference-benchmark-tinyml_llm/data_analysis/2025/08.22/processed_data/qwen2.5-coder:14b_c83f/clean_qwen2.5-coder:14b_c83f_tpusg_batch.csv
Processing session qwen2.5-coder:14b_c83f_psg_batch, simple id qwen2.5-coder:14b_c83f. Look for /home/h