In [35]:
import os
from collections.abc import MutableMapping
from pathlib import Path

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yaml

from java_migration.analysis.utils import (
    MavenStatus,
    flatten,
    get_experiment_data,
    get_maven_logs,
    visualize_agent_trace,
)
from java_migration.eval.maven_build_verifier import MavenBuildVerifier
from java_migration.eval.smol_log_parser import parse_log
from java_migration.eval.utils import recover_safe_repo_name
from java_migration.utils import REPO_ROOT

In [36]:
# Experiment run analysed by this notebook.
#
# Set the JAVA_MIGRATION_EXPERIMENT_PATH environment variable to point the
# notebook at a different run (or at a checkout on another machine) without
# editing this cell. When the variable is unset, the default run below is used.
#
# Earlier runs, kept for reference:
#   /Users/mayvic/Documents/git/java-migration-paper/data/experiments/2025-03-09/22-19-56-hopeful-wescoff
#   /Users/mayvic/Documents/git/java-migration-paper/data/experiments/2025-03-11/13-55-09-condescending-galileo
#   /Users/mayvic/Documents/git/java-migration-paper/data/experiments/2025-03-11/14-14-38-awesome-pasteur
#   /Users/mayvic/Documents/git/java-migration-paper/data/experiments/2025-03-11/15-14-25-loving-newton
#   /Users/mayvic/Documents/git/java-migration-paper/data/experiments/2025-03-11/16-15-09-tender-varahamihira
experiment_path = Path(
    os.environ.get(
        "JAVA_MIGRATION_EXPERIMENT_PATH",
        "/Users/mayvic/Documents/git/java-migration-paper/data/experiments/2025-03-11/17-59-17-recursing-leavitt",
    )
)
# Load the per-repo data of that run; the result is iterated with `.items()`
# by the feature-extraction cell further down.
exp_data = get_experiment_data(experiment_path)

In [37]:
def _agent_log_features(steps, input_cost_per_mtok, output_cost_per_mtok):
    """Summarise the steps of one agent log as a flat feature dict.

    Args:
        steps: Sequence of step objects. Each step is read for
            ``meta.duration``, ``meta.input_tokens``, ``meta.output_tokens``
            and ``raw_text``.
        input_cost_per_mtok: Price per one million input tokens.
        output_cost_per_mtok: Price per one million output tokens.

    Returns:
        Dict with keys ``run_duration``, ``input_tokens``, ``output_tokens``,
        ``cost``, ``num_steps`` and ``vertex_fail`` (in that order).
    """
    num_steps = len(steps)
    if num_steps == 0:
        # No steps were recorded, so there is no last step to read token
        # counters from; report an empty run instead of raising IndexError.
        return {
            "run_duration": 0.0,
            "input_tokens": 0,
            "output_tokens": 0,
            "cost": 0.0,
            "num_steps": 0,
            "vertex_fail": False,
        }

    # Token counters are taken from the last step only.
    # NOTE(review): presumably these are cumulative over the run -- confirm
    # against the log parser.
    last_meta = steps[-1].meta
    input_tokens = last_meta.input_tokens
    output_tokens = last_meta.output_tokens

    # An LLM API failure is detected from the text of the second-to-last step.
    # Runs with a single step, or with empty text there, count as no failure.
    penultimate_text = steps[-2].raw_text if num_steps > 1 else None
    api_error_markers = ("ServiceUnavailableError", "RateLimitError")
    vertex_fail = bool(penultimate_text) and any(marker in penultimate_text for marker in api_error_markers)

    return {
        "run_duration": np.sum([step.meta.duration for step in steps]),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost": input_tokens * input_cost_per_mtok / 1_000_000 + output_tokens * output_cost_per_mtok / 1_000_000,
        "num_steps": num_steps,
        "vertex_fail": vertex_fail,
    }


def get_experiment_features(exp_data, input_cost_per_mtok=0.15, output_cost_per_mtok=0.6):
    """Build one flat feature record per repository of an experiment.

    Args:
        exp_data: Mapping of repo name to that repo's data. Three optional
            entries are read from each repo's data:

            * ``"build_result"`` -- flattened with ``flatten`` and merged in
              (skipped when missing or ``None``);
            * ``"agent_log"`` -- an object exposing ``.steps``, summarised
              into duration / token / cost / step-count / API-failure features
              (skipped when missing or ``None``);
            * ``"repo_feats"`` -- a dict merged in as-is.
        input_cost_per_mtok: Price per one million input tokens used for the
            ``cost`` estimate. Defaults to the value previously hard-coded.
        output_cost_per_mtok: Price per one million output tokens used for the
            ``cost`` estimate. Defaults to the value previously hard-coded.

    Returns:
        List of dicts, one per repo in the iteration order of ``exp_data``,
        each starting with ``"repo_name"``. Suitable for ``pd.DataFrame``.
    """
    exp_feats = []
    for repo_name, repo_data in exp_data.items():
        cur_feats = {"repo_name": repo_name}

        if "build_result" in repo_data and repo_data["build_result"] is not None:
            cur_feats.update(flatten(repo_data["build_result"]))

        # Guard against a present-but-None log the same way build_result is guarded.
        if "agent_log" in repo_data and repo_data["agent_log"] is not None:
            cur_feats.update(
                _agent_log_features(
                    repo_data["agent_log"].steps,
                    input_cost_per_mtok,
                    output_cost_per_mtok,
                )
            )

        if "repo_feats" in repo_data:
            cur_feats.update(repo_data["repo_feats"])

        exp_feats.append(cur_feats)

    return exp_feats


# Turn the loaded experiment into one feature record per repo, then tabulate
# the records: one row per repo, one column per feature key.
exp_features = get_experiment_features(exp_data)
df = pd.DataFrame(exp_features)
df  # bare last expression: shows the frame as this cell's output

Unnamed: 0,repo_name,build_result_build_success,build_result_test_results_errors,build_result_test_results_failures,build_result_test_results_skipped,build_result_test_results_tests_run,build_result_test_success,error,run_success,run_duration,...,cost,num_steps,vertex_fail,build_tool,number_of_external_dependencies,number_of_java_files,number_of_lines_of_code,number_of_modules,number_of_unit_tests,build_result_test_results
0,DeemOpen/zkui,True,0.0,0.0,0.0,0.0,True,,True,31.04,...,0.006965,7,False,maven,28,20,2222,1,0,
1,EalenXie/spring-microservice-ddd,True,0.0,0.0,0.0,2.0,True,,True,18.66,...,0.006199,8,False,maven,14,34,2053,1,1,
2,killme2008/aviatorscript,True,26.0,0.0,0.0,1712.0,False,,True,271.35,...,2.112475,100,True,maven,12,283,47969,1,643,
3,ata4/disunity,True,0.0,0.0,0.0,328.0,True,,True,121.73,...,0.170807,38,False,maven,12,95,7250,3,6,
4,opengoofy/hippo4j,False,,,,,,,True,184.27,...,2.046916,98,False,maven,48,870,69708,8,423,
5,EnterpriseQualityCoding/FizzBuzzEnterpriseEdition,False,,,,,,,True,114.62,...,0.078175,28,False,maven,12,87,2489,1,1,
6,perwendel/spark,True,44.0,20.0,0.0,624.0,False,,True,155.15,...,0.651022,100,True,maven,28,100,13947,1,320,
7,alibaba/QLExpress,True,0.0,0.0,4.0,386.0,True,,True,167.17,...,0.361438,33,False,maven,10,142,10938,1,193,
8,SPuerBRead/Bridge,False,,,,,,,True,48.29,...,0.013179,13,False,maven,16,32,2408,1,0,
9,joelittlejohn/jsonschema2pojo,True,0.0,0.0,0.0,1214.0,True,,True,185.25,...,0.454999,31,False,maven,101,96,15097,6,671,


In [38]:
# Headline numbers for the run: each count is the column sum of a per-repo
# flag in `df`, and the cost is the sum of the per-repo `cost` column.
print(f"""
Total repos: {len(df)}
Compilation Success: {df['build_result_build_success'].sum()}
Test Success: {df['build_result_test_success'].sum()}
Failed due to LLM API issues: {df['vertex_fail'].sum()}
Cost: {df['cost'].sum()}
""")


Total repos: 15
Compilation Success: 11
Test Success: 8
Failed due to LLM API issues: 4
Cost: 9.6163887

