In [21]:
from pathlib import Path

import numpy as np
import pandas as pd

from java_migration.analysis.utils import flatten, get_experiment_data

In [22]:
# Run directory of the experiment analysed in this notebook.
# NOTE(review): this is a machine-specific absolute path.
# Alternative run, kept for quick switching:
#   "/home/user/java-migration-paper/data/experiments/2025-03-13/19-03-16-kind-easley"
experiment_path = Path("/home/user/java-migration-paper/data/experiments/2025-03-13/18-28-16-boring-kowalevski")

# Load the experiment's data from that directory via the project helper.
exp_data = get_experiment_data(experiment_path)

In [23]:
def _present_value(repo_data, key):
    """Return ``repo_data[key]`` when the key exists, otherwise ``None``."""
    return repo_data[key] if key in repo_data else None


def _step_reports_llm_api_error(step):
    """Tell whether a step's raw text mentions one of the known LLM API error names."""
    raw_text = step.raw_text
    if not raw_text:
        # Missing or empty text cannot contain an error marker.
        return False
    return "ServiceUnavailableError" in raw_text or "RateLimitError" in raw_text


def _agent_log_features(agent_log, input_price_per_mtok, output_price_per_mtok):
    """Summarise an agent log's steps into duration, token, cost and failure features."""
    steps = agent_log.steps
    num_steps = len(steps)

    feats = {"run_duration": np.sum([step.meta.duration for step in steps])}

    if num_steps > 0:
        # Token counts are read from the last step only.
        # NOTE(review): presumably these counters are cumulative over the run -- confirm.
        last_meta = steps[-1].meta
        feats["input_tokens"] = last_meta.input_tokens
        feats["output_tokens"] = last_meta.output_tokens
    else:
        # An empty log has no last step to read from; report zero usage instead of
        # raising IndexError.
        feats["input_tokens"] = 0
        feats["output_tokens"] = 0

    feats["cost"] = (
        feats["input_tokens"] * input_price_per_mtok / 1_000_000
        + feats["output_tokens"] * output_price_per_mtok / 1_000_000
    )
    feats["num_steps"] = num_steps
    # The failure check inspects the second-to-last step, so it needs at least two steps.
    feats["vertex_fail"] = num_steps > 1 and _step_reports_llm_api_error(steps[-2])
    return feats


def get_experiment_features(exp_data, input_price_per_mtok=0.15, output_price_per_mtok=0.6):
    """Build one flat feature record per repository of an experiment.

    Args:
        exp_data: Mapping of repository name to that repository's data. Each value
            may contain the optional keys ``"build_result"`` (flattened into the
            record with ``flatten``), ``"agent_log"`` (an object with a ``steps``
            sequence whose items expose ``meta.duration``, ``meta.input_tokens``,
            ``meta.output_tokens`` and ``raw_text``) and ``"repo_feats"`` (a dict
            merged into the record). A key that is absent or holds ``None`` is
            skipped.
        input_price_per_mtok: Price applied per one million input tokens.
        output_price_per_mtok: Price applied per one million output tokens.

    Returns:
        A list of dicts, one per repository in iteration order. Every dict has
        ``"repo_name"``; when an agent log is available it also has
        ``"run_duration"``, ``"input_tokens"``, ``"output_tokens"``, ``"cost"``,
        ``"num_steps"`` and ``"vertex_fail"``.
    """
    exp_feats = []
    for repo_name, repo_data in exp_data.items():
        cur_feats = {"repo_name": repo_name}

        build_result = _present_value(repo_data, "build_result")
        if build_result is not None:
            cur_feats.update(flatten(build_result))

        agent_log = _present_value(repo_data, "agent_log")
        if agent_log is not None:
            cur_feats.update(_agent_log_features(agent_log, input_price_per_mtok, output_price_per_mtok))

        repo_feats = _present_value(repo_data, "repo_feats")
        if repo_feats is not None:
            cur_feats.update(repo_feats)

        # Append only once the record is complete.
        exp_feats.append(cur_feats)

    return exp_feats


# Build the per-repository feature records, then tabulate them (one row per record).
exp_features = get_experiment_features(exp_data)
df = pd.DataFrame(data=exp_features)

# Bare trailing expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,repo_name,build_result_build_success,build_result_test_results,build_result_test_success,error,run_success,run_duration,input_tokens,output_tokens,cost,...,build_tool,number_of_external_dependencies,number_of_java_files,number_of_lines_of_code,number_of_modules,number_of_unit_tests,build_result_test_results_errors,build_result_test_results_failures,build_result_test_results_skipped,build_result_test_results_tests_run
0,nydiarra/springboot-jwt,False,,,,True,448.59,4261169,56529,0.673093,...,maven,22,16,557,1,1,,,,
1,Ouyangan/hunt-admin,True,,True,,True,59.14,306082,2588,0.047465,...,maven,95,77,10788,6,59,,,,
2,Netflix/Surus,False,,,,True,213.25,822954,21300,0.136223,...,maven,8,5,1027,1,10,,,,
3,EalenXie/spring-microservice-ddd,False,,,,True,621.79,9033256,30837,1.373491,...,maven,14,34,2053,1,1,,,,
4,SPuerBRead/Bridge,False,,,,True,15.15,10808,942,0.002186,...,maven,16,32,2408,1,0,,,,
5,Codecademy/EventHub,True,,True,,True,774.85,7411946,49209,1.141317,...,maven,12,69,4589,2,21,0.0,0.0,0.0,42.0
6,DeemOpen/zkui,True,,True,,True,77.15,28349,1016,0.004862,...,maven,28,20,2222,1,0,0.0,0.0,0.0,0.0
7,perwendel/spark,True,,True,,True,580.83,535055,4718,0.083089,...,maven,28,100,13947,1,320,0.0,0.0,0.0,460.0
8,ata4/disunity,True,,True,,True,117.23,185992,5323,0.031093,...,maven,12,95,7250,3,6,0.0,0.0,0.0,328.0
9,EnterpriseQualityCoding/FizzBuzzEnterpriseEdition,False,,,,True,484.45,2683509,29960,0.420502,...,maven,12,87,2489,1,1,,,,


In [24]:
# Headline totals for the loaded experiment. The leading and trailing blank lines
# of the printed text are part of the output and are kept as-is.
print(f"""
Total repos: {len(df.index)}
Compilation Success: {df['build_result_build_success'].sum()}
Test Success: {df['build_result_test_success'].sum()}
Failed due to LLM API issues: {df['vertex_fail'].sum()}
Cost: {df['cost'].sum()}
""")


Total repos: 15
Compilation Success: 7
Test Success: 6
Failed due to LLM API issues: 0
Cost: 5.9062362



In [25]:
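# Summary outputs recorded from earlier executions of the summary cell, kept for comparison.

# Run A (source experiment not recorded here; possibly the commented-out
# 19-03-16-kind-easley path -- TODO confirm):
# Total repos: 15
# Compilation Success: 9
# Test Success: 8
# Failed due to LLM API issues: 0
# Cost: 13.7757765

# Run B (identical to the summary printed for 18-28-16-boring-kowalevski):
# Total repos: 15
# Compilation Success: 7
# Test Success: 6
# Failed due to LLM API issues: 0
# Cost: 5.9062362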

SyntaxError: invalid syntax (1407688227.py, line 1)