In [None]:
# build a pandas dataframe with one row per sprint for every run of every framework under the runs/ directory

import json
import os
import pandas as pd

def get_sprints_df(runs_directory='../runs/'):
    """Collect per-sprint metrics for every run found under ``runs_directory``.

    The directory is walked as ``<runs_directory>/<framework>/<run>/metrics.json``.
    Every entry of the ``"steps"`` list in a ``metrics.json`` file (a "step" is a
    sprint) becomes one row of the sprints dataframe, e.g.::

        "steps": [
            {
                "step_number": 1,
                "tokens_in": 11634,
                "tokens_out": 2897,
                "api_calls": 6,
                "cached_tokens": 0,
                "start_timestamp": 1761442133,
                "end_timestamp": 1761442161,
                "verification_status": "pending",
                ...
            },
            ...
        ]

    Args:
        runs_directory: Path of the directory that contains one subdirectory
            per framework.

    Returns:
        A ``(sprints_df, invalid_runs_df)`` tuple.

        * ``sprints_df`` has the columns ``framework``, ``run_id``,
          ``sprint_id``, ``start_time``, ``end_time``, ``tokens_in``,
          ``tokens_out``, ``api_calls``, ``cached_tokens`` and ``status``.
          Keys missing from a step are stored as missing values.
        * ``invalid_runs_df`` has the columns ``framework``, ``run_id`` and
          ``reason`` and lists every entry of a framework directory that has
          no ``metrics.json`` file.

        Rows are ordered by framework name, then run name, then by the order
        of the steps in the file.

    Raises:
        OSError: If ``runs_directory`` cannot be listed or a file cannot be read.
        json.JSONDecodeError: If a ``metrics.json`` file is not valid JSON.
    """
    sprint_columns = [
        "framework",
        "run_id",
        "sprint_id",
        "start_time",
        "end_time",
        "tokens_in",
        "tokens_out",
        "api_calls",
        "cached_tokens",
        "status",
    ]
    invalid_run_columns = ["framework", "run_id", "reason"]

    # output column -> key of the step dictionary it is read from
    step_key_by_column = {
        "sprint_id": "step_number",
        "start_time": "start_timestamp",
        "end_time": "end_timestamp",
        "tokens_in": "tokens_in",
        "tokens_out": "tokens_out",
        "api_calls": "api_calls",
        "cached_tokens": "cached_tokens",
        "status": "verification_status",
    }

    # Rows are collected in plain lists and turned into dataframes once at the
    # end; concatenating a dataframe per row copies the whole frame every time.
    sprint_rows = []
    invalid_run_rows = []

    # sorted() makes the row order reproducible; os.listdir order is arbitrary
    for framework_dir in sorted(os.listdir(runs_directory)):
        framework_path = os.path.join(runs_directory, framework_dir)
        if not os.path.isdir(framework_path):
            continue

        for run_dir in sorted(os.listdir(framework_path)):
            metrics_path = os.path.join(framework_path, run_dir, "metrics.json")
            if not os.path.isfile(metrics_path):
                # log invalid run
                invalid_run_rows.append({
                    "framework": framework_dir,
                    "run_id": run_dir,
                    "reason": "metrics.json file not found"
                })
                continue

            # JSON text is UTF-8; do not depend on the platform default encoding
            with open(metrics_path, 'r', encoding='utf-8') as f:
                metrics_data = json.load(f)

            # a metrics file without "steps" contributes no sprint rows
            if "steps" not in metrics_data:
                continue

            for sprint in metrics_data["steps"]:
                row = {"framework": framework_dir, "run_id": run_dir}
                row.update({
                    column: sprint.get(step_key)
                    for column, step_key in step_key_by_column.items()
                })
                sprint_rows.append(row)

    # passing columns= keeps the expected columns even when no rows were found
    sprints_df = pd.DataFrame(sprint_rows, columns=sprint_columns)
    invalid_runs_df = pd.DataFrame(invalid_run_rows, columns=invalid_run_columns)
    return sprints_df, invalid_runs_df

# scan the default '../runs/' directory; the second dataframe lists the runs whose metrics.json was not found
sprints_df, invalid_runs_df = get_sprints_df()
sprints_df



In [None]:
# runs that were skipped because their metrics.json file was not found
invalid_runs_df

In [None]:
# save the runs without a metrics.json file as csv (the dataframe index is not written)
invalid_runs_df.to_csv('invalid_runs-metrics_not_found.csv', index=False)

In [None]:
# number of distinct run_ids per framework, counted over the rows of sprints_df
# (a run only appears here if it contributed at least one sprint row)
run_counts = (
    sprints_df
    .groupby('framework')['run_id']
    .nunique()
)
run_counts

In [None]:
# for the valid runs, flag the sprints whose input or output token count is zero or missing (NaN)
tokens_in_bad = sprints_df['tokens_in'].isna() | sprints_df['tokens_in'].eq(0)
tokens_out_bad = sprints_df['tokens_out'].isna() | sprints_df['tokens_out'].eq(0)
invalid_sprints_df = sprints_df[tokens_in_bad | tokens_out_bad]
invalid_sprints_df

In [None]:
# save the sprints with zero or missing token counts as csv (the dataframe index is not written)
invalid_sprints_df.to_csv('invalid_sprints-tokens_count_zero.csv', index=False)

In [None]:
# finding the distinct framework names that appear in invalid_sprints_df,
# i.e. the frameworks with at least one sprint whose token count is zero or NaN
invalid_frameworks = invalid_sprints_df['framework'].unique()
invalid_frameworks

In [None]:
# finding the unique (framework, run_id) pairs that have at least one invalid sprint.
# Built as one chain that returns a new dataframe: no inplace mutation and no
# column assignment on a frame derived from a filtered slice, so the cell can be
# re-run safely and does not depend on pandas' copy/view tracking.
invalid_runs2 = (
    invalid_sprints_df[['framework', 'run_id']]
    .drop_duplicates()
    .reset_index(drop=True)
    # saving the reason
    .assign(reason='Some sprint with tokens count zero or NaN')
)
# save as csv
invalid_runs2.to_csv('invalid_runs-tokens_count_zero.csv', index=False)
invalid_runs2


In [None]:
# finding the distinct sprint_id values that appear among the invalid sprints
# (taken over all frameworks and runs together)
invalid_sprint_ids = invalid_sprints_df['sprint_id'].unique()
invalid_sprint_ids

In [None]:
# number of distinct invalid sprint_ids for each (framework, run_id) pair in invalid_sprints_df
invalid_sprint_counts = (
    invalid_sprints_df
    .groupby(['framework', 'run_id'])['sprint_id']
    .nunique()
)
invalid_sprint_counts

In [None]:
# install matplotlib and seaborn if not already installed
# (%pip installs into the environment of the running kernel; -q keeps pip's output quiet)
%pip install matplotlib seaborn -q

In [None]:
# show a frequency distribution of invalid_sprint_counts: for each possible number of
# invalid sprints, how many (framework, run_id) pairs have that many invalid sprints
import matplotlib.pyplot as plt
import seaborn as sns

if invalid_sprint_counts.empty:
    # with no invalid sprints there is nothing to draw; skipping also avoids
    # requesting a histogram with zero bins
    print('No invalid sprints found - nothing to plot.')
else:
    fig, ax = plt.subplots(figsize=(10, 6))
    # the counts are integers: discrete=True draws one unit-wide bar centred on each value
    sns.histplot(invalid_sprint_counts, discrete=True, kde=False, ax=ax)
    ax.set_title('Frequency Distribution of Invalid Sprint Counts per (Framework, Run ID)')
    ax.set_xlabel('Number of Invalid Sprints')
    ax.set_ylabel('Frequency')
    plt.show()