In [1]:
import json

# Initialize a dictionary to store total time for each stage
stage_times = {f'stage{i}_time': 0 for i in range(1, 8)}
stage_steps = {f'stage{i}_steps': 0 for i in range(1, 8)}

# Read the JSONL file
with open('stage_summaries.jsonl', 'r') as file:
    for line in file:
        data = json.loads(line)
        # Sum the times and steps for each stage
        for i in range(1, 8):
            stage_times[f'stage{i}_time'] += data.get(f'stage{i}_time', 0)
            stage_steps[f'stage{i}_steps'] += data.get(f'stage{i}_steps', 0)

# Find the stage with the maximum time
max_stage = max(stage_times, key=stage_times.get)
max_time = stage_times[max_stage]

# Calculate average time and steps per stage
average_times = {stage: time / len(stage_times) for stage, time in stage_times.items()}
average_steps = {stage: steps / len(stage_steps) for stage, steps in stage_steps.items()}

# Output the results
print(f"The stage that took the most time is {max_stage} with a total time of {max_time} seconds.")
print("Average time per stage:", average_times)
print("Average steps per stage:", average_steps)

The stage that took the most time is stage6_time with a total time of 90044.73489499092 seconds.
Average time per stage: {'stage1_time': 6657.990653991699, 'stage2_time': 7523.226215532848, 'stage3_time': 768.2301808425358, 'stage4_time': 6067.888418095453, 'stage5_time': 10536.983582632882, 'stage6_time': 12863.533556427274, 'stage7_time': 2360.5154154981888}
Average steps per stage: {'stage1_steps': 76.71428571428571, 'stage2_steps': 10.428571428571429, 'stage3_steps': 9.285714285714286, 'stage4_steps': 33.0, 'stage5_steps': 99.14285714285714, 'stage6_steps': 27.142857142857142, 'stage7_steps': 32.57142857142857}


In [4]:
import json
from collections import defaultdict

# Stage name mapping
stage_names = {
    1: "Understanding & Exploration",
    2: "Baseline Assessment",
    3: "Problem Analysis & Idea Generation",
    4: "Implementation",
    5: "Debugging & Error Handling",
    6: "Experimental Refinement",
    7: "Final Evaluation & Submission"
}

# Initialize dictionaries
stage_times = defaultdict(float)
stage_steps = defaultdict(int)
task_counts = defaultdict(int)
task_stage_times = defaultdict(lambda: defaultdict(float))
task_stage_steps = defaultdict(lambda: defaultdict(int))

# Read the JSONL file
with open('stage_summaries.jsonl', 'r') as file:
    data_list = []
    for line in file:
        data = json.loads(line)
        data_list.append(data)
        task = data['task']
        task_counts[task] += 1
        
        # Sum the times and steps for each stage
        for i in range(1, 8):
            stage_key = f'stage{i}_time'
            step_key = f'stage{i}_steps'
            
            if stage_key in data:
                stage_times[i] += data[stage_key]
                task_stage_times[task][i] += data[stage_key]
            
            if step_key in data:
                stage_steps[i] += data[step_key]
                task_stage_steps[task][i] += data[step_key]

# Find the stage with the maximum total time
max_stage_index = max(stage_times, key=stage_times.get)
max_stage_name = stage_names[max_stage_index]
max_time = stage_times[max_stage_index]

# Calculate total tasks
total_tasks = sum(task_counts.values())

# Calculate averages per stage across all tasks
stage_avg_times = {stage_names[i]: time / total_tasks for i, time in stage_times.items()}
stage_avg_steps = {stage_names[i]: steps / total_tasks for i, steps in stage_steps.items()}

# Calculate per-task statistics
task_statistics = {}
for task, count in task_counts.items():
    task_statistics[task] = {
        "count": count,
        "stage_times": {},
        "stage_steps": {}
    }
    
    for i in range(1, 8):
        if i in task_stage_times[task]:
            task_statistics[task]["stage_times"][stage_names[i]] = task_stage_times[task][i] / count
        if i in task_stage_steps[task]:
            task_statistics[task]["stage_steps"][stage_names[i]] = task_stage_steps[task][i] / count

# Output the results
print(f"Descriptive Analysis of Stage Times and Steps:\n")
print(f"1. The stage that took the most time overall is '{max_stage_name}' with a total time of {max_time:.2f} seconds.\n")

print("2. Average Time per Stage (across all tasks):")
for stage, avg_time in stage_avg_times.items():
    print(f"   - {stage}: {avg_time:.2f} seconds")

print("\n3. Average Steps per Stage (across all tasks):")
for stage, avg_steps in stage_avg_steps.items():
    print(f"   - {stage}: {avg_steps:.2f} steps")

print("\n4. Average Time and Steps per Task by Stage:")
for task, stats in task_statistics.items():
    print(f"   - Task '{task}' ({stats['count']} runs):")
    print(f"     * Average Time per Stage:")
    for stage, avg_time in stats["stage_times"].items():
        print(f"       - {stage}: {avg_time:.2f} seconds")
    print(f"     * Average Steps per Stage:")
    for stage, avg_steps in stats["stage_steps"].items():
        print(f"       - {stage}: {avg_steps:.2f} steps")

print("\n5. Total Number of Tasks Analyzed:", total_tasks)

# Calculate which stage took the most time for each task
print("\n6. Stage that took the most time for each task:")
for task, stage_times_dict in task_stage_times.items():
    max_stage_for_task = max(stage_times_dict, key=stage_times_dict.get)
    print(f"   - {task}: {stage_names[max_stage_for_task]} ({stage_times_dict[max_stage_for_task]:.2f} seconds)")

Descriptive Analysis of Stage Times and Steps:

1. The stage that took the most time overall is 'Experimental Refinement' with a total time of 90044.73 seconds.

2. Average Time per Stage (across all tasks):
   - Understanding & Exploration: 970.96 seconds
   - Baseline Assessment: 1097.14 seconds
   - Problem Analysis & Idea Generation: 112.03 seconds
   - Implementation: 884.90 seconds
   - Debugging & Error Handling: 1536.64 seconds
   - Experimental Refinement: 1875.93 seconds
   - Final Evaluation & Submission: 344.24 seconds

3. Average Steps per Stage (across all tasks):
   - Understanding & Exploration: 11.19 steps
   - Baseline Assessment: 1.52 steps
   - Problem Analysis & Idea Generation: 1.35 steps
   - Implementation: 4.81 steps
   - Debugging & Error Handling: 14.46 steps
   - Experimental Refinement: 3.96 steps
   - Final Evaluation & Submission: 4.75 steps

4. Average Time and Steps per Task by Stage:
   - Task 'product-recommendation' (8 runs):
     * Average Time per 

In [5]:
import json
from collections import defaultdict

# Stage name mapping
stage_names = {
    1: "Understanding & Exploration",
    2: "Baseline Assessment",
    3: "Problem Analysis & Idea Generation",
    4: "Implementation",
    5: "Debugging & Error Handling",
    6: "Experimental Refinement",
    7: "Final Evaluation & Submission"
}

# Initialize dictionaries
stage_times = defaultdict(float)
stage_steps = defaultdict(int)
task_counts = defaultdict(int)
task_stage_times = defaultdict(lambda: defaultdict(float))
task_stage_steps = defaultdict(lambda: defaultdict(int))
task_total_times = defaultdict(float)
task_total_steps = defaultdict(int)

# Read the JSONL file
with open('stage_summaries.jsonl', 'r') as file:
    for line in file:
        data = json.loads(line)
        task = data['task']
        task_counts[task] += 1
        
        # Get total time for this task run
        total_time = data.get('total_time', 0)
        total_steps = data.get('total_steps', 0)
        
        task_total_times[task] += total_time
        task_total_steps[task] += total_steps
        
        # Sum the times and steps for each stage
        for i in range(1, 8):
            stage_key = f'stage{i}_time'
            step_key = f'stage{i}_steps'
            
            if stage_key in data:
                stage_times[i] += data[stage_key]
                task_stage_times[task][i] += data[stage_key]
            
            if step_key in data:
                stage_steps[i] += data[step_key]
                task_stage_steps[task][i] += data[step_key]

# Find the stage with the maximum total time
max_stage_index = max(stage_times, key=stage_times.get)
max_stage_name = stage_names[max_stage_index]
max_time = stage_times[max_stage_index]

# Calculate total tasks
total_tasks = sum(task_counts.values())

# Calculate averages per stage across all tasks
stage_avg_times = {stage_names[i]: time / total_tasks for i, time in stage_times.items()}
stage_avg_steps = {stage_names[i]: steps / total_tasks for i, steps in stage_steps.items()}

# Calculate per-task statistics
task_statistics = {}
for task, count in task_counts.items():
    task_statistics[task] = {
        "count": count,
        "avg_total_time": task_total_times[task] / count,
        "avg_total_steps": task_total_steps[task] / count,
        "stage_times": {},
        "stage_steps": {}
    }
    
    for i in range(1, 8):
        if i in task_stage_times[task]:
            task_statistics[task]["stage_times"][stage_names[i]] = task_stage_times[task][i] / count
        if i in task_stage_steps[task]:
            task_statistics[task]["stage_steps"][stage_names[i]] = task_stage_steps[task][i] / count

# Output the results
print(f"Descriptive Analysis of Stage Times and Steps:\n")
print(f"1. The stage that took the most time overall is '{max_stage_name}' with a total time of {max_time:.2f} seconds.\n")

print("2. Average Time per Stage (across all tasks):")
for stage, avg_time in stage_avg_times.items():
    print(f"   - {stage}: {avg_time:.2f} seconds")

print("\n3. Average Steps per Stage (across all tasks):")
for stage, avg_steps in stage_avg_steps.items():
    print(f"   - {stage}: {avg_steps:.2f} steps")

print("\n4. Average Time and Steps per Task:")
for task, stats in task_statistics.items():
    print(f"   - Task '{task}':")
    print(f"     * Average Total Time: {stats['avg_total_time']:.2f} seconds")
    print(f"     * Average Total Steps: {stats['avg_total_steps']:.2f} steps")

print("\n5. Detailed Average Time and Steps per Task by Stage:")
for task, stats in task_statistics.items():
    print(f"   - Task '{task}' ({stats['count']} runs):")
    print(f"     * Average Time per Stage:")
    for stage, avg_time in stats["stage_times"].items():
        print(f"       - {stage}: {avg_time:.2f} seconds")
    print(f"     * Average Steps per Stage:")
    for stage, avg_steps in stats["stage_steps"].items():
        print(f"       - {stage}: {avg_steps:.2f} steps")

print("\n6. Total Number of Tasks Analyzed:", total_tasks)

# Calculate which stage took the most time for each task
print("\n7. Stage that took the most time for each task:")
for task, stage_times_dict in task_stage_times.items():
    max_stage_for_task = max(stage_times_dict, key=stage_times_dict.get)
    print(f"   - {task}: {stage_names[max_stage_for_task]} ({stage_times_dict[max_stage_for_task]:.2f} seconds total, {stage_times_dict[max_stage_for_task]/task_counts[task]:.2f} seconds avg)")

Descriptive Analysis of Stage Times and Steps:

1. The stage that took the most time overall is 'Experimental Refinement' with a total time of 90044.73 seconds.

2. Average Time per Stage (across all tasks):
   - Understanding & Exploration: 970.96 seconds
   - Baseline Assessment: 1097.14 seconds
   - Problem Analysis & Idea Generation: 112.03 seconds
   - Implementation: 884.90 seconds
   - Debugging & Error Handling: 1536.64 seconds
   - Experimental Refinement: 1875.93 seconds
   - Final Evaluation & Submission: 344.24 seconds

3. Average Steps per Stage (across all tasks):
   - Understanding & Exploration: 11.19 steps
   - Baseline Assessment: 1.52 steps
   - Problem Analysis & Idea Generation: 1.35 steps
   - Implementation: 4.81 steps
   - Debugging & Error Handling: 14.46 steps
   - Experimental Refinement: 3.96 steps
   - Final Evaluation & Submission: 4.75 steps

4. Average Time and Steps per Task:
   - Task 'product-recommendation':
     * Average Total Time: 6482.74 seconds