Load the runtime data measuring program performance on the APPS training set.

In [None]:
import json
# Read the saved runtime measurements from disk into a plain dict.
with open('models_eval/base_model_runtime_data.json', 'r') as file:
    data = json.load(file)

# Pull out the four lists used downstream. The plotting code indexes
# `problem_ids` alongside `gen_runtimes` and `problem_ids_bl` alongside
# `baseline_runtimes`, so each pair is expected to line up element-wise
# (TODO confirm the pairs have equal length in the JSON file).
gen_runtimes, baseline_runtimes, problem_ids, problem_ids_bl = (
    data[key]
    for key in ('gen_runtimes', 'baseline_runtimes', 'problem_ids', 'problem_ids_bl')
)

Reproduce APPS Train Data Runtime Distribution Figure 

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Promote the loaded lists to NumPy arrays so they can be filtered with
# boolean masks below.
problem_ids, gen_runtimes = np.array(problem_ids), np.array(gen_runtimes)
problem_ids_bl, baseline_runtimes = np.array(problem_ids_bl), np.array(baseline_runtimes)

# Boolean masks that bucket problems by ID range:
#   interview   : id <= 1999
#   competition : 2000 <= id <= 2360
#   intro       : 2361 <= id <= 4999
# One mask per bucket for the generated solutions and one for the baseline.
interview_mask_gen = (problem_ids <= 1999)
interview_mask_bl = (problem_ids_bl <= 1999)
competition_mask_gen = (problem_ids >= 2000) & (problem_ids <= 2360)
competition_mask_bl = (problem_ids_bl >= 2000) & (problem_ids_bl <= 2360)
intro_mask_gen = (problem_ids >= 2361) & (problem_ids <= 4999)
intro_mask_bl = (problem_ids_bl >= 2361) & (problem_ids_bl <= 4999)

# One row of three side-by-side panels.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

# Panel table: (axes, title, gen mask, baseline mask, marker alpha, y-label).
# Introductory problems sit in the left-most panel, which is the only one
# that carries a y-axis label; each panel keeps its own alpha value.
panels = (
    (axs[1], 'Interview Problems', interview_mask_gen, interview_mask_bl, 0.5, None),
    (axs[2], 'Competition Problems', competition_mask_gen, competition_mask_bl, 0.6, None),
    (axs[0], 'Introductory Problems', intro_mask_gen, intro_mask_bl, 0.7, 'Runtime (ms)'),
)

for ax, title, gen_mask, bl_mask, alpha, ylabel in panels:
    # Generated solutions are drawn first, then the baseline, so the two
    # series pick up the first and second colours of the default cycle.
    ax.scatter(problem_ids[gen_mask], gen_runtimes[gen_mask],
               label='gen solutions', alpha=alpha)
    ax.scatter(problem_ids_bl[bl_mask], baseline_runtimes[bl_mask],
               label='baseline solutions', alpha=alpha)
    ax.set_title(title)
    ax.set_xlabel('Problem ID')
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

# Tighten spacing, write the figure to disk, then render it inline.
plt.tight_layout()
plt.savefig('models_eval/train_data_runtimes_by_difficulty.png')
plt.show()

In [None]:
# Use NumPy to calculate the mean generated-solution runtime for each difficulty category
import numpy as np
# Mean runtime of the generated solutions within each difficulty bucket.
# Relies on `gen_runtimes` being a NumPy array and on the boolean masks
# built in the plotting cell.
interview_runtime_Avg = gen_runtimes[interview_mask_gen].mean()
competition_runtime_Avg = gen_runtimes[competition_mask_gen].mean()
intro_runtime_Avg = gen_runtimes[intro_mask_gen].mean()

# Report the three averages, one per line, in the same order as computed.
for label, value in (
    ("Average runtime for interview problems: ", interview_runtime_Avg),
    ("Average runtime for competition problems: ", competition_runtime_Avg),
    ("Average runtime for intro problems: ", intro_runtime_Avg),
):
    print(label, value)

Average runtime for interview problems:  25.750392004023134

Average runtime for competition problems:  133.42541732283465

Average runtime for intro problems:  4.329581621961442