In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import os
import numpy as np
import yaml
import glob
import re

# Examine Run Lengths and Successes

In [2]:
def get_seed_from_filename(filename):
    """Extract the two-digit seed embedded in an experiment path.

    The path is searched for ``<digit>_<two digits>_<YYYY>?<MM>?<DD>``, where
    the year starts with 19 or 20 and ``?`` is any single separator character.
    The two-digit field is interpreted as the seed.

    Args:
        filename: Path or file name (str) to search.

    Returns:
        The seed as an ``int``, or ``None`` when the pattern is not found.
    """
    # Raw string (not an f-string): `\d` is a regex escape, not a string
    # escape, and there is nothing to interpolate. A non-raw literal triggers
    # invalid-escape warnings on recent Python versions.
    pattern_regex = r"(\d)_(\d\d)_(19|20)\d\d.(0[1-9]|1[012]).(0[1-9]|[12][0-9]|3[01])"
    search = re.search(pattern_regex, filename)
    if search is None:
        return None
    return int(search.group(2))

In [3]:
def filter_duplicate_seeds(file_names):
    """Keep a single file name per seed.

    Names are visited in sorted order and keyed by the seed parsed with
    ``get_seed_from_filename``; when several names share a seed, the one that
    sorts last wins. Names with no parseable seed all share the key ``None``,
    so at most one of them survives.

    Args:
        file_names: Iterable of file names / paths.

    Returns:
        List of the surviving names, ordered by first appearance of each seed
        in the sorted input.
    """
    latest_by_seed = {
        get_seed_from_filename(path): path for path in sorted(file_names)
    }
    return list(latest_by_seed.values())

In [12]:
# For every experiment id in [min_id, max_id], record the last logged `frame`
# and `max_success` value from each run's eval.csv (one file kept per seed).
run_lengths = {}
final_successes = {}
min_id = 123
max_id = 132
for exp_id in range(min_id, max_id + 1):
    # Bind the per-experiment lists once; the dict entries alias them.
    lengths = run_lengths[exp_id] = []
    successes = final_successes[exp_id] = []
    eval_paths = filter_duplicate_seeds(
        sorted(glob.glob(f'../exp_local/{exp_id}_*/eval.csv'))
    )
    for eval_path in eval_paths:
        eval_log = pd.read_csv(eval_path)
        # Last row of the log = most recent evaluation of that run.
        lengths.append(eval_log.frame.iat[-1])
        successes.append(eval_log.max_success.iat[-1])

In [None]:
# Print run lengths and final successes (for copying to google sheets).
# One line per experiment id: the list reprs with their surrounding brackets
# stripped, run lengths first, then final successes.
for exp_id in range(min_id, max_id + 1):
    lengths_text = str(run_lengths[exp_id])[1:-1]
    successes_text = str(final_successes[exp_id])[1:-1]
    print(f"{lengths_text}, {successes_text}")
# NOTE(review): pads the output with blank lines; the purpose is not evident
# from the notebook - confirm it is still wanted.
for _ in range(1000):
    print("\n")

In [None]:
# Compare original and V1.
# Ids are taken in consecutive pairs (id, id + 1); presumably the first of each
# pair is the original variant and the second is V1 - verify against the
# experiment numbering.
for original_id in range(min_id, max_id + 1, 2):
    v1_id = original_id + 1
    original_result = np.average(final_successes[original_id])
    v1_result = np.average(final_successes[v1_id])
    original_length = np.average(run_lengths[original_id])
    v1_length = np.average(run_lengths[v1_id])
    success_delta = v1_result - original_result
    length_delta = v1_length - original_length
    print(f"{original_result}, {v1_result}, {success_delta}, {length_delta}")

In [None]:
# Save the old run lengths, get new run lengths, and print the delta between the two
# This can help show which runs are still updating/running
old_run_lengths = run_lengths.copy()

run_lengths = {}
for i in range(min_id, max_id+1):
    run_lengths[i] = []
    # Read from the same '../exp_local' root that produced `old_run_lengths`;
    # globbing a different directory would compare unrelated (or no) runs.
    file_names = sorted(glob.glob(f'../exp_local/{i}_*/eval.csv'))
    file_names = filter_duplicate_seeds(file_names)
    for file_name in file_names:
        data = pd.read_csv(file_name)
        # Last logged `frame` value of the run.
        run_lengths[i].append(data.frame.iat[-1])

for i in range(min_id, max_id+1):
    # zip pairs runs positionally and stops at the shorter list, so runs that
    # appeared (or disappeared) since the previous load are not reported.
    print(f"{i}: {[new-old for new,old in zip(run_lengths[i], old_run_lengths[i])]}")

# Get Stats on Hydra Config Settings for Experiments

In [3]:
# For each run-type directory under `final_results_dir`, tally how many runs
# use each value of the Hydra config key `attr`, and which run directories
# those are.
sizes = {}  # run_type_dir -> {attr value -> number of runs}
a = {}      # run_type_dir -> {attr value -> [run_dir, ...]}
attr = 'seed'
final_results_dir = 'final_results'
for run_type_dir in sorted(os.listdir(final_results_dir)):
    sizes[run_type_dir] = {}
    a[run_type_dir] = {}
    for run_dir in os.listdir(final_results_dir + '/' + run_type_dir):
        full_path = final_results_dir + '/' + run_type_dir + '/' + run_dir
        full_path += '/.hydra/config.yaml'

        # Context manager closes the config file promptly instead of leaking
        # one open handle per run directory.
        with open(full_path, "r") as config_file:
            size = yaml.safe_load(config_file)[attr]
        if size in sizes[run_type_dir]:
            sizes[run_type_dir][size] += 1
            a[run_type_dir][size].append(run_dir)
        else:
            sizes[run_type_dir][size] = 1
            a[run_type_dir][size] = [run_dir]