In [1]:
%load_ext autoreload
%autoreload 2

import os
import json
import copy
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
import ray_results_interpreter as rri
import subprocess
import concurrent.futures
from main_run import MainRun

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
# Build the Vanilla NN vs. echelon-stock comparison table for one test set.
# The test-set name is interpolated into the ray_results paths defined below.
testset_name = "serial_paper_comparison_8K"

# Interpreter whose make_table() is used below to load the runs of each architecture.
results_interpretor = rri.RayResultsinterpreter()

def custom_data_filler(out_row, reference_row):
    """Copy the ``'path'`` entry of ``reference_row`` into ``out_row``.

    ``out_row`` is modified in place; nothing is returned.
    """
    run_path = reference_row['path']
    out_row['path'] = run_path

def default_condition_setter(condition_name):
    """Condition-setter callback handed to ``make_table`` in this notebook.

    Ignores ``condition_name`` and always returns ``None``.
    NOTE(review): how ``make_table`` interprets a ``None`` result is not visible
    here -- confirm against ``ray_results_interpreter``.
    """
    return None

# Ray-results directory of every architecture compared in this notebook
architectures = {
    "Vanilla NN": f'/user/ml4723/Prj/NIC/ray_results/{testset_name}/vanilla_serial',
    "echelon_stock_hard": f'/user/ml4723/Prj/NIC/ray_results/{testset_name}/echelon_stock_hard',
}

# Metric name passed to make_table() as both `sort_by` and `pick_row_from_run_by`
sort_by = pick_row_from_run_by = 'dev_loss'

# Value used for the 'train_dev_sample_and_batch_size' filter
sample_size = 8192

# Per-configuration dataframes accumulated for each architecture
vanilla_dfs, echelon_dfs = [], []

# Grid of store lead times and store underage costs to evaluate
lead_times = [1, 2, 3, 4]
underage_costs = [4, 9, 19, 39]

# Collect the echelon-stock runs first: they provide the baseline ("optimal") losses
# that the Vanilla NN losses are compared against further down.
for lead_time in lead_times:
    for underage_cost in underage_costs:
        df = results_interpretor.make_table({1: architectures["echelon_stock_hard"]},
            {'train_dev_sample_and_batch_size': [sample_size],
             'store_underage_cost': [underage_cost],
             'store_lead_time': [lead_time],
             'samples': [1]},
            default_condition_setter, custom_data_filler,
            sort_by=sort_by, pick_row_from_run_by=pick_row_from_run_by, test_loss_limit=25)

        if not df.empty:
            df.insert(2, 'Architecture Class', "echelon_stock_hard")
            df.insert(1, 'hyperparam_name', "echelon_stock_hard")
            echelon_dfs.append(df)

# Combine all echelon dataframes (empty frame when nothing was found)
echelon_df = pd.concat(echelon_dfs, ignore_index=True) if echelon_dfs else pd.DataFrame()

# Map (store_lead_time, store_underage_cost) -> baseline losses for that configuration.
optimal_losses = {}
if not echelon_df.empty:
    # A configuration may contribute more than one run. Keep only the run with the
    # lowest dev loss, so the baseline is chosen the same way as the Vanilla NN best
    # run instead of being whichever row happens to come last. The stable sort keeps
    # the original order among ties, and NaN dev losses sort last.
    best_echelon_rows = (
        echelon_df
        .sort_values('Dev Loss', kind='stable')
        .drop_duplicates(subset=['store_lead_time', 'store_underage_cost'], keep='first')
    )
    for _, row in best_echelon_rows.iterrows():
        key = (row['store_lead_time'], row['store_underage_cost'])
        optimal_losses[key] = {
            'train_loss': row['Train Loss'],
            'dev_loss': row['Dev Loss'],
            'test_loss': row['Test Loss']
        }

# Collect the Vanilla NN runs and express their losses as gaps to the echelon baseline.

def _pct_gap(loss, baseline):
    """Return how far ``loss`` lies above ``baseline``, as a percentage of ``baseline``."""
    return ((loss - baseline) / baseline) * 100


# Fixed column layout, so the table has the expected columns even when it has no rows
# (otherwise sorting an empty, column-less frame raises KeyError).
result_columns = [
    'Lead Time', 'Underage Cost',
    'Train loss', 'Dev loss', 'Test loss',
    'Train gap (%)', 'Dev gap (%)', 'Test gap (%)',
    'Test optimal',
]

results = []
configs_without_vanilla = []   # (lead time, underage cost) pairs with no Vanilla NN rows
configs_without_baseline = []  # pairs that have Vanilla NN rows but no echelon baseline
for lead_time in lead_times:
    for underage_cost in underage_costs:
        key = (lead_time, underage_cost)
        df = results_interpretor.make_table({1: architectures["Vanilla NN"]},
            {'train_dev_sample_and_batch_size': [sample_size],
             'store_underage_cost': [underage_cost],
             'store_lead_time': [lead_time],
             'samples': [1]},
            default_condition_setter, custom_data_filler,
            sort_by=sort_by, pick_row_from_run_by=pick_row_from_run_by, test_loss_limit=25)

        if df.empty:
            configs_without_vanilla.append(key)
            continue
        if key not in optimal_losses:
            configs_without_baseline.append(key)
            continue

        # make_table() is asked to sort by dev loss; the first row is taken as the
        # best run (presumably the lowest dev loss -- confirm the sort direction).
        best_run = df.iloc[0]
        optimal = optimal_losses[key]

        results.append({
            'Lead Time': lead_time,
            'Underage Cost': underage_cost,
            'Train loss': round(best_run['Train Loss'], 2),
            'Dev loss': round(best_run['Dev Loss'], 2),
            'Test loss': round(best_run['Test Loss'], 2),
            'Train gap (%)': round(_pct_gap(best_run['Train Loss'], optimal['train_loss']), 2),
            'Dev gap (%)': round(_pct_gap(best_run['Dev Loss'], optimal['dev_loss']), 2),
            'Test gap (%)': round(_pct_gap(best_run['Test Loss'], optimal['test_loss']), 2),
            'Test optimal': round(optimal['test_loss'], 2),
        })

# Make dropped configurations visible: statistics computed from the table would
# otherwise silently cover only part of the grid.
if configs_without_vanilla:
    print(f"No Vanilla NN runs found for (lead time, underage cost): {configs_without_vanilla}")
if configs_without_baseline:
    print(f"No echelon baseline found for (lead time, underage cost): {configs_without_baseline}")

# Create the final table, ordered by lead time and underage cost, with a clean index
results_df = (
    pd.DataFrame(results, columns=result_columns)
    .sort_values(by=['Lead Time', 'Underage Cost'])
    .reset_index(drop=True)
)

# Display the table
results_df

Unnamed: 0,Lead Time,Underage Cost,Train loss,Dev loss,Test loss,Train gap (%),Dev gap (%),Test gap (%),Test optimal
0,1,4,6.99,6.98,6.96,1.3,0.98,0.98,6.89
1,1,9,8.47,8.44,8.42,1.01,0.84,0.78,8.36
2,1,19,9.75,9.69,9.68,1.3,0.9,0.9,9.59
3,1,39,10.84,10.82,10.79,1.19,1.15,1.03,10.68
4,2,4,7.66,7.67,7.65,0.71,0.79,0.79,7.59
5,2,9,9.4,9.34,9.32,1.31,0.72,0.79,9.25
6,2,19,10.85,10.79,10.77,1.38,1.04,1.1,10.65
7,2,39,12.02,12.03,12.02,0.59,0.99,1.08,11.89
8,3,4,8.29,8.29,8.26,0.89,0.78,0.78,8.2
9,3,9,10.19,10.14,10.11,1.26,0.91,0.89,10.02


In [3]:
# Summarise the test gap over every configuration present in the results table
test_gap_pct = results_df['Test gap (%)']
mean_test_gap, max_test_gap = test_gap_pct.mean(), test_gap_pct.max()

# Report both statistics with two decimals
print(f"Mean Test Gap (%): {mean_test_gap:.2f}")
print(f"Max Test Gap (%): {max_test_gap:.2f}")


Mean Test Gap (%): 0.88
Max Test Gap (%): 1.10
