# Addressing distributional shifts in machine learning: The case of order fulfillment in customized production

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


In [2]:
# Mean absolute error
#
# Summarises the 'MAE' row of the per-run result files for one experimental
# configuration. For every theta the cell prints Welch's t-tests comparing the
# "Adversarial" column against the "Elastic Net" and "Artificial Neural
# Network" columns, then prints two tables (models as rows, theta as columns):
# the mean over runs and the standard deviation over runs.
nonlin = 'rf'
error = 'gaussian'
capacity = 70
ratio = 1

thetas = [1, 2, 3, 4]
runs = range(1, 11)
# Column labels of the result CSVs, listed in the row order of the printed tables
models = ["Elastic Net", "Artificial Neural Network", "Adversarial", "Oracle"]

# One list per model with one entry per theta
mae_mean = {model: [] for model in models}
mae_std = {model: [] for model in models}
for theta in thetas:
    # MAE of every model over all runs of this theta; each file is read only once
    # and feeds the t-tests, the mean and the standard deviation
    mae_runs = {model: [] for model in models}
    for run in runs:
        file_name = f'results_final/theta_{theta}_run_{run}_{nonlin}_{error}_capacity_{capacity}_cost-ratio_{ratio}_final_results.csv'
        df = pd.read_csv(file_name, index_col=0)
        for model in models:
            mae_runs[model].append(df.loc['MAE', model])
    # equal_var=False selects Welch's t-test (no equal-variance assumption)
    print("Welch's t-test for MAE (vs. linear model): " + str(stats.ttest_ind(mae_runs["Elastic Net"], mae_runs["Adversarial"], equal_var=False)))
    print("Welch's t-test for MAE (vs. nonlinear model): " + str(stats.ttest_ind(mae_runs["Artificial Neural Network"], mae_runs["Adversarial"], equal_var=False)))
    for model in models:
        mae_mean[model].append(np.mean(mae_runs[model]))
        # NOTE: np.std defaults to ddof=0, i.e. the population standard deviation
        mae_std[model].append(np.std(mae_runs[model]))

theta_labels = [str(theta) for theta in thetas]

# Print MAE performance table (average over runs)
performance = pd.DataFrame([mae_mean[model] for model in models], index=models, columns=theta_labels)
performance = performance.round(1)
print(performance)

# Print MAE performance table (standard deviation over runs)
performance2 = pd.DataFrame([mae_std[model] for model in models], index=models, columns=theta_labels)
performance2 = performance2.round(1)
print(performance2)


Welch's t-test for MAE (vs. linear model): Ttest_indResult(statistic=0.2129032258064436, pvalue=0.8343040532975678)
Welch's t-test for MAE (vs. nonlinear model): Ttest_indResult(statistic=7.677505216141276, pvalue=4.822401121023729e-07)
Welch's t-test for MAE (vs. linear model): Ttest_indResult(statistic=1.7792407578567366, pvalue=0.09213401500178992)
Welch's t-test for MAE (vs. nonlinear model): Ttest_indResult(statistic=6.385312745000084, pvalue=6.505954896520282e-05)
Welch's t-test for MAE (vs. linear model): Ttest_indResult(statistic=4.224882288272031, pvalue=0.0009788109880510634)
Welch's t-test for MAE (vs. nonlinear model): Ttest_indResult(statistic=10.242693206978362, pvalue=1.207193621715918e-06)
Welch's t-test for MAE (vs. linear model): Ttest_indResult(statistic=5.7365554409360895, pvalue=0.00010930601360703755)
Welch's t-test for MAE (vs. nonlinear model): Ttest_indResult(statistic=11.699027713063495, pvalue=4.154062486842527e-07)
                              1     2     3

In [3]:
# Scheduling cost
#
# Summarises the 'Scheduling cost' row of the per-run result files for one
# experimental configuration. For every theta the cell prints Welch's t-tests
# comparing the "Adversarial" column against the "Elastic Net" and "Artificial
# Neural Network" columns, then prints two tables (models as rows, theta as
# columns): the mean over runs and the standard deviation over runs.
nonlin = 'rf'
error = 'gaussian'
capacity = 70
ratio = 1

thetas = [1, 2, 3, 4]
runs = range(1, 11)
# Column labels of the result CSVs, listed in the row order of the printed tables
models = ["Elastic Net", "Artificial Neural Network", "Adversarial", "Oracle"]

# One list per model with one entry per theta
scost_mean = {model: [] for model in models}
scost_std = {model: [] for model in models}
for theta in thetas:
    # Scheduling cost of every model over all runs of this theta; each file is
    # read only once and feeds the t-tests, the mean and the standard deviation
    scost_runs = {model: [] for model in models}
    for run in runs:
        file_name = f'results_final/theta_{theta}_run_{run}_{nonlin}_{error}_capacity_{capacity}_cost-ratio_{ratio}_final_results.csv'
        df = pd.read_csv(file_name, index_col=0)
        for model in models:
            scost_runs[model].append(df.loc['Scheduling cost', model])
    # equal_var=False selects Welch's t-test (no equal-variance assumption)
    print("Welch's t-test for Scheduling cost (vs. linear model): " + str(stats.ttest_ind(scost_runs["Elastic Net"], scost_runs["Adversarial"], equal_var=False)))
    print("Welch's t-test for Scheduling cost (vs. nonlinear model): " + str(stats.ttest_ind(scost_runs["Artificial Neural Network"], scost_runs["Adversarial"], equal_var=False)))
    for model in models:
        scost_mean[model].append(np.mean(scost_runs[model]))
        # NOTE: np.std defaults to ddof=0, i.e. the population standard deviation
        scost_std[model].append(np.std(scost_runs[model]))

theta_labels = [str(theta) for theta in thetas]

# Print scheduling cost table (average over runs)
performance = pd.DataFrame([scost_mean[model] for model in models], index=models, columns=theta_labels)
performance = performance.round(1)
print(performance)

# Print scheduling cost table (standard deviation over runs)
performance2 = pd.DataFrame([scost_std[model] for model in models], index=models, columns=theta_labels)
performance2 = performance2.round(1)
print(performance2)


Welch's t-test for Scheduling cost (vs. linear model): Ttest_indResult(statistic=3.6349618454316133, pvalue=0.002675487588632289)
Welch's t-test for Scheduling cost (vs. nonlinear model): Ttest_indResult(statistic=9.152762534860349, pvalue=9.090087159621733e-08)
Welch's t-test for Scheduling cost (vs. linear model): Ttest_indResult(statistic=7.385928396133224, pvalue=1.621664701252461e-06)
Welch's t-test for Scheduling cost (vs. nonlinear model): Ttest_indResult(statistic=9.382796983761788, pvalue=2.0523979107089372e-06)
Welch's t-test for Scheduling cost (vs. linear model): Ttest_indResult(statistic=8.529835653383353, pvalue=3.3457249285246783e-06)
Welch's t-test for Scheduling cost (vs. nonlinear model): Ttest_indResult(statistic=13.530419672995706, pvalue=1.3519062867549992e-07)
Welch's t-test for Scheduling cost (vs. linear model): Ttest_indResult(statistic=8.747420957144808, pvalue=2.9244703340546246e-06)
Welch's t-test for Scheduling cost (vs. nonlinear model): Ttest_indResult(st