# Addressing distributional shifts in machine learning: The case of order fulfillment in customized production

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


In [2]:
# Mean absolute error (MAE) of transfer learning vs. model retraining.
# Prints two tables (mean and standard deviation over the runs); the table
# columns "1"-"4" correspond to theta = 1..4.
nonlin = 'rf'
error = 'gaussian'
capacity = 70
ratio = 1

# Collect the per-run values once. Both tables below are derived from these
# lists, so every result file is read a single time instead of once per
# statistic.
file_template = ('results_final/theta_{theta}_run_{run}_{nonlin}_{error}'
                 '_capacity_{capacity}_cost-ratio_{ratio}_final_results.csv')
mae_retrain_runs = []   # one list of per-run values for each theta
mae_transfer_runs = []
for theta in [1, 2, 3, 4]:
    mae_retrain_run = []
    mae_transfer_run = []
    for run in [1, 2, 3, 4, 5, 6, 7, 8]:
        file_name = file_template.format(
            theta=theta, run=run, nonlin=nonlin, error=error,
            capacity=capacity, ratio=ratio,
        )
        # The first CSV column is used as the row index (metric name).
        df = pd.read_csv(file_name, index_col=0)
        retrain = df.loc['MAE', 'Model retraining']
        transfer = df.loc['MAE', 'Transfer learning']
        mae_retrain_run.append(retrain)
        mae_transfer_run.append(transfer)
    mae_retrain_runs.append(mae_retrain_run)
    mae_transfer_runs.append(mae_transfer_run)

table_columns = ["1", "2", "3", "4"]
table_index = ["Transfer learning", "Model retraining"]

# Average
mae_retrain = [np.mean(values) for values in mae_retrain_runs]
mae_transfer = [np.mean(values) for values in mae_transfer_runs]

# Print MAE performance table (rows follow the order of table_index)
performance = pd.DataFrame([mae_transfer, mae_retrain], index=table_index, columns=table_columns)
performance = performance.round(1)
print(performance)

# Standard deviation
# NOTE: np.std defaults to ddof=0, i.e. the population standard deviation.
mae_retrain = [np.std(values) for values in mae_retrain_runs]
mae_transfer = [np.std(values) for values in mae_transfer_runs]

# Print MAE standard-deviation table
performance2 = pd.DataFrame([mae_transfer, mae_retrain], index=table_index, columns=table_columns)
performance2 = performance2.round(1)
print(performance2)


                      1     2     3     4
Transfer learning  49.0  48.5  48.6  50.4
Model retraining   33.4  38.4  42.8  47.3
                     1    2    3    4
Transfer learning  1.2  1.6  1.8  2.1
Model retraining   1.8  2.1  3.9  2.4


In [3]:
# Scheduling cost of transfer learning vs. model retraining.
# Prints two tables (mean and standard deviation over the runs); the table
# columns "1"-"4" correspond to theta = 1..4.
nonlin = 'rf'
error = 'gaussian'
capacity = 70
ratio = 1

# Collect the per-run values once. Both tables below are derived from these
# lists, so every result file is read a single time instead of once per
# statistic.
file_template = ('results_final/theta_{theta}_run_{run}_{nonlin}_{error}'
                 '_capacity_{capacity}_cost-ratio_{ratio}_final_results.csv')
scost_retrain_runs = []   # one list of per-run values for each theta
scost_transfer_runs = []
for theta in [1, 2, 3, 4]:
    scost_retrain_run = []
    scost_transfer_run = []
    for run in [1, 2, 3, 4, 5, 6, 7, 8]:
        file_name = file_template.format(
            theta=theta, run=run, nonlin=nonlin, error=error,
            capacity=capacity, ratio=ratio,
        )
        # The first CSV column is used as the row index (metric name).
        df = pd.read_csv(file_name, index_col=0)
        retrain = df.loc['Scheduling cost', 'Model retraining']
        transfer = df.loc['Scheduling cost', 'Transfer learning']
        scost_retrain_run.append(retrain)
        scost_transfer_run.append(transfer)
    scost_retrain_runs.append(scost_retrain_run)
    scost_transfer_runs.append(scost_transfer_run)

table_columns = ["1", "2", "3", "4"]
table_index = ["Transfer learning", "Model retraining"]

# Average
scost_retrain = [np.mean(values) for values in scost_retrain_runs]
scost_transfer = [np.mean(values) for values in scost_transfer_runs]

# Print scheduling cost performance table (rows follow the order of table_index)
performance = pd.DataFrame([scost_transfer, scost_retrain], index=table_index, columns=table_columns)
performance = performance.round(1)
print(performance)

# Standard deviation
# NOTE: np.std defaults to ddof=0, i.e. the population standard deviation.
scost_retrain = [np.std(values) for values in scost_retrain_runs]
scost_transfer = [np.std(values) for values in scost_transfer_runs]

# Print scheduling cost standard-deviation table
performance2 = pd.DataFrame([scost_transfer, scost_retrain], index=table_index, columns=table_columns)
performance2 = performance2.round(1)
print(performance2)


                        1       2       3       4
Transfer learning  4881.0  4780.1  4828.1  4966.6
Model retraining   3379.5  3883.1  4321.2  4729.5
                       1      2      3      4
Transfer learning  161.8  189.1  196.3  229.1
Model retraining   199.6  226.2  400.7  261.7
