In [0]:
import pandas as pd
import glob
import yaml
import os
from datetime import datetime

In [0]:
%run ../0_Config.ipynb

In [0]:
# Algorithms whose result files are checked for exceptions; trim this list to narrow the check
algorithms = [
    'ElasticNet',
    'ExponentialSmoothingHolt',
    'ExponentialSmoothingHoltWinters',
    'Lasso_cvglmnet',
    'Prophet',
    'SARIMAX',
    'SimpleExponentialSmoothing',
    'XGBoost',
    'DeepAR',
    'DeepState',
    'LSTM',
]
# Folder that the cells below join with each algorithm name to find its results
output_directory = os.path.join(app_config['output_dir_path'], "Modeling_Results")

In [0]:
# Window-level backtesting exceptions: for every algorithm, read its most recent
# "Backtesting_results_window_level (<timestamp>).csv" file, keep the rows whose
# status is not "success", and export the combined rows to the Exceptions folder.
timestamp_format = '%Y-%m-%d-%H-%M-%S'
exception_frames = []

for algo in algorithms:
    algo_path = os.path.join(output_directory, algo)
    # isdir (instead of glob) also rejects a plain file carrying the algorithm's
    # name, which would otherwise make os.listdir fail.
    if not os.path.isdir(algo_path):
        print(algo_path+" >>> does not exists")
        continue

    # Only CSV files with the versioned prefix are candidates; any other file
    # sharing the prefix would break the timestamp parsing below.
    backtesting_files = [file for file in os.listdir(algo_path)
                         if "Backtesting_results_window_level (" in file and file.endswith(".csv")]
    if not backtesting_files:
        print("No Backtesting_results_window_level for "+algo)
        continue

    print(algo)
    # Reading the latest file based on the timestamp between the parentheses of its name
    version_dates = {
        file: datetime.strptime(os.path.splitext(file)[0].split('(')[1].replace(')', ''), timestamp_format)
        for file in backtesting_files
    }
    latest_file = max(version_dates, key=version_dates.get)
    backtesting_results_file_path = os.path.join(algo_path, latest_file)
    backtesting_results = pd.read_csv(backtesting_results_file_path)

    if algo in ['DeepAR', 'DeepState']:
        # Both algorithms read the DeepAR "global_model_gran" setting.
        # NOTE(review): confirm DeepState is meant to reuse the DeepAR key.
        # dict.fromkeys de-duplicates while keeping a stable column order
        # (a set would reorder the columns from run to run).
        key_columns = list(dict.fromkeys(app_config['modeling_granularity'] +
                                         app_config["Algorithms"]["DeepAR"]["global_model_gran"]))
    else:
        key_columns = app_config['modeling_granularity']

    # .loc + .assign builds a fresh frame, so no column is set on a filtered slice
    backtesting_results = (
        backtesting_results
        .loc[backtesting_results["status"] != "success", key_columns + ["status"]]
        .assign(algorithm=algo)
    )
    exception_frames.append(backtesting_results)

# Concatenate once; pd.concat raises on an empty list, hence the fallback
if exception_frames:
    backtesting_results_fin = pd.concat(exception_frames, ignore_index=True)
else:
    backtesting_results_fin = pd.DataFrame()

# Dropping duplicates if any
backtesting_results_fin = backtesting_results_fin.drop_duplicates()

# Exporting the final file (the folder is created even when there is nothing to export)
export_path = os.path.join(output_directory, 'Exceptions')
os.makedirs(export_path, exist_ok=True)

if len(backtesting_results_fin) > 0:
    export_file_name = "Backtesting_exceptions_window_level ("+datetime.today().strftime(timestamp_format)+").csv"
    backtesting_results_fin.to_csv(os.path.join(export_path, export_file_name), index=False)

backtesting_results_fin

Unnamed: 0,Div_No,Store_No,Base_UPC,status,algorithm


In [0]:
# Best-hyperparameter exceptions: for every algorithm, read its most recent
# "Best_hyperparameters (<timestamp>).csv" file, keep the rows whose status is
# not "success", and export the combined rows to the Exceptions folder.
timestamp_format = '%Y-%m-%d-%H-%M-%S'
exception_frames = []

for algo in algorithms:
    algo_path = os.path.join(output_directory, algo)
    # isdir (instead of glob) also rejects a plain file carrying the algorithm's
    # name, which would otherwise make os.listdir fail.
    if not os.path.isdir(algo_path):
        print(algo_path+" >>> does not exists")
        continue

    # Only CSV files with the versioned prefix are candidates; any other file
    # sharing the prefix would break the timestamp parsing below.
    best_hyperparameters_files = [file for file in os.listdir(algo_path)
                                  if "Best_hyperparameters (" in file and file.endswith(".csv")]
    if not best_hyperparameters_files:
        print("No Best_hyperparameters for "+algo)
        continue

    print(algo)
    # Reading the latest file based on the timestamp between the parentheses of its name
    version_dates = {
        file: datetime.strptime(os.path.splitext(file)[0].split('(')[1].replace(')', ''), timestamp_format)
        for file in best_hyperparameters_files
    }
    latest_file = max(version_dates, key=version_dates.get)
    best_hyperparameters_file_path = os.path.join(algo_path, latest_file)
    best_hyperparam_results = pd.read_csv(best_hyperparameters_file_path)

    if algo in ['DeepAR', 'DeepState']:
        # Both algorithms read the DeepAR "global_model_gran" setting.
        # NOTE(review): confirm DeepState is meant to reuse the DeepAR key.
        key_columns = app_config["Algorithms"]["DeepAR"]["global_model_gran"]
    else:
        key_columns = app_config['modeling_granularity']

    # .loc + .assign builds a fresh frame, so no column is set on a filtered slice
    best_hyperparam_results = (
        best_hyperparam_results
        .loc[best_hyperparam_results['status'] != 'success', key_columns + ["status"]]
        .assign(algorithm=algo)
    )
    exception_frames.append(best_hyperparam_results)

# Concatenate once; pd.concat raises on an empty list, hence the fallback
if exception_frames:
    best_hyperparam_results_fin = pd.concat(exception_frames, ignore_index=True)
else:
    best_hyperparam_results_fin = pd.DataFrame()

# Dropping duplicates if any
best_hyperparam_results_fin = best_hyperparam_results_fin.drop_duplicates()

# Exporting the final file (the folder is created even when there is nothing to export)
export_path = os.path.join(output_directory, 'Exceptions')
os.makedirs(export_path, exist_ok=True)

if len(best_hyperparam_results_fin) > 0:
    export_file_name = "Best_hyperparameters_exceptions ("+datetime.today().strftime(timestamp_format)+").csv"
    best_hyperparam_results_fin.to_csv(os.path.join(export_path, export_file_name), index=False)

best_hyperparam_results_fin

Unnamed: 0,Div_No,Store_No,Base_UPC,status,algorithm


In [0]:
# Out-of-sample evaluation exceptions: for every algorithm, read its most recent
# "Out_of_sample_evaluation_results (<timestamp>).csv" file, keep the rows whose
# status is not "success", and export the combined rows to the Exceptions folder.
timestamp_format = '%Y-%m-%d-%H-%M-%S'
exception_frames = []

for algo in algorithms:
    algo_path = os.path.join(output_directory, algo)
    # isdir (instead of glob) also rejects a plain file carrying the algorithm's
    # name, which would otherwise make os.listdir fail.
    if not os.path.isdir(algo_path):
        print(algo_path+" >>> does not exists")
        continue

    # Only CSV files with the versioned prefix are candidates; any other file
    # sharing the prefix would break the timestamp parsing below.
    Out_of_sample_files = [file for file in os.listdir(algo_path)
                           if "Out_of_sample_evaluation_results (" in file and file.endswith(".csv")]
    if not Out_of_sample_files:
        print("No Out_of_sample_evaluation_results for "+algo)
        continue

    print(algo)
    # Reading the latest file based on the timestamp between the parentheses of its name
    version_dates = {
        file: datetime.strptime(os.path.splitext(file)[0].split('(')[1].replace(')', ''), timestamp_format)
        for file in Out_of_sample_files
    }
    latest_file = max(version_dates, key=version_dates.get)
    Out_of_sample_file_path = os.path.join(algo_path, latest_file)
    Out_of_sample_results = pd.read_csv(Out_of_sample_file_path)

    # .loc + .assign builds a fresh frame, so no column is set on a filtered slice
    Out_of_sample_results = (
        Out_of_sample_results
        .loc[Out_of_sample_results['status'] != 'success', app_config['modeling_granularity'] + ["status"]]
        .assign(algorithm=algo)
    )
    exception_frames.append(Out_of_sample_results)

# Concatenate once; pd.concat raises on an empty list, hence the fallback
if exception_frames:
    Out_of_sample_results_fin = pd.concat(exception_frames, ignore_index=True)
else:
    Out_of_sample_results_fin = pd.DataFrame()

# Dropping duplicates if any
Out_of_sample_results_fin = Out_of_sample_results_fin.drop_duplicates()

# Exporting the final file (the folder is created even when there is nothing to export)
export_path = os.path.join(output_directory, 'Exceptions')
os.makedirs(export_path, exist_ok=True)

if len(Out_of_sample_results_fin) > 0:
    export_file_name = "Out_of_sample_evaluation_exceptions ("+datetime.today().strftime(timestamp_format)+").csv"
    Out_of_sample_results_fin.to_csv(os.path.join(export_path, export_file_name), index=False)

Out_of_sample_results_fin

Unnamed: 0,Div_No,Store_No,Base_UPC,status,algorithm


In [0]:
# Future-forecast exceptions: for every algorithm, read its most recent
# "Future_forecast_results (<timestamp>).csv" file, keep the rows whose status
# is not "success", and export the combined rows to the Exceptions folder.
timestamp_format = '%Y-%m-%d-%H-%M-%S'
exception_frames = []

for algo in algorithms:
    algo_path = os.path.join(output_directory, algo)
    # isdir (instead of glob) also rejects a plain file carrying the algorithm's
    # name, which would otherwise make os.listdir fail.
    if not os.path.isdir(algo_path):
        print(algo_path+" >>> does not exists")
        continue

    # Only CSV files with the versioned prefix are candidates; any other file
    # sharing the prefix would break the timestamp parsing below.
    future_forecast_files = [file for file in os.listdir(algo_path)
                             if "Future_forecast_results (" in file and file.endswith(".csv")]
    if not future_forecast_files:
        print("No Future_forecast_results for "+algo)
        continue

    print(algo)
    # Reading the latest file based on the timestamp between the parentheses of its name
    version_dates = {
        file: datetime.strptime(os.path.splitext(file)[0].split('(')[1].replace(')', ''), timestamp_format)
        for file in future_forecast_files
    }
    latest_file = max(version_dates, key=version_dates.get)
    future_forecast_file_path = os.path.join(algo_path, latest_file)
    future_forecast_results = pd.read_csv(future_forecast_file_path)

    # .loc + .assign builds a fresh frame, so no column is set on a filtered slice
    future_forecast_results = (
        future_forecast_results
        .loc[future_forecast_results['status'] != 'success', app_config['modeling_granularity'] + ["status"]]
        .assign(algorithm=algo)
    )
    exception_frames.append(future_forecast_results)

# Concatenate once; pd.concat raises on an empty list, hence the fallback
if exception_frames:
    future_forecast_results_fin = pd.concat(exception_frames, ignore_index=True)
else:
    future_forecast_results_fin = pd.DataFrame()

# Dropping duplicates if any
future_forecast_results_fin = future_forecast_results_fin.drop_duplicates()

# Exporting the final file (the folder is created even when there is nothing to export)
export_path = os.path.join(output_directory, 'Exceptions')
os.makedirs(export_path, exist_ok=True)

if len(future_forecast_results_fin) > 0:
    export_file_name = "Future_forecast_exceptions ("+datetime.today().strftime(timestamp_format)+").csv"
    future_forecast_results_fin.to_csv(os.path.join(export_path, export_file_name), index=False)

future_forecast_results_fin

In [0]:
# Granular-level trend exceptions: read the most recent
# "granular_level_trend_results (<timestamp>).csv" file, keep the rows whose
# status is not "success", and export them to Data_Processing/Exceptions.
coeff_results = pd.DataFrame()
timestamp_format = '%Y-%m-%d-%H-%M-%S'

# NOTE: this rebinds `output_directory`, which the modeling cells earlier in the
# notebook join with algorithm names; re-run the configuration cell before
# re-running any of them.
output_directory = os.path.join(app_config['output_dir_path'], "Data_Processing")
algo_path = os.path.join(output_directory, 'modeling_level_trend')

# isdir (instead of glob) also rejects a plain file with that name, which would
# otherwise make os.listdir fail.
if not os.path.isdir(algo_path):
    print(algo_path+" >>> does not exists")
else:
    # Only CSV files with the versioned prefix are candidates; any other file
    # sharing the prefix would break the timestamp parsing below.
    files = [file for file in os.listdir(algo_path)
             if "granular_level_trend_results (" in file and file.endswith(".csv")]
    if not files:
        print("No Granular level trend results")
    else:
        print('Granular level trend results')
        # Reading the latest file based on the timestamp between the parentheses of its name
        version_dates = {
            file: datetime.strptime(os.path.splitext(file)[0].split('(')[1].replace(')', ''), timestamp_format)
            for file in files
        }
        latest_file = max(version_dates, key=version_dates.get)
        file_path = os.path.join(algo_path, latest_file)
        results = pd.read_csv(file_path)
        results = results.loc[results['status'] != 'success', app_config['modeling_granularity'] + ["status"]]
        # Fresh 0..n-1 index, as the previous concat with ignore_index produced
        coeff_results = results.reset_index(drop=True)

# Dropping duplicates if any
coeff_results = coeff_results.drop_duplicates()

# Exporting the final file (the folder is created even when there is nothing to export)
export_path = os.path.join(output_directory, 'Exceptions')
os.makedirs(export_path, exist_ok=True)

if len(coeff_results) > 0:
    export_file_name = "granular_level_trend_exceptions ("+datetime.today().strftime(timestamp_format)+").csv"
    coeff_results.to_csv(os.path.join(export_path, export_file_name), index=False)

coeff_results

Unnamed: 0,Div_No,Store_No,Base_UPC,status


In [0]:
# Higher-level trend / seasonality-index exceptions: read the most recent
# "higher_level_trend_si_results (<timestamp>).csv" file, keep the rows whose
# status is not "success", and export them to Data_Processing/Exceptions.
coeff_results = pd.DataFrame()
timestamp_format = '%Y-%m-%d-%H-%M-%S'

# Built from app_config directly so neither path depends on which earlier cell
# last assigned `output_directory`.
data_processing_dir = os.path.join(app_config['output_dir_path'], "Data_Processing")
algo_path = os.path.join(data_processing_dir, 'higher_level_trend_si')

# isdir (instead of glob) also rejects a plain file with that name, which would
# otherwise make os.listdir fail.
if not os.path.isdir(algo_path):
    print(algo_path+" >>> does not exists")
else:
    # Only CSV files with the versioned prefix are candidates; any other file
    # sharing the prefix would break the timestamp parsing below.
    files = [file for file in os.listdir(algo_path)
             if "higher_level_trend_si_results (" in file and file.endswith(".csv")]
    if not files:
        print("No higher level trend and si results")
    else:
        print('Higher level trend and si results')
        # Reading the latest file based on the timestamp between the parentheses of its name
        version_dates = {
            file: datetime.strptime(os.path.splitext(file)[0].split('(')[1].replace(')', ''), timestamp_format)
            for file in files
        }
        latest_file = max(version_dates, key=version_dates.get)
        file_path = os.path.join(algo_path, latest_file)
        results = pd.read_csv(file_path)
        key_columns = app_config["data_processing"]['feature_engineering']['prophet_based']["higher_level_si_trend_creation"]["granularity"]
        results = results.loc[results['status'] != 'success', key_columns + ["status"]]
        # Fresh 0..n-1 index, as the previous concat with ignore_index produced
        coeff_results = results.reset_index(drop=True)

# Dropping duplicates if any
coeff_results = coeff_results.drop_duplicates()

# Exporting the final file (the folder is created even when there is nothing to export)
export_path = os.path.join(data_processing_dir, 'Exceptions')
os.makedirs(export_path, exist_ok=True)

if len(coeff_results) > 0:
    export_file_name = "higher_level_trend_si_exceptions ("+datetime.today().strftime(timestamp_format)+").csv"
    coeff_results.to_csv(os.path.join(export_path, export_file_name), index=False)

coeff_results

Unnamed: 0,Div_No,status


In [0]:
# Seasonality-index exceptions: read the most recent "SI_results (<timestamp>).csv"
# file, keep the rows whose status is not "success", and export them to
# Data_Processing/Exceptions.
coeff_results = pd.DataFrame()
timestamp_format = '%Y-%m-%d-%H-%M-%S'

# Built from app_config directly so neither path depends on which earlier cell
# last assigned `output_directory`.
data_processing_dir = os.path.join(app_config['output_dir_path'], "Data_Processing")
algo_path = os.path.join(data_processing_dir, 'Seasonality_Index')

# isdir (instead of glob) also rejects a plain file with that name, which would
# otherwise make os.listdir fail.
if not os.path.isdir(algo_path):
    print(algo_path+" >>> does not exists")
else:
    # Only CSV files with the versioned prefix are candidates; any other file
    # sharing the prefix would break the timestamp parsing below.
    files = [file for file in os.listdir(algo_path)
             if "SI_results (" in file and file.endswith(".csv")]
    if not files:
        print("No si results")
    else:
        print('si results')
        # Reading the latest file based on the timestamp between the parentheses of its name
        version_dates = {
            file: datetime.strptime(os.path.splitext(file)[0].split('(')[1].replace(')', ''), timestamp_format)
            for file in files
        }
        latest_file = max(version_dates, key=version_dates.get)
        file_path = os.path.join(algo_path, latest_file)
        results = pd.read_csv(file_path)
        key_columns = app_config["data_processing"]['feature_engineering']['prophet_based']["higher_level_si_trend_creation"]["granularity"]
        results = results.loc[results['status'] != 'success', key_columns + ["status"]]
        # Fresh 0..n-1 index, as the previous concat with ignore_index produced
        coeff_results = results.reset_index(drop=True)

# Dropping duplicates if any
coeff_results = coeff_results.drop_duplicates()

# Exporting the final file (the folder is created even when there is nothing to export)
export_path = os.path.join(data_processing_dir, 'Exceptions')
os.makedirs(export_path, exist_ok=True)

if len(coeff_results) > 0:
    export_file_name = "calculated_si_exceptions ("+datetime.today().strftime(timestamp_format)+").csv"
    coeff_results.to_csv(os.path.join(export_path, export_file_name), index=False)

coeff_results

Unnamed: 0,Div_No,status


In [0]:
# Lasso feature-selection exceptions: read the most recent
# "lasso_feature_selection_results (<timestamp>).csv" file, keep the rows whose
# status is not "success", and export them to Feature_Selection/Exceptions.
coeff_results = pd.DataFrame()
timestamp_format = '%Y-%m-%d-%H-%M-%S'

# NOTE: this rebinds `output_directory`, which earlier cells of the notebook use
# to build their own input and export paths; re-run the configuration cell
# before re-running any of them.
output_directory = os.path.join(app_config['output_dir_path'], "Feature_Selection")
algo_path = os.path.join(output_directory, 'Lasso')

# isdir (instead of glob) also rejects a plain file with that name, which would
# otherwise make os.listdir fail.
if not os.path.isdir(algo_path):
    print(algo_path+" >>> does not exists")
else:
    # Only CSV files with the versioned prefix are candidates; any other file
    # sharing the prefix would break the timestamp parsing below.
    files = [file for file in os.listdir(algo_path)
             if "lasso_feature_selection_results (" in file and file.endswith(".csv")]
    if not files:
        print("No lasso feature selection results")
    else:
        print('lasso feature selection results')
        # Reading the latest file based on the timestamp between the parentheses of its name
        version_dates = {
            file: datetime.strptime(os.path.splitext(file)[0].split('(')[1].replace(')', ''), timestamp_format)
            for file in files
        }
        latest_file = max(version_dates, key=version_dates.get)
        file_path = os.path.join(algo_path, latest_file)
        results = pd.read_csv(file_path)
        results = results.loc[results['status'] != 'success', app_config["modeling_granularity"] + ["status"]]
        # Fresh 0..n-1 index, as the previous concat with ignore_index produced
        coeff_results = results.reset_index(drop=True)

# Dropping duplicates if any
coeff_results = coeff_results.drop_duplicates()

# Exporting the final file (the folder is created even when there is nothing to export)
export_path = os.path.join(output_directory, 'Exceptions')
os.makedirs(export_path, exist_ok=True)

if len(coeff_results) > 0:
    export_file_name = "lasso_feature_selection_exceptions ("+datetime.today().strftime(timestamp_format)+").csv"
    coeff_results.to_csv(os.path.join(export_path, export_file_name), index=False)

coeff_results

Unnamed: 0,Div_No,status
