### Objective:
The objective of the notebook is to -
* Build the final model of Exponential Smoothing Holt algorithm using the best hyperparameter set identified using Backtesting and score the test set to get a performance metric
* For forecasting future periods, we will re-train the model with the same hyperparameter set on the train + validation + test set to capture the patterns in the test set and then forecast future N periods

In [0]:
import yaml
import inspect
import glob
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import Holt
from distutils.command.config import config
from tqdm.auto import tqdm
from datetime import timedelta
from datetime import datetime
import mlflow
from sklearn.metrics import mean_absolute_error,mean_squared_error
import os
import logging
import dotsi

In [0]:
# Logging setup: each run writes to its own timestamped log file under p_dir
p_dir = "/tmp/"
log_file = "Holt_model_eval_retraining_scoring ({}).log".format(
    datetime.today().strftime('%Y-%m-%d-%H-%M-%S')
)

# File handler whose lines read "<time> - <logger name> - <level> - <message>"
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
fh = logging.FileHandler("".join([p_dir, log_file]))
fh.setFormatter(formatter)

# Named logger used by the rest of the notebook; DEBUG lets every level through
logger = logging.getLogger('custom_log')
logger.setLevel(logging.DEBUG)
logger.addHandler(fh)

In [0]:
# Getting the default settings of hyperparameters. Used to check that user-provided hyperparameters must always be a subset of these.
def get_default_args(func) -> dict:
    """Collect the default value of every parameter in a callable's signature

    Parameters
    ----------
    func : callable
        The callable to inspect (Eg: Holt.__init__, Holt.fit)

    Returns
    -------
    dict
        Maps each parameter name, except 'self', to its declared default value;
        parameters declared without a default are mapped to None
    """
    defaults = {}
    for name, param in inspect.signature(func).parameters.items():
        if name == 'self':
            continue
        has_default = param.default is not inspect.Parameter.empty
        defaults[name] = param.default if has_default else None
    return defaults
    
# Parameter-name -> default-value maps for Holt.fit and Holt.__init__; the config
# validation further down checks the user-supplied keys against these.
default_hpps_fit = get_default_args(Holt.fit)
default_hpps_init = get_default_args(Holt.__init__)

In [0]:
# Run the shared configuration notebook. The cells below read `app_config`, which is
# not defined anywhere in this notebook -- presumably it is created by this run.
%run ../../../0_Config.ipynb

In [0]:
import copy

logger.info("Config file read")

# Every hyperparameter key supplied in the config must be an argument that the
# corresponding Holt method (__init__ / fit) actually accepts.
assert set(app_config["Algorithms"]["ExponentialSmoothingHolt"]["Hyperparameters"]['__init__'].keys()).\
           issubset(set(default_hpps_init.keys())),\
           'keys supplied by the user for the ExponentialSmoothingHolt Algorithm under __init__ method must be valid'
assert set(app_config["Algorithms"]["ExponentialSmoothingHolt"]["Hyperparameters"]['fit'].keys()).\
           issubset(set(default_hpps_fit.keys())),\
           'keys supplied by the user for the ExponentialSmoothingHolt Algorithm under fit method must be valid'

# For exporting the config file.
# A deep copy is required here: the following cells rewrite the nested hyperparameter
# dictionaries of app_config, and a shallow copy shares those nested objects, so the
# exported file would contain the rewritten values instead of the supplied ones.
temp_config = copy.deepcopy(app_config)

In [0]:
def frange(start,stop,step= 1):
    """Float-friendly counterpart of range(): values from start up to, not including, stop

    Parameters
    ----------
    start : int or float
        First value of the sequence
    stop : int or float
        Exclusive upper bound of the sequence
    step : int or float, optional
        Positive distance between consecutive values, by default 1

    Returns
    -------
    list
        The generated values, each rounded to len(str(step)) decimal places.
        Empty when start >= stop

    Raises
    ------
    ValueError
        If step is not positive while start < stop (the sequence would never end)
    """
    if start >= stop:
        return []
    if step <= 0:
        raise ValueError("step must be positive when start < stop")

    ndigits = len(str(step))
    values = []
    count = 0
    while True:
        # Each value is derived from the index instead of a running sum: repeated float
        # addition drifts (ten additions of 0.1 give 0.9999999999999999), which used to
        # let the exclusive stop value slip into the result.
        raw = start + count * step if count else start
        value = round(raw, ndigits)
        if raw >= stop or value >= stop:
            break
        values.append(value)
        count += 1
    return values

def drange(hyperparameters):
    """Expand 'range(...)' style hyperparameter strings into explicit lists of values

    Parameters
    ----------
    hyperparameters : dict
        Hyperparameter name -> configured value. String values containing 'range'
        are evaluated as Python expressions, with the builtin range redirected to
        frange so that float steps work

    Returns
    -------
    dict
        The same dictionary object, updated in place. Expanded entries hold a list of
        de-duplicated values (order not guaranteed); all other entries are untouched
    """
    import re

    for key, val in hyperparameters.items():
        # Only a string can carry a range expression. Lists, numbers, booleans and None
        # are kept as they are (testing `'range' in val` on a number raises TypeError).
        if not isinstance(val, str) or 'range' not in val:
            continue
        # Whole-word substitution, so names such as frange or arange are not mangled.
        expression = re.sub(r'\brange\b', 'frange', val)
        # NOTE(security): the expression comes straight from the config file and is
        # executed as Python code -- only run this with trusted configuration files.
        total_list = eval(expression, globals(), {})
        hyperparameters[key] = list(set(total_list))
    return hyperparameters

In [0]:
# Expand the range expressions of both hyperparameter sections
fit_ = drange(app_config['Algorithms']['ExponentialSmoothingHolt']['Hyperparameters']['fit'])
init_ = drange(app_config['Algorithms']['ExponentialSmoothingHolt']['Hyperparameters']['__init__'])

# Fold the __init__ hyperparameters into the fit ones; a key present in both
# sections keeps the de-duplicated union of its candidate values
for key, init_values in init_.items():
    if key in fit_:
        fit_[key] = list(set(fit_[key] + init_values))
    else:
        fit_[key] = list(init_values)

# endog is filled in from the training series inside get_forecast_UDF
fit_.pop("endog", None)

# Keep only usable candidate values: nested lists are stored as their string form,
# and 'None' / 'Null' / None placeholders are dropped (a key left empty is dropped too)
fit_new = {}
for key, candidates in fit_.items():
    temp = [str(val) if type(val) == list else val for val in candidates]
    temp = [val for val in temp if val not in ('None', 'Null', None)]
    if temp:
        fit_new[key] = temp

app_config['Algorithms']['ExponentialSmoothingHolt']['Hyperparameters'] = fit_new

In [0]:
# Create the algo directory for storing the results
output_directory = app_config['output_dir_path']
root_dir = "Modeling_Results"
algorithm = "ExponentialSmoothingHolt"
algo_path = os.path.join(output_directory,root_dir,algorithm)
if not os.path.exists(algo_path):
    os.makedirs(algo_path)
logger.info("Created algorithm directory")    

logs_path = os.path.join(output_directory,root_dir,'logs',algorithm)
if not os.path.exists(logs_path):
    os.makedirs(logs_path)
logger.info("Created logs directory")

config_path = os.path.join(app_config['output_dir_path'],"Modeling_Results","config")
if not os.path.exists(config_path):
    os.makedirs(config_path)
logger.info("Created config directory")

In [0]:
def _as_broadcast(payload):
    """Wrap a value so that it is read back through `.value`"""
    return dotsi.Dict({"value": payload})


# Candidate hyperparameters and the columns identifying one time series
hyperparameters_conf = dict(app_config["Algorithms"]["ExponentialSmoothingHolt"]["Hyperparameters"])
modeling_granularity_conf = app_config["modeling_granularity"]

# Names of the dependent variable and date columns in the input data
dv_config = app_config["dependent_variable"]
ds_config = app_config["date_var"]

# Values read inside get_forecast_UDF through `.value`
broadcast_metric = _as_broadcast(app_config['validation']['metric'])
broadcast_test_periods = _as_broadcast(app_config["validation"]["no_of_test_periods"])

broadcast_granularity = _as_broadcast(modeling_granularity_conf)
broadcast_hyper_parameters = _as_broadcast(hyperparameters_conf)
broadcast_agg_metrics_req = _as_broadcast(app_config["validation"]["agg_metrics_req"])
broadcast_tracking = _as_broadcast(app_config['tracking'])
mlflow_tracking_check = _as_broadcast("Out of Sample")
logger.info("Broadcasted the required variables")

In [0]:
# Reading the latest file based on timestamp
all_files = [file for file in os.listdir(algo_path)]
best_hyp_files = [file for file in all_files if "Best_hyperparameters (" in file]
best_hyp_files = [file.replace(".csv","") for file in best_hyp_files]
version_dates = [datetime.strptime(x.split('(')[1].replace(')',''), '%Y-%m-%d-%H-%M-%S') for x in best_hyp_files]
max_date = max(version_dates)
max_date = max_date.strftime('%Y-%m-%d-%H-%M-%S')
req_file_name = [x for x in best_hyp_files if max_date in x]
best_hyp_param_results_file_path = os.path.join(algo_path,req_file_name[0]+".csv")
print(best_hyp_param_results_file_path)

best_hyperparam_results = pd.read_csv(best_hyp_param_results_file_path)
best_hyperparam_results = best_hyperparam_results[best_hyperparam_results['status']=='success'].reset_index(drop = True)
best_hyperparam_results[modeling_granularity_conf] = best_hyperparam_results[modeling_granularity_conf].astype(str)
best_hyperparam_results.replace(['true'],True, inplace = True)
best_hyperparam_results.replace(['false'],False, inplace = True)
best_hyperparam_results_broadcast = dotsi.Dict({"value":best_hyperparam_results})
logger.info("Read the best hyperparamter results")
best_hyperparam_results

Unnamed: 0,Div_No,Store_No,Base_UPC,smoothing_level,optimized,exponential,damped_trend,mape,wmape,bias,tracking_signal,mae,rmse,status
0,24,8,2200015934,0.3,True,False,False,80.304346,47.708468,0.013966,0.02312,2.736842,3.17169,success
1,24,14,4000042206,0.2,True,False,False,81.297428,62.700763,0.997237,0.167846,7.970382,9.630542,success
2,24,15,4000046410,0.5,True,False,False,97.731077,56.206586,-0.339574,-0.333742,4.007774,4.627488,success
3,24,17,4000000032,0.2,True,False,False,277.217256,116.357735,-0.591559,-0.497021,10.920396,13.72314,success
4,24,53,4000000263,0.2,True,False,False,738.986696,147.776533,-1.462214,1.338904,33.008197,35.961354,success
5,24,55,4000000051,0.5,True,False,False,148.302829,96.28275,1.443844,0.042765,4.432826,6.402693,success
6,24,55,4000005851,0.5,True,False,False,329.650909,199.931238,-2.451632,-1.774686,4.069806,4.809726,success


In [0]:
def _read_best_hyperparameter(hpt, name):
    """Return the stored best value of hyperparameter `name`, turning a "[...]" text back into a list"""
    value = hpt[name].iloc[0]
    if isinstance(value, str) and '[' in value:
        # NOTE(security): eval executes whatever text the best-hyperparameters file
        # holds -- only read files produced by a trusted backtesting run.
        value = eval(value)
    return value


def _evaluation_metrics(y_true, y_pred, epsilon):
    """Compute mape, wmape, bias, tracking_signal, mae and rmse for one set of actuals / predictions"""
    error = y_true - y_pred
    abs_error = np.abs(error)
    return {
        # epsilon floors the denominators so that zero actuals do not divide by zero
        'mape': np.mean(abs_error / np.maximum(np.abs(y_true), epsilon))*100,
        'wmape': np.sum(abs_error) / np.maximum(np.sum(np.abs(y_true)),epsilon)*100,
        'bias': np.mean(error),
        'tracking_signal': np.sum(error) / np.mean(abs_error),
        'mae': mean_absolute_error(y_true, y_pred),
        'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
    }


def get_forecast_UDF(df_data: pd.DataFrame)-> pd.DataFrame:
    """Function to fit the Holt model with the best hyperparameters on the train rows of one time series and score its test rows

    Parameters
    ----------
    df_data : pd.DataFrame
        Rows of a single modeling granularity (and a single window when aggregated
        metrics are requested). Must contain 'ds', 'y' and the granularity columns,
        plus 'train_index_start', 'train_index_end', 'test_index_end' and 'window_no'
        when aggregated metrics are requested

    Returns
    -------
    pd.DataFrame
        On success: one row per date with the granularity, 'ds', 'y', 'yhat',
        'test_flag', 'test_flag_agg', the performance metrics, the hyperparameters
        used, 'window' and status 'success'.
        On failure: a single row holding the granularity and the error message in 'status'
    """
    try:
        test_periods = int(broadcast_test_periods.value)
        if(broadcast_agg_metrics_req.value == True):
            # Window boundaries are supplied by the caller as columns of df_data
            train_index_start = df_data["train_index_start"].iloc[0]
            train_index_end = df_data["train_index_end"].iloc[0]
            test_i = df_data["test_index_end"].iloc[0]
            window_no = str(str(train_index_start)+" "+str(train_index_end)+" "+str(test_i)+" "+str(df_data["window_no"].iloc[0]))
        else:
            # Single window: the last test_periods rows form the test set
            train_index_end = len(df_data) - test_periods
            test_i = len(df_data)
            window_no = str(1)

        df_data = df_data.sort_values(by=['ds'],ascending=True).reset_index(drop = True)
        broadcast_gran = list(broadcast_granularity.value)

        # get best hyperparameters for the given modeling granularity
        hpt = best_hyperparam_results_broadcast.value
        for x in broadcast_gran:
            hpt = hpt[hpt[x] == df_data[x].iloc[0]]
        if hpt.empty:
            # Reported through the status column instead of an opaque indexing error
            raise ValueError("No best hyperparameters found for the given modeling granularity")

        # Train - test split (copies, because columns are added to both parts below)
        train = df_data.iloc[:train_index_end].copy()
        test = df_data.iloc[train_index_end:test_i].copy()

        # Updating the default arguments with the parameters provided in the config.
        # A hyperparameter accepted by both methods is passed to the constructor only.
        hp_config = list(broadcast_hyper_parameters.value)
        def_args_init = get_default_args(Holt.__init__)
        def_args = get_default_args(Holt.fit)
        for x in hp_config:
            if x in def_args_init:
                def_args_init[x] = _read_best_hyperparameter(hpt, x)
            elif x in def_args:
                def_args[x] = _read_best_hyperparameter(hpt, x)
        def_args_init['endog'] = list(train['y'].values)

        # Calling the Exponential Smoothing Holt constructor with the hyperparameters of interest
        m = Holt(**def_args_init)
        m = m.fit(**def_args)

        test['yhat'] = m.predict(start = test.index.min(),end = test.index.max())
        test[['test_flag','test_flag_agg']] = 1
        train['yhat'] = m.predict(start = train.index.min(),end = train.index.max())
        # test_flag marks the dates of the overall test period, test_flag_agg the rows
        # scored as test in this particular window
        train['test_flag'] = np.where(train['ds']<df_data.iloc[-test_periods:]['ds'].min(),0,1)
        train['test_flag_agg'] = 0
        forecast_pd = pd.concat([train,test],ignore_index = True)

        results_pd = forecast_pd[broadcast_gran+['ds', 'y', 'yhat','test_flag','test_flag_agg']].reset_index(drop = True)
        # Sales or Quantity can't be negative hence
        results_pd["yhat"] = np.where(results_pd["yhat"]<0,0,results_pd["yhat"])

        # to handle erroneous results epsilon is set to 1.
        epsilon = 1

        # Training rows that fall inside the overall test period are left out of the metrics
        results_pd_temp = results_pd[~((results_pd['test_flag']==1) & (results_pd['test_flag_agg']==0))]
        metric_rows = []
        for val in [1,0]:
            temp_data = results_pd_temp[results_pd_temp['test_flag_agg']==val]
            # Eval. metrics calculation
            metrics = _evaluation_metrics(temp_data['y'], temp_data['yhat'], epsilon)
            metric_rows.append({'test_flag_agg': val, **metrics})
        temp_data2 = pd.DataFrame(metric_rows)

        results_pd = pd.merge(results_pd,temp_data2,how='left',on='test_flag_agg')

        # To adhere to defined schema
        for x in broadcast_gran:
            results_pd[x] = results_pd[x].astype(str)

        # Append Hyperparameters used
        for x in hp_config:
            results_pd[x] = hpt[x].iloc[0]

        # Get the experiment id
        tracking_value = broadcast_tracking.value.copy()
        if(mlflow_tracking_check.value == "Out of Sample" and tracking_value["tracking_needed"] == True):
            if(tracking_value['type']!="Managed"):
                if(tracking_value['tracking_uri'] is not None):
                    mlflow.set_tracking_uri("file:"+tracking_value['tracking_uri'])
                    experiment_id = mlflow.set_experiment(tracking_value["mlflow_experiment_id"])
                    tracking_value['mlflow_experiment_id'] = experiment_id.experiment_id
            #Add MLFlow code here
            with mlflow.start_run(experiment_id = tracking_value['mlflow_experiment_id']):
                mlflow.log_param('algorithm', 'ExponentialSmoothingHolt')
                mlflow.log_param('result_type', 'out_of_sample')
                for x in broadcast_gran:
                    mlflow.log_param(x, results_pd[x].iloc[0])
                for x in hp_config:
                    mlflow.log_param(x, results_pd[x].iloc[0])
                temp_test = results_pd[results_pd['test_flag']==1].reset_index(drop = True)
                for x in ["mape","wmape","bias","tracking_signal","mae","rmse"]:
                    mlflow.log_metric(x, temp_test[x].iloc[0])

        results_pd["window"] = window_no
        results_pd['status'] = 'success'
        return results_pd
    except Exception as e:
        # Any failure is reported as a single row whose status carries the error message.
        # The columns are passed as one flat list: a nested list would create MultiIndex
        # columns that do not line up with the successful results when concatenated.
        gran_cols = list(broadcast_granularity.value)
        failure_cols = ['ds', 'y', 'yhat','mape','wmape','bias','tracking_signal','mae','rmse'] + \
            list(broadcast_hyper_parameters.value.keys()) + ['status','test_flag','test_flag_agg','window'] + gran_cols
        results_pd = pd.DataFrame(columns = failure_cols, index = range(1))
        results_pd[gran_cols] = df_data[gran_cols].head(1).reset_index(drop = True)
        for x in gran_cols:
            results_pd[x] = results_pd[x].astype(str)
        results_pd['status'] = str(e)
        return results_pd

#### Loading the latest Missing_value_treatment file
##### Please update the reading path with the required data path if "Missing value treatment" was not run

In [0]:
# Reading the latest input file based on timestamp
all_files = [file for file in os.listdir(app_config['output_dir_path']+"/Data_Processing/Missing_value_treatment")]
missing_op_files = [file for file in all_files if "Missing_value_treatment_results (" in file]
missing_op_files = [file.replace(".csv","") for file in missing_op_files]
version_dates = [datetime.strptime(x.split('(')[1].replace(')',''), '%Y-%m-%d-%H-%M-%S') for x in missing_op_files]
max_date = max(version_dates)
max_date = max_date.strftime('%Y-%m-%d-%H-%M-%S')
req_file_name = [x for x in missing_op_files if max_date in x]
missing_op_file_path = os.path.join(app_config['output_dir_path']+"/Data_Processing/Missing_value_treatment",req_file_name[0]+'.csv')
# print(missing_op_file_path)

# Reading the data
df = pd.read_csv(missing_op_file_path)
# print(df.shape)

df.rename(columns = {ds_config:"ds", dv_config:"y"}, inplace = True)
df['ds'] = pd.to_datetime(df['ds'])
df[modeling_granularity_conf] = df[modeling_granularity_conf].astype(str)

logger.info("Data loaded")
# print(list(broadcast_hyper_parameters.value.keys()))

gbcp = list(modeling_granularity_conf)

In [0]:
# Out-of-sample evaluation.
# With agg_metrics_req set: score several train/test windows per time series, write the
# window-level results to disk, read them back and average them per series.
# Otherwise: score each time series once on its last test periods.
if(app_config["validation"]["agg_metrics_req"]):

    # Creating windows and then calling the modeling function
    test_periods = int(broadcast_test_periods.value)
    window_test_periods = app_config["validation"]["agg_metrics_test_periods"]
    stride = app_config["validation"]["agg_metrics_stride"]

    # Getting the total number of weeks for each time series
    # (this is the row count of each series)
    temp_df = df.groupby(modeling_granularity_conf).agg({'ds':'count'}).rename(columns={'ds': '#total_weeks'}).reset_index()
    df = df.merge(temp_df, on = modeling_granularity_conf ,how = "left")

    unique_skuXds = df[modeling_granularity_conf+["#total_weeks"]].drop_duplicates().reset_index(drop = True)

    final_list = []
    gran_len = len(modeling_granularity_conf)
    
    # For every series: the first window trains on everything before the last
    # test_periods rows, and each following window extends the training end by `stride`.
    # A window is kept only if its window_test_periods test rows fit inside the series.
    # train_index_start is always 0, i.e. the training set only grows.
    for row1 in range(0,len(unique_skuXds)): 
        Total_weeks = unique_skuXds.loc[row1,'#total_weeks']
        train_interval = int(Total_weeks-test_periods)
        j = 0
        for train_i in range(train_interval,Total_weeks,stride):
            if(train_i+window_test_periods <=Total_weeks):
                # NOTE(review): test_i is not read below in this cell; the appended row recomputes the same value
                test_i = train_i+window_test_periods
                # Row layout: granularity values, train start, train end, test end, 1-based window number
                final_list.append([unique_skuXds.iloc[row1,index] for index in range(gran_len)] + [0,train_i,train_i+window_test_periods,j+1])
                j += 1

    # create all windows combination.
    df_windows = pd.DataFrame([tuple(x) for x in final_list],columns =modeling_granularity_conf+['train_index_start','train_index_end','test_index_end','window_no'])
    # Every row of a series is repeated once per window of that series
    f_df = df.merge(df_windows,on=modeling_granularity_conf,how="left")
        
    # String key identifying one (series, window) pair: the granularity values and window_no concatenated
    f_df['gran_tempp'] = f_df[gbcp+["window_no"]].astype(str).sum(axis=1)
    unique_pdts = f_df['gran_tempp'].unique()
    new_results = pd.DataFrame()
    for pdt in unique_pdts:
        new_results = pd.concat([new_results,get_forecast_UDF(f_df[f_df['gran_tempp']==pdt])])
            
    new_results.to_csv(algo_path+"/Out_of_sample_results_window_level ("+datetime.today().strftime('%Y-%m-%d-%H-%M-%S')+").csv", index = False)
    logger.info("Completed Backtesting")
    
    # Reading the latest Out_of_sample_results_window_level file based on timestamp
    all_files = [file for file in os.listdir(algo_path)]
    backtesting_files = [file for file in all_files if "Out_of_sample_results_window_level (" in file]
    backtesting_files = [file.replace(".csv","") for file in backtesting_files]
    version_dates = [datetime.strptime(x.split('(')[1].replace(')',''), '%Y-%m-%d-%H-%M-%S') for x in backtesting_files]
    max_date = max(version_dates)
    max_date = max_date.strftime('%Y-%m-%d-%H-%M-%S')
    req_file_name = [x for x in backtesting_files if max_date in x]
    backtesting_results_file_path = os.path.join(algo_path,req_file_name[0] + ".csv")
    print(backtesting_results_file_path)

    # Reading the results of backtesting
    # (only the rows of successfully scored windows are kept)
    df = pd.read_csv(backtesting_results_file_path)
    df = df[df["status"] == "success"]
    
    df[modeling_granularity_conf] = df[modeling_granularity_conf].astype(str)
    df['ds'] = pd.to_datetime(df['ds'])

    # Roll up the data at Modeling granularity window level
    df_hyperparameters = best_hyperparam_results[gbcp + list(hyperparameters_conf)]

    # performance metrics
    # One row per series / window / test_flag_agg, then averaged over the windows
    per_met = ['status',"test_flag_agg","window","mape","wmape","bias","tracking_signal","mae","rmse"]
    df_metrics = df[gbcp + per_met].drop_duplicates()
    df_metrics1 = df_metrics.groupby(gbcp + ['test_flag_agg','status'])[["mape","wmape","bias","tracking_signal","mae","rmse"]].mean().reset_index()

    # Remaining columns
    # (everything that is neither a metric column nor a hyperparameter, plus test_flag_agg)
    rem_cols = list(set(df.columns) - set(per_met+list(hyperparameters_conf))) + ['test_flag_agg']
    dot_cols = [col for col in df.columns if "." in col] #to handle "."s
    for col in dot_cols:
        df.rename(columns = {col:col.replace(".","dot")}, inplace = True)
        # NOTE(review): list.index raises ValueError if a dotted column is not in rem_cols
        # (e.g. a hyperparameter name containing ".") -- confirm this cannot happen
        rem_cols[rem_cols.index(col)] = col.replace(".","dot")
    rem_df = df[rem_cols]
    
    # Removing the training dates which falls in the test period
    rem_df = rem_df[~((rem_df['test_flag']==1) & (rem_df['test_flag_agg']==0))]
    group_cols = gbcp + ['ds','test_flag','test_flag_agg']
    agg_cols = list(set(rem_cols) - set(group_cols))
    # Average the remaining columns over the windows that share a date
    exprs = {x: "mean" for x in agg_cols}
    rem_df1 = rem_df.groupby(group_cols).agg(exprs).reset_index()
    # Strip an 'avg(...)' wrapper from the column names.
    # NOTE(review): the "mean" aggregation above is not expected to produce such names --
    # presumably a leftover from another implementation; confirm before removing
    temp_cols = [col[:-1] if 'avg(' in col else col for col in rem_df1.columns ]
    temp_cols = [col.replace('avg(','') for col in temp_cols]
    rem_df1.columns = temp_cols

    # Restore the original dotted column names.
    # NOTE(review): the new name is col.replace("dot","."), which differs from col if the
    # original name itself contains the text "dot" -- confirm that `col` is what is intended
    for col in dot_cols:
        rem_df1.rename(columns = {col.replace(".","dot"):col.replace("dot",".")}, inplace = True)
                            
    # combining all the data
    df_forecast = rem_df1.merge(df_metrics1, on = gbcp + ['test_flag_agg'], how='left')
    df_forecast = df_forecast.merge(df_hyperparameters, on = gbcp , how='left')
    
else:    
    # Single evaluation: one call of the modeling function per time series
    df['gran_tempp'] = df[gbcp].astype(str).sum(axis=1)
    unique_pdts = df['gran_tempp'].unique()
    df_forecast = pd.DataFrame()
    for pdt in unique_pdts:
        df_forecast = pd.concat([df_forecast,get_forecast_UDF(df[df['gran_tempp']==pdt])])
            
# test_flag_agg is not part of the exported results
del(df_forecast['test_flag_agg'])
df_forecast['algorithm'] = 'ExponentialSmoothingHolt'

In [0]:
# Write the evaluation results to a timestamped file in the algo directory
_export_stamp = datetime.today().strftime('%Y-%m-%d-%H-%M-%S')
df_forecast.to_csv(f"{algo_path}/Out_of_sample_evaluation_results ({_export_stamp}).csv", index = False)
logger.info("Exported Out of sample evaluation results")

### Predicting future timeperiods

Uncomment the cells below to forecast future periods. Before running them, update `df` so that it contains the entire historical data as well as the independent variables (IDVs) data for the future periods to be forecast.

In [0]:
# broadcast_test_periods = dotsi.Dict({"value": 4}) # Provide the no. of timeperiods to forecast in the future

In [0]:
## Reading the latest input file based on timestamp
# all_files = [file for file in os.listdir(app_config['output_dir_path']+"/Data_Processing/Missing_value_treatment")]
# missing_op_files = [file for file in all_files if "Missing_value_treatment_results (" in file]
# missing_op_files = [file.replace(".csv","") for file in missing_op_files]
# version_dates = [datetime.strptime(x.split('(')[1].replace(')',''), '%Y-%m-%d-%H-%M-%S') for x in missing_op_files]
# max_date = max(version_dates)
# max_date = max_date.strftime('%Y-%m-%d-%H-%M-%S')
# req_file_name = [x for x in missing_op_files if max_date in x]
# missing_op_file_path = os.path.join(app_config['output_dir_path']+"/Data_Processing/Missing_value_treatment",req_file_name[0]+'.csv')
## print(missing_op_file_path)

## Reading the data
# df = pd.read_csv(missing_op_file_path)
## print(df.shape)

# df.rename(columns = {ds_config:"ds", dv_config:"y"}, inplace = True)
# df['ds'] = pd.to_datetime(df['ds'])
# df[modeling_granularity_conf] = df[modeling_granularity_conf].astype(str)

# # Broadcasting again with the "Future forecast" value since we won't be tracking the future forecast results
# mlflow_tracking_check = dotsi.Dict({"value": "Future forecast"})
# logger.info("Data which contains the future forecast periods is loaded")

# gbcp = list(modeling_granularity_conf)

In [0]:
# df['gran_tempp'] = df[gbcp].astype(str).sum(axis=1)
# unique_pdts = df['gran_tempp'].unique()
# df_forecast = pd.DataFrame()
# for pdt in unique_pdts:
#     df_forecast = pd.concat([df_forecast,get_forecast_UDF(df[df['gran_tempp']==pdt])])
            
# del(df_forecast['test_flag_agg'])
# df_forecast['algorithm'] = 'ExponentialSmoothingHolt'

In [0]:
# df_forecast.to_csv(algo_path + "/Future_forecast_results ("+datetime.today().strftime('%Y-%m-%d-%H-%M-%S')+").csv", index = False)
# logger.info("Exported future forecast results")

In [0]:
# Exporting config file
config_file_name = "config_for_exp_id_"+str(broadcast_tracking.value['mlflow_experiment_id']) + " (" +datetime.today().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]+").yml"
config_path1 = os.path.join(config_path,config_file_name)
with open(config_path1, 'w') as file:
    yaml.dump(temp_config, file, default_flow_style=False,sort_keys=False)

In [0]:
# Move from tmp directory to req. location in datalake
import platform
plat_sys = platform.system()

if(plat_sys!='Windows'):
    log_file = log_file.replace(' (', '\ \(').replace(')','\)')
    os.system('mv /tmp/{0} {1}'.format(log_file,logs_path))