In [None]:
import pandas as pd
import numpy as np
from copy import deepcopy
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

In [None]:
def xgb_forecast(data,date,n_lags=4):
    '''Takes a DataFrame with independent and dependent variables and a date and returns
    predictions for the dependent variable (``system_calls``) one week ahead of the given date.

    The frame is prepared by the notebook helpers ``createVars``, ``makelags`` and
    ``get_train_test`` (defined outside this cell). The two categorical columns are
    label-encoded, an ``xgb.XGBRegressor`` with default settings is fitted on the
    training split, and it is then applied to the test split.

    Parameters
    ----------
    data : DataFrame passed to ``createVars`` together with ``date``.
    date : forecast date; passed to ``createVars`` and echoed back in the result.
    n_lags : number of lags requested from ``makelags`` (lags 1..n_lags).
        Defaults to 4, the number of lags the previous hard-coded loop covered.

    Returns
    -------
    ``[date, xgb_preds]`` on success, where ``xgb_preds`` contains the columns
    ``categorical_var_1``, ``categorical_var_2``, ``month``, ``year``, ``WeeksLeft``
    and ``predictions`` (negative predictions are replaced by 0).
    ``(date, np.nan)`` if any step raises an ``Exception``; the failure and its
    cause are printed instead of being raised so a backtest loop can continue.
    '''
    try:
        #Simulate forecasting at a given date; createVars is expected to keep only
        #data from before `date` (helper not shown here -- verify against its definition)
        df=createVars(data,date)
        #Make variables for the dependent variable at time t-1, t-2 ... t-n_lags.
        #Each call is fed the output of the previous one so that every lag is kept,
        #rather than only the result of the last makelags call.
        df2=df
        for N in range(1,n_lags+1):
            df2=makelags(df2,N)
        #Turn categorical variables into integers that XGBoost can use with LabelEncoders
        var1_label_encoder=LabelEncoder()
        df2['categorical_var_1']=var1_label_encoder.fit_transform(df2['categorical_var_1'])
        var2_label_encoder=LabelEncoder()
        df2['categorical_var_2']=var2_label_encoder.fit_transform(df2['categorical_var_2'])

        #split into train and test sets
        train,test = get_train_test(df2)

        #split train and test sets into dependent variable and independent variables.
        #drop(columns=...) is used because the positional `axis` argument of
        #DataFrame.drop was removed in pandas 2.0.
        y_train=train['system_calls']
        x_train=train.drop(columns='system_calls')
        x_test=test.drop(columns='system_calls')
        y_test=test['system_calls']
        model=xgb.XGBRegressor()
        model.fit(x_train,y_train)
        predictions=model.predict(x_test)

        #work on a copy so the test features themselves are not modified
        results_df=x_test.copy()
        #make all negative values 0
        results_df['predictions']=np.where(predictions<0,0,predictions)
        #kept alongside the predictions for inspection; not part of the returned columns
        results_df['true_values']=y_test
        #Return categorical variables to their original form
        results_df['categorical_var_1']=var1_label_encoder.inverse_transform(results_df['categorical_var_1'])
        results_df['categorical_var_2']=var2_label_encoder.inverse_transform(results_df['categorical_var_2'])
        #include only the columns we need
        xgb_preds=results_df[['categorical_var_1','categorical_var_2','month','year','WeeksLeft','predictions']]
        return [date,xgb_preds]
    except Exception as exc:
        #Best-effort: report the failing date and why it failed, then let the caller
        #carry on. KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f'{date} didnt work')
        print(f'    {type(exc).__name__}: {exc}')
        return (date,np.nan)

In [None]:
# Backtest driver: call xgb_forecast once per weekly date and collect what it returns.
# `dates` holds 12 weekly timestamps, the last one no later than 2019-09-01.
results = []
dates = pd.date_range(periods=12, freq='W', end='2019-09-01')
for date in dates:
    # one entry per date, appended in chronological order
    results.append(xgb_forecast(data, date))