In [24]:
import pandas as pd
import numpy as np
from xgboost import plot_importance, plot_tree
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [25]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [26]:
#drop cols
X_train = train.drop(['ConfirmedCases','Fatalities','Id'],1)

In [27]:
# make date a datetime object
X_train['Date'] = pd.to_datetime(X_train['Date'])

In [28]:
def create_time_features(df):
    """
    Creates time series features from datetime index
    """
    df['hour'] = df['Date'].dt.hour
    df['dayofweek'] = df['Date'].dt.dayofweek
    df['quarter'] = df['Date'].dt.quarter
    df['month'] = df['Date'].dt.month
    df['year'] = df['Date'].dt.year
    df['dayofyear'] = df['Date'].dt.dayofyear
    df['dayofmonth'] = df['Date'].dt.day
    df['weekofyear'] = df['Date'].dt.weekofyear
    
    X = df[['hour','dayofweek','quarter','month','year',
           'dayofyear','dayofmonth','weekofyear']]
    return X

In [29]:
create_time_features(X_train)

Unnamed: 0,hour,dayofweek,quarter,month,year,dayofyear,dayofmonth,weekofyear
0,0,2,1,1,2020,22,22,4
1,0,3,1,1,2020,23,23,4
2,0,4,1,1,2020,24,24,4
3,0,5,1,1,2020,25,25,4
4,0,6,1,1,2020,26,26,4
5,0,0,1,1,2020,27,27,5
6,0,1,1,1,2020,28,28,5
7,0,2,1,1,2020,29,29,5
8,0,3,1,1,2020,30,30,5
9,0,4,1,1,2020,31,31,5


In [31]:
X_train.drop("Date", axis=1, inplace=True)

In [33]:
X_train = pd.concat([X_train,pd.get_dummies(X_train['Province/State'], prefix='ps')],axis=1)
X_train.drop(['Province/State'],axis=1, inplace=True)

In [34]:
X_train = pd.concat([X_train,pd.get_dummies(X_train['Country/Region'], prefix='cr')],axis=1)
X_train.drop(['Country/Region'],axis=1, inplace=True)

In [48]:
X_train.head()

Unnamed: 0,Lat,Long,hour,dayofweek,quarter,month,year,dayofyear,dayofmonth,weekofyear,...,cr_Turkey,cr_US,cr_Ukraine,cr_United Arab Emirates,cr_United Kingdom,cr_Uruguay,cr_Uzbekistan,cr_Venezuela,cr_Vietnam,cr_Zambia
0,33.0,65.0,0,2,1,1,2020,22,22,4,...,0,0,0,0,0,0,0,0,0,0
1,33.0,65.0,0,3,1,1,2020,23,23,4,...,0,0,0,0,0,0,0,0,0,0
2,33.0,65.0,0,4,1,1,2020,24,24,4,...,0,0,0,0,0,0,0,0,0,0
3,33.0,65.0,0,5,1,1,2020,25,25,4,...,0,0,0,0,0,0,0,0,0,0
4,33.0,65.0,0,6,1,1,2020,26,26,4,...,0,0,0,0,0,0,0,0,0,0


In [49]:
y_train_fat = train['Fatalities']
y_train_cc = train['ConfirmedCases']

In [39]:
# make date a datetime object
test['Date'] = pd.to_datetime(test['Date'])

In [42]:
create_time_features(test)

KeyError: 'Date'

In [41]:
test.drop("Date", axis=1, inplace=True)

In [43]:
test = pd.concat([test,pd.get_dummies(test['Province/State'], prefix='ps')],axis=1)
test.drop(['Province/State'],axis=1, inplace=True)

In [44]:
test = pd.concat([test,pd.get_dummies(test['Country/Region'], prefix='cr')],axis=1)
test.drop(['Country/Region'],axis=1, inplace=True)

In [50]:
xgb_fat = xgb.XGBRegressor(n_estimators=1000)

In [51]:
xgb_fat.fit(X_train,y_train_fat,verbose=True)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=1000,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [52]:
xgb_cc = xgb.XGBRegressor(n_estimators=1000)

In [53]:
xgb_cc.fit(X_train,y_train_cc,verbose=True)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=1000,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [59]:
fat_pred = xgb_fat.predict(test.drop('ForecastId',1))
cc_pred = xgb_cc.predict(test.drop('ForecastId',1))

In [60]:
sub = pd.read_csv('submission.csv')

In [61]:
sub['ConfirmedCases'] = fat_pred
sub['Fatalities'] = cc_pred
sub.head()

Unnamed: 0,ForecastId,ConfirmedCases,Fatalities
0,1,0.321479,29.995928
1,2,0.269886,44.980709
2,3,0.173698,48.527493
3,4,0.41944,48.527493
4,5,0.372777,70.916893
