In [2]:
from fastai.tabular.all import *
from fastbook import *

from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_log_error

import seaborn as sns

from dtreeviz.trees import *
import dtreeviz

from treeinterpreter import treeinterpreter as ti
import waterfall_chart

from fastprogress import master_bar, progress_bar
from fastprogress.fastprogress import force_console_behavior


In [3]:
master_bar, progress_bar = force_console_behavior()

In [4]:
#| export
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
creds = ''

In [5]:
#| export
cred_path = Path('~/.kaggle/kaggle.json').expanduser()
if not cred_path.exists():
    cred_path.parent.mkdir(exist_ok=True)
    cred_path.write_text(creds)
    cred_path.chmod(0o600)

In [6]:
#| export
path = Path('store-sales-time-series-forecasting')

In [7]:
#| export
if not iskaggle and not path.exists():
    import zipfile, kaggle
    kaggle.api.competition_download_cli(str(path))    
    zipfile.ZipFile(f'{path}.zip').extractall(path)


In [8]:
#| export
if iskaggle:
    path = Path('../input/store-sales-time-series-forecasting')
    ! pip install -q dataset

In [9]:
train_df = pd.read_csv(path/'train.csv', low_memory=False)
test_df = pd.read_csv(path/'test.csv', low_memory=False)
sub_df = pd.read_csv(path/'sample_submission.csv', low_memory=False)
stores_df = pd.read_csv(path/'stores.csv', low_memory=False)
oil_df = pd.read_csv(path/'oil.csv', low_memory=False)
hol_events_df = pd.read_csv(path/'holidays_events.csv', low_memory=False)
transactions_df = pd.read_csv(path/'transactions.csv', low_memory=False)

In [10]:
train_df.head()

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,1,2013-01-01,1,BABY CARE,0.0,0
2,2,2013-01-01,1,BEAUTY,0.0,0
3,3,2013-01-01,1,BEVERAGES,0.0,0
4,4,2013-01-01,1,BOOKS,0.0,0


In [11]:
combined_df = pd.concat([train_df, test_df])

In [12]:
combined_df = combined_df.merge(oil_df, on='date', how='left')

In [13]:
combined_df = combined_df.merge(stores_df, on='store_nbr', how='left')

In [15]:
hol_events_df.rename(columns={'type': 'hol_type'}, inplace=True)

In [16]:
combined_df = combined_df.merge(hol_events_df, on='date', how='left')

In [None]:
combined_df

In [24]:
combined_df['date'] = pd.to_datetime(combined_df['date'])

In [30]:
eq_start_date = pd.to_datetime("2016-04-16")
eq_end_date = pd.to_datetime("2016-05-16")

In [31]:
earthquake_cond = (combined_df.date >= eq_start_date) & (combined_df.date < eq_end_date)

In [32]:
earthquake_indexes = combined_df.index[earthquake_cond]

In [34]:
combined_df = add_datepart(combined_df, 'date')

In [35]:
combined_df



Unnamed: 0,id,store_nbr,family,sales,onpromotion,dcoilwtico,city,state,type,cluster,hol_type,locale,locale_name,description,transferred,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed
0,0,1,AUTOMOTIVE,0.0,0,,Quito,Pichincha,D,13,Holiday,National,Ecuador,Primer dia del ano,False,2013,1,1,1,1,1,False,True,False,True,False,True,1.356998e+09
1,1,1,BABY CARE,0.0,0,,Quito,Pichincha,D,13,Holiday,National,Ecuador,Primer dia del ano,False,2013,1,1,1,1,1,False,True,False,True,False,True,1.356998e+09
2,2,1,BEAUTY,0.0,0,,Quito,Pichincha,D,13,Holiday,National,Ecuador,Primer dia del ano,False,2013,1,1,1,1,1,False,True,False,True,False,True,1.356998e+09
3,3,1,BEVERAGES,0.0,0,,Quito,Pichincha,D,13,Holiday,National,Ecuador,Primer dia del ano,False,2013,1,1,1,1,1,False,True,False,True,False,True,1.356998e+09
4,4,1,BOOKS,0.0,0,,Quito,Pichincha,D,13,Holiday,National,Ecuador,Primer dia del ano,False,2013,1,1,1,1,1,False,True,False,True,False,True,1.356998e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3082855,3029395,9,POULTRY,,1,47.26,Quito,Pichincha,B,6,,,,,,2017,8,35,31,3,243,True,False,False,False,False,False,1.504138e+09
3082856,3029396,9,PREPARED FOODS,,0,47.26,Quito,Pichincha,B,6,,,,,,2017,8,35,31,3,243,True,False,False,False,False,False,1.504138e+09
3082857,3029397,9,PRODUCE,,1,47.26,Quito,Pichincha,B,6,,,,,,2017,8,35,31,3,243,True,False,False,False,False,False,1.504138e+09
3082858,3029398,9,SCHOOL AND OFFICE SUPPLIES,,9,47.26,Quito,Pichincha,B,6,,,,,,2017,8,35,31,3,243,True,False,False,False,False,False,1.504138e+09
