In [None]:
import os
# Run the notebook from the project root so the relative data paths used
# further down resolve; PROJECT_ROOT must be set in the environment
# (a missing variable raises KeyError here).
project_root = os.environ['PROJECT_ROOT']
os.chdir(project_root)

In [None]:
import pandas as pd
from prophet import Prophet
from pathlib import Path

In [None]:
# Directory (relative to the working directory) holding the competition files.
DATA_ROOT = Path('data') / 'kaggle' / 'store-sales-time-series-forecasting'

In [None]:
# Load the prepared training table and preview its first rows.
train = pd.read_csv(DATA_ROOT.joinpath('prepared_train.csv'), low_memory=False)
train.head(n=5)

In [None]:
# Keep only the columns used for modelling and rename date/sales to the
# `ds`/`y` names that the Prophet fits below are given.
train_prepared_data = (
    train
    .loc[:, ['date', 'sales', 'store_nbr', 'family']]
    .rename(columns={'date': 'ds', 'sales': 'y'})
)
train_prepared_data.dtypes

In [None]:
# Preview the first rows of the modelling frame.
train_prepared_data.head(n=5)

In [None]:
# Load the competition test table and preview its first rows.
test = pd.read_csv(DATA_ROOT.joinpath('test.csv'))
test.head(n=5)

In [None]:
# Drop the columns the models do not use and lower-case `family` so it can be
# compared against the family values of the training frame.
test_prepared_data = (
    test
    .drop(columns=['onpromotion', 'id'])
    .assign(family=lambda frame: frame['family'].str.lower())
)

In [None]:
# Preview the first rows of the trimmed test frame.
test_prepared_data.head(n=5)

In [None]:
# Use the same `ds` name for the date column as the training frame.
test_prepared_data = test_prepared_data.rename({'date': 'ds'}, axis='columns')

In [None]:
# Submission template; its `sales` column is overwritten with forecasts below.
submission = pd.read_csv(DATA_ROOT.joinpath('sample_submission.csv'))

In [None]:
# Baseline: fit one default Prophet model per (family, store_nbr) series on the
# full training history and write its forecast into `submission`.
# NOTE(review): forecasts are assigned by test-row index, which assumes that
# `submission` shares its row index with `test_prepared_data` and that the
# forecast rows come back in the same order as the rows of X_test — TODO confirm.
store_numbers = train_prepared_data['store_nbr'].unique()
for family in train_prepared_data['family'].unique():
    # Narrow both frames to the current family once instead of rescanning the
    # full tables for every store.
    train_family = train_prepared_data.loc[train_prepared_data['family'] == family]
    test_family = test_prepared_data.loc[test_prepared_data['family'] == family]
    for store_nbr in store_numbers:
        X_train = train_family.loc[train_family['store_nbr'] == store_nbr]
        # Fit on `ds`/`y` only, as the later truncated-history loop does.
        X_train = X_train.drop(['store_nbr', 'family'], axis=1)
        model = Prophet()
        model.fit(X_train)
        # Select the test rows with a mask built from the test frame itself: a
        # mask built from the training frame is indexed by training rows and
        # does not line up with the test frame.
        X_test = test_family.loc[test_family['store_nbr'] == store_nbr]
        test_indexer = X_test.index
        X_test = X_test.drop(['store_nbr', 'family'], axis=1)
        submission.loc[test_indexer, 'sales'] = list(model.predict(X_test)['yhat'])

In [None]:
# Sales cannot be negative, so floor negative forecasts at zero.
# A single .loc call is required: `submission[mask]['sales'] = 0` assigns into
# a temporary copy and leaves `submission` unchanged.
submission.loc[submission['sales'] < 0, 'sales'] = 0

In [None]:
# Write the baseline submission next to the input data, reusing DATA_ROOT
# rather than repeating the directory as a string literal.
submission.to_csv(DATA_ROOT / 'simplest_prophet.csv', index=False)

In [None]:
# Work on an independent copy so the full-history frame stays untouched.
train_prepared_data_copy = train_prepared_data.copy(deep=True)

In [None]:
# Index label of the first row dated 2015-05-01; used as the cut-off for the
# shortened training history. Raises IndexError if no row has that date.
border_index = train_prepared_data_copy.index[train_prepared_data_copy['ds'] == '2015-05-01'].tolist()[0]
border_index

In [None]:
# Keep every row from the cut-off label onwards.
# NOTE(review): this only equals "dates on or after the cut-off" if the rows
# are ordered by `ds` — TODO confirm.
train_prepared_data_copy = train_prepared_data_copy.loc[border_index:]

In [None]:
# Display the shortened training frame to inspect where it now starts.
train_prepared_data_copy

In [None]:
# Load the holidays/events table and preview its first rows.
holidays_data = pd.read_csv(DATA_ROOT.joinpath('holidays_events.csv'))
holidays_data.head(n=5)

In [None]:
# One row per distinct date of a non-transferred holiday/event on or after
# 2015-05-01, all under a single 'holidays_events' label with zero-width
# windows.
# NOTE(review): none of the Prophet() calls visible in this notebook are given
# this frame — confirm whether it is meant to be passed as holidays.
holidays_events_data = pd.DataFrame({
    'holiday': 'holidays_events',
    'ds': pd.to_datetime(
        holidays_data.loc[
            holidays_data['transferred'].eq(False) & holidays_data['date'].ge('2015-05-01'),
            'date',
        ].unique()
    ),
    'lower_window': 0,
    'upper_window': 0,
})

In [None]:
# Refit one default Prophet model per (family, store_nbr) series, this time on
# the shortened training history, and overwrite the forecasts in `submission`.
families = train_prepared_data_copy['family'].unique()
store_numbers = train_prepared_data_copy['store_nbr'].unique()
for family in families:
    for store_nbr in store_numbers:
        train_mask = (
            train_prepared_data_copy['family'].eq(family)
            & train_prepared_data_copy['store_nbr'].eq(store_nbr)
        )
        test_mask = (
            test_prepared_data['family'].eq(family)
            & test_prepared_data['store_nbr'].eq(store_nbr)
        )

        # Train on the series' remaining columns only (keys removed).
        X_train = train_prepared_data_copy.loc[train_mask].drop(columns=['store_nbr', 'family'])
        model = Prophet()
        model.fit(X_train)

        # Remember which test rows belong to this series before dropping keys.
        test_rows = test_prepared_data.loc[test_mask]
        test_indexer = test_rows.index
        X_test = test_rows.drop(columns=['store_nbr', 'family'])

        forecast = model.predict(X_test)
        submission.loc[test_indexer, 'sales'] = forecast['yhat'].tolist()

In [None]:
# Floor negative forecasts at zero.
negative_sales = submission['sales'] < 0
submission.loc[negative_sales, 'sales'] = 0

In [None]:
# Write the shortened-history submission next to the input data, reusing
# DATA_ROOT rather than repeating the directory as a string literal.
submission.to_csv(DATA_ROOT / 'simple_prophet_with_smaller_dataset.csv', index=False)