In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Column dtypes applied while reading the training CSV.
dtype = {
    'store_nbr': 'category',
    'family': 'category',
    'sales': 'float32',
    'onpromotion': 'uint64',
}

# `infer_datetime_format` is intentionally not passed: it is deprecated in
# pandas 2.0 and `parse_dates` alone is enough to parse the "date" column.
store_sales = pd.read_csv(
    '/kaggle/input/store-sales-time-series-forecasting/train.csv',
    dtype=dtype,
    parse_dates=["date"],
)

# Index by calendar day (daily PeriodIndex).
store_sales = store_sales.set_index('date').to_period('D')

# Keep only the 'GROCERY I' product family.
grocery = store_sales[store_sales['family'] == 'GROCERY I']

# Total sales per day. Select `sales` *before* aggregating so the categorical
# columns are never summed (summing them raises TypeError on pandas >= 2.0).
grocery_sales = grocery.groupby('date')['sales'].sum().to_frame()

In [None]:
import matplotlib.pyplot as plt

# Set Matplotlib defaults
plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout=True, figsize=(11, 4))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=14,
    titlepad=10,
)
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)
%config InlineBackend.figure_format = 'retina'

In [None]:
# Plot the raw daily grocery sales using the shared styling defaults.
ax = grocery_sales.plot(**plot_params)
# The series is the unscaled daily sum of `sales`, so label the axis the same
# way as the trend plots further down rather than as a scaled currency amount.
ax.set(title="Grocery Sales", ylabel="items sold");

## Determine trend with a moving average plot

In [None]:
# Centered 365-day rolling window over the daily series; a window needs at
# least 183 observations (about half its width) before it yields a value.
moving_average = grocery_sales.rolling(window=365, center=True, min_periods=183)
trend = moving_average.mean()

# Overlay the smoothed trend on the (faded) raw series.
ax = grocery_sales.plot(alpha=0.5, **plot_params)
ax = trend.plot(linewidth=3, ax=ax)

## Create a Trend Feature

Use `DeterministicProcess` to create a feature set for a cubic trend model. Also create features for a 30-day forecast.

In [None]:
from statsmodels.tsa.deterministic import DeterministicProcess

# Target: a copy of the daily grocery sales frame.
y = grocery_sales.copy()

# Order-3 (cubic) polynomial trend generator built on the training dates.
dp = DeterministicProcess(index=y.index, order=3)

# Trend features for the training dates themselves ...
X = dp.in_sample()

# ... and for the 30 steps that follow the end of the training index.
X_fore = dp.out_of_sample(steps=30)

In [None]:
# Preview the first rows of the trend features; leaving the frame as the cell's
# last expression gives the notebook's rich table display instead of plain text.
X.head()

In [None]:
from sklearn.linear_model import LinearRegression
import pandas as pd

# Fit a linear regression of sales on the polynomial trend features.
model = LinearRegression()
model.fit(X, y)

# `y` is a one-column DataFrame, so the predictions are indexed as 2-D here;
# take the first (only) column to build 1-D Series aligned with the feature dates.
y_pred = pd.Series(model.predict(X)[:, 0], index=X.index)
y_fore = pd.Series(model.predict(X_fore)[:, 0], index=X_fore.index)

# Raw series, fitted trend, and the 30-step trend forecast on one set of axes.
ax = y.plot(**plot_params, title="Grocery Sales", ylabel="items sold")
ax = y_pred.plot(ax=ax, linewidth=3, label="Trend", color='C0')
ax = y_fore.plot(ax=ax, linewidth=3, label="Trend Forecast", color='C3')
# The labels above are only shown once a legend is drawn.
ax.legend();

## Fit trend with splines

*Splines* are a nice alternative to polynomials when you want to fit a trend. The *Multivariate Adaptive Regression Splines* (MARS) algorithm in the `pyearth` library is powerful and easy to use.

In [None]:
from pyearth import Earth

# Target and features are the same as before, except the trend is linear
# (order=1): the spline model is left to bend it where needed.
y = grocery_sales.copy()
dp = DeterministicProcess(index=y.index, order=1)
X = dp.in_sample()
X_fore = dp.out_of_sample(steps=30)

# Fit a MARS model with `Earth`
model = Earth()
model.fit(X, y)

# In-sample fit and 30-step forecast, aligned with their feature dates.
y_pred = pd.Series(model.predict(X), index=X.index)
y_fore = pd.Series(model.predict(X_fore), index=X_fore.index)

# Raw series, fitted trend, and the trend forecast on one set of axes.
ax = y.plot(**plot_params, title="Grocery Sales", ylabel="items sold")
ax = y_pred.plot(ax=ax, linewidth=3, label="Trend", color='C0')
ax = y_fore.plot(ax=ax, linewidth=3, label="Trend Forecast", color='C3')
# The labels above are only shown once a legend is drawn.
ax.legend();