In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree lazily and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the competition training set; `parse_dates` converts the "date" column
# to datetime64 while reading.
# The former `infer_datetime_format=True` argument is intentionally omitted:
# pandas 2.0 deprecated it (it now only triggers a warning) because inferring
# a single consistent date format is the parser's default behaviour.
store_sales = pd.read_csv(
    '/kaggle/input/store-sales-time-series-forecasting/train.csv',
    parse_dates=["date"],
)
# Summarise column dtypes, non-null counts and memory usage.
store_sales.info()

Extract daily total sales for the GROCERY I product family

In [None]:
# Keep only the rows that belong to the 'GROCERY I' product family.
grocery = store_sales[store_sales['family'] == 'GROCERY I']

# Total sales per date. The 'sales' column is selected *before* aggregating:
# calling .sum() on the whole group-by would also "sum" every other column,
# including the string column 'family' (concatenated per group in pandas 2.x),
# only for that work to be thrown away by the trailing ['sales'] lookup.
grocery_sales = grocery.groupby('date')['sales'].sum()
grocery_sales

Add Time-step feature

In [None]:
# Turn the per-date sales Series into a DataFrame and attach a 0-based
# integer counter ('time') that numbers the rows in index order.
df = grocery_sales.to_frame().assign(
    time=lambda frame: np.arange(frame.shape[0])  # time dummy
)
df.head()

Build a linear regression model on the time-step feature

In [None]:
from sklearn.linear_model import LinearRegression

# Training data: a one-column feature frame (the time dummy) and the target.
X = df[['time']]  # features
y = df['sales']  # target

# Ordinary least squares of sales on the time step.
model = LinearRegression().fit(X, y)

# In-sample fitted values, aligned with the same index as the inputs.
fitted_values = model.predict(X)
y_pred = pd.Series(data=fitted_values, index=X.index)
y_pred

In [None]:
import matplotlib.pyplot as plt

# Set Matplotlib defaults
plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout=True, figsize=(11, 4))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=14,
    titlepad=10,
)
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)
%config InlineBackend.figure_format = 'retina'

In [None]:
# Overlay the fitted trend line on the observed sales, both on the same axes.
fig, ax = plt.subplots()

y.plot(ax=ax, alpha=0.5, **plot_params)  # observed series, slightly faded
y_pred.plot(ax=ax, linewidth=3)          # fitted line drawn on top
ax.set_title('Time Plot for Grocery Sales');

Add a lag feature (sales of the previous observation)

In [None]:
# Lag_1 is the sales value of the preceding row; the first row has no
# predecessor, so shift() leaves it as NaN.
previous_sales = df['sales'].shift(periods=1)
df['Lag_1'] = previous_sales
df.head(5)

In [None]:
# Remove, in place, every row that contains a missing value in any column
# (shift() introduced a NaN in Lag_1 for the first row).
df.dropna(axis=0, how='any', inplace=True)
df

In [None]:
# Training data: this time the single feature is the lagged sales value.
X = df[['Lag_1']]  # features
y = df['sales']  # target

# Ordinary least squares of sales on Lag_1.
model = LinearRegression().fit(X, y)

# In-sample fitted values, aligned with the same index as the inputs.
fitted_values = model.predict(X)
y_pred = pd.Series(data=fitted_values, index=X.index)
y_pred

In [None]:
# Lag plot: each observation against its Lag_1 value, with the fitted
# regression line drawn over the points.
fig, ax = plt.subplots()
lag_values = X['Lag_1']
ax.plot(lag_values, y, '.', color='0.25')  # observed (Lag_1, sales) pairs
ax.plot(lag_values, y_pred)                # model predictions
ax.set_aspect('equal')                     # same scale on both axes
ax.set_ylabel('sales')
ax.set_xlabel('Lag_1')
ax.set_title('Lag Plot of Grocery sales');

In [None]:
# Time plot of observed sales with the Lag_1 model's fitted values on top.
# A fresh figure/axes pair is created and passed explicitly to both plot calls
# so they are guaranteed to share one set of axes; relying on pyplot's implicit
# "current axes" would draw onto the preceding lag plot (with its equal aspect
# ratio) whenever that figure is still open.
fig, ax = plt.subplots()
y.plot(ax=ax, **plot_params)  # observed series
y_pred.plot(ax=ax)            # fitted values from the Lag_1 regression
ax.set_title('Time Plot for Grocery Sales with Lag_1 Model Fit');