In [1]:
# Setup notebook
import pandas as pd
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
from sklearn.model_selection import train_test_split

In [15]:
# Ignore Future Warning
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Setup
train = pd.read_csv('train.csv', parse_dates=["date"])
test = pd.read_csv('test.csv', parse_dates=["date"])

# Drop 'store_nbr' and 'onpromotion'
train = train.drop(['store_nbr','onpromotion'], axis=1)
test = test.drop(['store_nbr','onpromotion'], axis=1)

In [3]:
# Generates subsets for each family
def generate_subsets(dataset, family):
    subset = dataset.loc[dataset['family']==family, :]
    return subset

In [4]:
# Check unique values of "family"
family_list = train["family"].unique()
test_family_list = test["family"].unique()

# Save the subsets in dictionary
train_subsets = {}
for family in family_list:
    train_subsets.update({family:generate_subsets(train, family)})

test_subsets = {}
for family in test_family_list:
    test_subsets.update({family:generate_subsets(test, family)})

In [24]:
# Make "date" as index
def by_date(sub_dataset, key):
    df= sub_dataset[key].groupby(by="date").sum()
    df = df.to_period('D')

    return df

In [26]:
# Generate a dictionary of "date"-indexed train subsets
dated_train_subsets = {}
for family in family_list:
    dated_train_subsets.update({family:by_date(train_subsets, family)})

# Generate a dictionary of "date"-indexed test subsets
dated_test_subsets = {}
for family in test_family_list:
    dated_test_subsets.update({family:by_date(test_subsets, family)}) 

In [21]:
# Splits the train data into 70:30 (sorted)
def split_train(df):
    return train_test_split(df, test_size=0.3, random_state=None, shuffle=False)

In [27]:
# Add seasonal features(day, week, dayofyear, year)
def add_seasonal(subset, key):
    seasonal = subset[key]
    seasonal["day"] = seasonal.index.dayofweek
    seasonal["week"] = seasonal.index.week
    seasonal["dayofyear"] = seasonal.index.dayofyear
    seasonal["year"] = seasonal.index.year
    return seasonal

for family in family_list:
    add_seasonal(dated_train_subsets, family)

In [28]:
# Seasonal model function
def seasonal_final(train, test):

    fourier = CalendarFourier(freq="A", order=12)  # 12 sin/cos pairs for "A"nnual seasonality

    season_dp = DeterministicProcess(
        index=train.index,
        constant=True,               # dummy feature for bias (y-intercept)
        order=1,                     # trend (order 1 means linear)
        seasonal=True,               # weekly seasonality (indicators)
        additional_terms=[fourier],  # annual seasonality (fourier)
        drop=True,                   # drop terms to avoid collinearity
    )

    X = season_dp.in_sample()  # create features for dates in train.index
    y = train["sales"]

    season_model = LinearRegression(fit_intercept=False)
    _ = season_model.fit(X, y)

    # Forecasting sales
    X_fore = season_dp.out_of_sample(steps=len(test.index))
    y_fore = pd.Series(season_model.predict(X_fore), index=X_fore.index)

    return y_fore

In [29]:
forecasting = []

# Apply seasonal model
for key in test_family_list:
    forecasting.append(seasonal_final(train_subsets[key], test_subsets[key]))

AttributeError: 'NoneType' object has no attribute 'rule_code'