# Exercises

### The end result of this exercise should be a Jupyter notebook named model.
### Use either the SAAS dataset or the store data and:

# 1. Split data (train/validate/test) and resample by any period except daily. Aggregate using the sum.

In [None]:
# Row counts for the split: half for train, 30% for validate, and
# whatever remains for test.
train_size = int(len(df) * .5)
validate_size = int(len(df) * .3)
test_size = len(df) - train_size - validate_size
validate_end_index = train_size + validate_size

# Slice by row position so the three sets are contiguous and do not
# overlap: train first, then validate, then test.
train = df.iloc[:train_size]
validate = df.iloc[train_size:validate_end_index]
test = df.iloc[validate_end_index:]

# 2. Forecast, plot, and evaluate using each of the four parametric-based methods we discussed:

## a. Simple Average

In [None]:
# Simple-average baseline: the mean of each target over train,
# rounded to two decimals.
items = round(train['items_sold'].mean(), 2)
dollars = round(train['dollars_sold'].mean(), 2)

# One prediction per target, laid out on validate's index.
yhat_df = pd.DataFrame(
    {'items_sold': [items], 'dollars_sold': [dollars]},
    index=validate.index,
)

yhat_df.head(3)


In [None]:
# Call plot_and_eval once for every column of train.
# (plot_and_eval is defined outside this section.)
for col in train.columns:
    plot_and_eval(col)


In [None]:
# Record a 'simple_average' entry for every column of train; each call's
# return value replaces eval_df.
# (append_eval_df is defined outside this section.)
for col in train.columns:
    eval_df = append_eval_df(model_type='simple_average', 
                             target_var = col)
# Display eval_df.
eval_df


## b. Moving Average

In [None]:
# Moving-average baseline: take the rolling mean over a window of
# `period` rows and use its most recent (last) value in train as the
# forecast.
period = 30

items = round(
    train['items_sold'].rolling(window=period).mean().iloc[-1],
    2,
)
dollars = round(
    train['dollars_sold'].rolling(window=period).mean().iloc[-1],
    2,
)

# One prediction per target, laid out on validate's index.
yhat_df = pd.DataFrame(
    {'items_sold': [items], 'dollars_sold': [dollars]},
    index=validate.index,
)
yhat_df.head(2)


In [None]:
# Call plot_and_eval once for every column of train.
# (plot_and_eval is defined outside this section.)
for col in train.columns:
    plot_and_eval(col)


In [None]:
# Record a '30d moving average' entry for every column of train; each
# call's return value replaces eval_df.
for col in train.columns:
    eval_df = append_eval_df(model_type='30d moving average', 
                             target_var = col)
# Display eval_df.
eval_df


In [None]:
# Try several rolling-window lengths and record an evaluation entry for
# both targets at each length.
periods = [1, 4, 12, 26, 52, 104]

for p in periods:
    # Last rolling-mean value in train for this window length.
    items = round(
        train['items_sold'].rolling(window=p).mean().iloc[-1], 2
    )
    dollars = round(
        train['dollars_sold'].rolling(window=p).mean().iloc[-1], 2
    )

    yhat_df = pd.DataFrame(
        {'items_sold': [items], 'dollars_sold': [dollars]},
        index=validate.index,
    )

    model_type = f'{p}d moving average'
    for target in ('items_sold', 'dollars_sold'):
        eval_df = append_eval_df(model_type=model_type,
                                 target_var=target)


In [None]:
# Lowest RMSE recorded so far for each target variable.
# The values are looked up by label: positional [0] / [1] access on a
# label-indexed Series relies on the groups' sort order and is deprecated
# in recent pandas releases.
min_rmse_by_target = eval_df.groupby('target_var')['rmse'].min()
min_rmse_dollars_sold = min_rmse_by_target['dollars_sold']
min_rmse_items_sold = min_rmse_by_target['items_sold']

# Keep only the rows that hold the minimum RMSE for their own target, so a
# row for one target is never selected just because its RMSE happens to
# equal the other target's minimum.
eval_df[
    ((eval_df.target_var == 'dollars_sold')
     & (eval_df.rmse == min_rmse_dollars_sold))
    | ((eval_df.target_var == 'items_sold')
       & (eval_df.rmse == min_rmse_items_sold))
]


In [None]:
import statsmodels.api as sm

# For every column of train: print its name, resample it to weekly means,
# and plot the statsmodels seasonal decomposition of that weekly series.
for col in train.columns:
    print(col, '\n')
    weekly_mean = train[col].resample('W').mean()
    _ = sm.tsa.seasonal_decompose(weekly_mean).plot()
    plt.show()

## c. Holt's Linear Trend Model

In [None]:
def holt_linear_trend():
    """Fit, plot and record a Holt forecast for every column of ``train``.

    For each column of the module-level ``train`` frame, a ``Holt`` model
    is fitted with fixed smoothing values and its predictions from the
    first to the last label of ``validate.index`` are written, rounded to
    two decimals, into the same column of the module-level ``yhat_df``.
    Afterwards ``plot_and_eval`` and ``append_eval_df`` are called once
    per column.

    Returns:
        The value returned by the last ``append_eval_df`` call, which is
        also bound to the module-level ``eval_df``.
    """
    # The cell-level code elsewhere in this file rebinds eval_df at module
    # scope; declare it global so the assignment below does the same
    # instead of creating a function-local name.
    # NOTE(review): append_eval_df is called without the frame, so it
    # presumably reads the module-level eval_df -- confirm.
    global eval_df

    for col in train.columns:
        model = Holt(train[col], exponential=False)
        # Fixed smoothing values; optimized=False keeps them as given.
        # NOTE(review): newer statsmodels releases call the second
        # parameter `smoothing_trend` -- confirm against the installed
        # version before renaming.
        fitted = model.fit(smoothing_level=.1,
                           smoothing_slope=.1,
                           optimized=False)
        yhat_items = fitted.predict(start=validate.index[0],
                                    end=validate.index[-1])
        yhat_df[col] = round(yhat_items, 2)

    for col in train.columns:
        plot_and_eval(target_var=col)

    for col in train.columns:
        eval_df = append_eval_df(model_type='Holts',
                                 target_var=col)

    return eval_df

## d. Based on previous year/month/etc., this is up to you.

In [None]:
# Re-split by calendar year: everything through 2015 for train, 2016 for
# validate, 2017 for test.
# Rows are selected with .loc because df['2016'] is treated as a column
# lookup by current pandas; row selection by partial date string through
# plain [] is no longer supported.
train = df.loc[:'2015']
validate = df.loc['2016']
test = df.loc['2017']


In [None]:
# Previous-year forecast: every 2015 row of train plus, per column, the
# mean difference between rows that lie 365 positions apart in train.
# .loc is required for the year string; train['2015'] would be read as a
# column lookup by current pandas.
yhat_df = train.loc['2015'] + train.diff(365).mean()


In [None]:
# Stack the first row of yhat_df on top of the first row of validate so
# the two can be compared side by side.
pd.concat([yhat_df.head(1), validate.head(1)])


In [None]:
# set yhat_df to index of validate
# NOTE(review): this assignment needs yhat_df and validate to have the
# same number of rows -- confirm (2016 is a leap year, so a daily validate
# would have one more row than predictions built from 2015).
yhat_df.index = validate.index

# Row count of the re-indexed predictions.
len(yhat_df)


In [None]:
# For every column of train: call plot_and_eval, then record a
# 'previous year' entry, rebinding eval_df to append_eval_df's return value.
for col in train.columns:
    plot_and_eval(target_var = col)
    eval_df = append_eval_df(model_type = 'previous year', target_var = col)


In [None]:
# Display eval_df with the entries recorded above.
eval_df


# Bonus

### Using the store data:

## 1. Predict 2018 total monthly sales for a single store and/or item by creating a model.

## 2. Return a dataframe with the month, store_id, y-hat, and the confidence intervals (y-hat lower, y-hat upper). The upper and lower bounds of the predictions are generated automatically when using the Facebook Prophet model, or you could calculate your own using, for example, Bollinger Bands.

## 3. Plot the 2018 monthly sales predictions.