# Time Series Modeling (Smoothing Methods) Assignment

In [26]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import plotly.express as px

### Import the Walmart stock prices data set.

In [27]:
# Location of the Walmart stock price CSV that the rest of the notebook analyses.
prices_url = 'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%203/walmart_stock_prices.csv'

# Read the raw prices into the frame every later cell works on.
data = pd.read_csv(prices_url)

# Print column dtypes and non-null counts as a quick sanity check.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    1259 non-null   object 
 1   open    1259 non-null   float64
 2   high    1259 non-null   float64
 3   low     1259 non-null   float64
 4   close   1259 non-null   float64
 5   volume  1259 non-null   int64  
 6   Name    1259 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 59.1+ KB


In [28]:
# Convert the 'date' column to pandas datetimes so it can serve as a time axis.
parsed_dates = pd.to_datetime(data['date'])
data['date'] = parsed_dates

# Preview the first rows to confirm the conversion.
data.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,71.2,71.64,71.07,71.48,5906823,WMT
1,2013-02-11,71.25,71.51,70.53,71.4,6202534,WMT
2,2013-02-12,71.49,71.66,71.1,71.4,4761910,WMT
3,2013-02-13,71.29,71.7,71.21,71.39,3969807,WMT
4,2013-02-14,71.1,71.23,70.755,70.82,6820952,WMT


### Plot a line chart with the observed values (the daily closing prices).

In [72]:
def ilinechart(df, x, y, groups=None, title=''):
    """Display an interactive Plotly line chart of ``y`` against ``x``.

    Parameters
    ----------
    df
        Data handed straight to ``px.line``.
    x, y
        Names of the columns plotted on the horizontal and vertical axes.
    groups
        Optional column name passed as ``color``; one line is drawn per
        distinct value. ``None`` draws a single line.
    title
        Chart title.

    Returns
    -------
    None
        The figure is shown via ``fig.show()`` and not returned.
    """
    chart = px.line(df, x=x, y=y, color=groups, title=title, template='none')
    # Place the title at x=0.5 so it sits in the horizontal centre.
    chart = chart.update(layout={'title': {'x': 0.5}})
    chart.show()

# Plot the observed closing prices against their dates.
ilinechart(data, x='date', y='close', title='Closing Prices Over Time')

### Choose 4 moving average time windows and add columns to the data set containing the values for each of them.

In [30]:
# Simple moving averages of the closing price. Window lengths are counted in
# rows (the data appears to hold one row per trading day, so 5 / 21 / 63 / 252
# roughly correspond to a week / month / quarter / year).
sma_windows = {
    'Weekly_MA': 5,
    'Monthly_MA': 21,
    'Quarterly_MA': 63,
    'Yearly_MA': 252,
}
close_prices = data['close']
for ma_column, n_rows in sma_windows.items():
    # Rows before the first full window are NaN.
    data[ma_column] = close_prices.rolling(window=n_rows).mean()


### Plot a multi-line chart that compares the 4 different simple moving averages.

In [73]:
# Reshape to long format (one row per date/series pair) so each series
# becomes its own coloured line.
melty1 = data.melt(
    id_vars='date',
    value_vars=['close', 'Weekly_MA', 'Monthly_MA', 'Quarterly_MA', 'Yearly_MA'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty1, x='date', y='Value', groups='Variable', title='Moving Average')

### Write a function that calculates a weighted moving average. Take the weighted moving average of the observed values and store it in a new column in the dataframe.

In [32]:
def wma(df, field, window):
    """Linearly weighted moving average of ``df[field]``.

    Every run of ``window`` consecutive values is averaged with the weights
    ``1, 2, ..., window``, so the newest value in each window counts the most.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the series to smooth.
    field : str
        Label of the column to smooth.
    window : int
        Number of consecutive rows in each window.

    Returns
    -------
    pandas.Series
        Weighted averages on ``df``'s index; rows before the first full
        window are NaN.
    """
    weights = np.arange(1, window + 1)
    weight_total = weights.sum()

    def _weighted_mean(values):
        # `values` is a raw NumPy array of length `window` (raw=True below).
        return np.dot(values, weights) / weight_total

    return df[field].rolling(window).apply(_weighted_mean, raw=True)

# Weighted moving averages of the closing price for four window lengths.
wma_windows = (
    ('Weekly_WMA', 5),
    ('Monthly_WMA', 21),
    ('Quarterly_WMA', 63),
    ('Yearly_WMA', 252),
)
for wma_column, n_rows in wma_windows:
    data[wma_column] = wma(data, 'close', n_rows)

### Plot the weighted moving average in a multi-line plot with its simple moving average of the same length and evaluate the differences.

In [69]:
# Long-format frame comparing the 5-row simple and weighted averages with the close.
melty2 = data.melt(
    id_vars='date',
    value_vars=['close', 'Weekly_MA', 'Weekly_WMA'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty2, x='date', y='Value', groups='Variable', title='Weekly MA vs WMA')

In [74]:
# Long-format frame comparing the 21-row simple and weighted averages with the close.
melty3 = data.melt(
    id_vars='date',
    value_vars=['close', 'Monthly_MA', 'Monthly_WMA'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty3, x='date', y='Value', groups='Variable', title='Monthly MA vs WMA')

In [67]:
# Long-format frame comparing the 63-row simple and weighted averages with the close.
melty4 = data.melt(
    id_vars='date',
    value_vars=['close', 'Quarterly_MA', 'Quarterly_WMA'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty4, x='date', y='Value', groups='Variable', title='Quarterly MA vs WMA')

In [66]:
# Long-format frame comparing the 252-row simple and weighted averages with the close.
melty5 = data.melt(
    id_vars='date',
    value_vars=['close', 'Yearly_MA', 'Yearly_WMA'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty5, x='date', y='Value', groups='Variable', title='Yearly MA vs WMA')

### Apply a simple exponential smoothing model to the observed values and store the results in a new column.

In [37]:
# Simple exponential smoothing: no trend and no seasonal component.
# All four "horizons" use exactly the same model specification, so the model
# is fitted once and the single result is shared. The per-horizon names stay
# bound so any cell that refers to them keeps working.
ses_fit = sm.tsa.ExponentialSmoothing(data['close']).fit()
weekmodel = monthmodel = quartermodel = yearmodel = ses_fit

# NOTE: the positional argument of predict() is the *start* index of the
# in-sample prediction, not a smoothing window. Each column below therefore
# holds the same fitted values and differs only in how many leading rows are
# left as NaN (5, 21, 63 or 252).
# NOTE(review): if the columns are meant to smooth at different strengths,
# that needs a different smoothing level per model -- confirm the intent.
ses_start_rows = {
    'Weekly_SExp': 5,
    'Monthly_SExp': 21,
    'Quarterly_SExp': 63,
    'Yearly_SExp': 252,
}
for ses_column, start_row in ses_start_rows.items():
    data[ses_column] = ses_fit.predict(start_row)


After 0.13 initialization must be handled at model creation



### Add the simple exponential smoothing values to your multi-line plot containing the simple and weighted moving average values and evaluate how they differ.

In [57]:
# Long-format frame adding the 'Weekly_SExp' series to the weekly MA/WMA comparison.
melty6 = data.melt(
    id_vars='date',
    value_vars=['close', 'Weekly_MA', 'Weekly_WMA', 'Weekly_SExp'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty6, x='date', y='Value', groups='Variable', title='Weekly MA vs WMA vs SExp')

In [75]:
# Long-format frame adding the 'Monthly_SExp' series to the monthly MA/WMA comparison.
melty7 = data.melt(
    id_vars='date',
    value_vars=['close', 'Monthly_MA', 'Monthly_WMA', 'Monthly_SExp'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty7, x='date', y='Value', groups='Variable', title='Monthly MA vs WMA vs SExp')

In [59]:
# Long-format frame adding the 'Quarterly_SExp' series to the quarterly MA/WMA comparison.
melty8 = data.melt(
    id_vars='date',
    value_vars=['close', 'Quarterly_MA', 'Quarterly_WMA', 'Quarterly_SExp'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty8, x='date', y='Value', groups='Variable', title='Quarterly MA vs WMA vs SExp')

In [60]:
# Long-format frame adding the 'Yearly_SExp' series to the yearly MA/WMA comparison.
melty9 = data.melt(
    id_vars='date',
    value_vars=['close', 'Yearly_MA', 'Yearly_WMA', 'Yearly_SExp'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty9, x='date', y='Value', groups='Variable', title='Yearly MA vs WMA vs SExp')

### Perform double and triple exponential smoothing and store their respective values in new columns.

In [42]:
# Double

# Double exponential smoothing: additive trend, no seasonal component.
# The specification is identical for every "horizon", so the model is fitted
# once and the result shared; the per-horizon names stay bound for any cell
# that refers to them.
des_fit = sm.tsa.ExponentialSmoothing(data['close'], trend='add').fit()
weekmodel1 = monthmodel1 = quartermodel1 = yearmodel1 = des_fit

# predict(n) starts the in-sample prediction at row n; it does not change the
# amount of smoothing. The columns differ only in their number of leading NaNs.
des_start_rows = {
    'Weekly_DExp': 5,
    'Monthly_DExp': 21,
    'Quarterly_DExp': 63,
    'Yearly_DExp': 252,
}
for des_column, start_row in des_start_rows.items():
    data[des_column] = des_fit.predict(start_row)

# Triple

# Triple exponential smoothing: additive trend plus additive seasonality.
# The specification is identical for every "horizon", so the model is fitted
# once and the result shared; the per-horizon names stay bound for any cell
# that refers to them.
# NOTE(review): seasonal_periods=4 means a cycle of four rows -- confirm this
# is the intended seasonal length for this series.
tes_fit = sm.tsa.ExponentialSmoothing(
    data['close'], trend='add', seasonal='add', seasonal_periods=4
).fit()
weekmodel2 = monthmodel2 = quartermodel2 = yearmodel2 = tes_fit

# predict(n) starts the in-sample prediction at row n; it does not change the
# amount of smoothing. The columns differ only in their number of leading NaNs.
tes_start_rows = {
    'Weekly_TExp': 5,
    'Monthly_TExp': 21,
    'Quarterly_TExp': 63,
    'Yearly_TExp': 252,
}
for tes_column, start_row in tes_start_rows.items():
    data[tes_column] = tes_fit.predict(start_row)

### Create a new multi-line plot showing the results of the three exponential smoothing methods.

In [61]:
# Long-format frame comparing the three 'Weekly_*Exp' smoothing columns with the close.
melty10 = data.melt(
    id_vars='date',
    value_vars=['close', 'Weekly_SExp', 'Weekly_DExp', 'Weekly_TExp'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty10, x='date', y='Value', groups='Variable', title='Weekly SExp vs DExp vs TExp')

In [76]:
# Long-format frame comparing the three 'Monthly_*Exp' smoothing columns with the close.
melty11 = data.melt(
    id_vars='date',
    value_vars=['close', 'Monthly_SExp', 'Monthly_DExp', 'Monthly_TExp'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty11, x='date', y='Value', groups='Variable', title='Monthly SExp vs DExp vs TExp')

In [63]:
# Long-format frame comparing the three 'Quarterly_*Exp' smoothing columns with the close.
melty12 = data.melt(
    id_vars='date',
    value_vars=['close', 'Quarterly_SExp', 'Quarterly_DExp', 'Quarterly_TExp'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty12, x='date', y='Value', groups='Variable', title='Quarterly SExp vs DExp vs TExp')

In [64]:
# Long-format frame comparing the three 'Yearly_*Exp' smoothing columns with the close.
melty13 = data.melt(
    id_vars='date',
    value_vars=['close', 'Yearly_SExp', 'Yearly_DExp', 'Yearly_TExp'],
    var_name='Variable',
    value_name='Value',
)
ilinechart(melty13, x='date', y='Value', groups='Variable', title='Yearly SExp vs DExp vs TExp')

### Evaluate the performances of the three exponential smoothing methods by calculating their mean absolute error and their root mean squared error. Which one modeled the data best?

In [65]:
# Residuals: observed close minus the fitted value of each smoothing method.
weekly_residual_columns = (
    ('Weekly_SExp_Err', 'Weekly_SExp'),
    ('Weekly_DExp_Err', 'Weekly_DExp'),
    ('Weekly_TExp_Err', 'Weekly_TExp'),
)
for err_column, fitted_column in weekly_residual_columns:
    data[err_column] = data['close'] - data[fitted_column]

# Mean absolute error and root mean squared error per method.
weekly_report_rows = (
    ('Simple || Weekly MAE:', 'Weekly_SExp_Err'),
    ('Double || Weekly MAE:', 'Weekly_DExp_Err'),
    ('Triple || Weekly MAE:', 'Weekly_TExp_Err'),
)
for mae_label, err_column in weekly_report_rows:
    residuals = data[err_column]
    print(mae_label, residuals.abs().mean(),
          '| Weekly RMSE:', np.sqrt(np.mean(residuals**2)))

Simple || Weekly MAE: 0.5479983382963851 | Weekly RMSE: 0.8243060551757576
Double || Weekly MAE: 0.5469963796667892 | Weekly RMSE: 0.8238912038900988
Triple || Weekly MAE: 0.5467965615102722 | Weekly RMSE: 0.8230847450579265


In [48]:
# Residuals: observed close minus the fitted value of each smoothing method.
monthly_residual_columns = (
    ('Monthly_SExp_Err', 'Monthly_SExp'),
    ('Monthly_DExp_Err', 'Monthly_DExp'),
    ('Monthly_TExp_Err', 'Monthly_TExp'),
)
for err_column, fitted_column in monthly_residual_columns:
    data[err_column] = data['close'] - data[fitted_column]

# Mean absolute error and root mean squared error per method.
monthly_report_rows = (
    ('Simple || Monthly MAE:', 'Monthly_SExp_Err'),
    ('Double || Monthly MAE:', 'Monthly_DExp_Err'),
    ('Triple || Monthly MAE:', 'Monthly_TExp_Err'),
)
for mae_label, err_column in monthly_report_rows:
    residuals = data[err_column]
    print(mae_label, residuals.abs().mean(),
          '| Monthly RMSE', np.sqrt(np.mean(residuals**2)))

Simple || Monthly MAE: 0.5472874189171427 | Monthly RMSE 0.8250925825661596
Double || Monthly MAE: 0.5463097037328916 | Monthly RMSE 0.8247195796721221
Triple || Monthly MAE: 0.5460136932409325 | Monthly RMSE 0.823739058536493


In [49]:
# Residuals: observed close minus the fitted value of each smoothing method.
quarterly_residual_columns = (
    ('Quarterly_SExp_Err', 'Quarterly_SExp'),
    ('Quarterly_DExp_Err', 'Quarterly_DExp'),
    ('Quarterly_TExp_Err', 'Quarterly_TExp'),
)
for err_column, fitted_column in quarterly_residual_columns:
    data[err_column] = data['close'] - data[fitted_column]

# Mean absolute error and root mean squared error per method.
quarterly_report_rows = (
    ('Simple || Quarterly MAE:', 'Quarterly_SExp_Err'),
    ('Double || Quarterly MAE:', 'Quarterly_DExp_Err'),
    ('Triple || Quarterly MAE:', 'Quarterly_TExp_Err'),
)
for mae_label, err_column in quarterly_report_rows:
    residuals = data[err_column]
    print(mae_label, residuals.abs().mean(),
          '| Quarterly RMSE', np.sqrt(np.mean(residuals**2)))

Simple || Quarterly MAE: 0.5493938457404857 | Quarterly RMSE 0.8318475584986706
Double || Quarterly MAE: 0.5485691799032535 | Quarterly RMSE 0.8315949262486474
Triple || Quarterly MAE: 0.5480600125912491 | Quarterly RMSE 0.8303989294766502


In [50]:
# Residuals: observed close minus the fitted value of each smoothing method.
yearly_residual_columns = (
    ('Yearly_SExp_Err', 'Yearly_SExp'),
    ('Yearly_DExp_Err', 'Yearly_DExp'),
    ('Yearly_TExp_Err', 'Yearly_TExp'),
)
for err_column, fitted_column in yearly_residual_columns:
    data[err_column] = data['close'] - data[fitted_column]

# Mean absolute error and root mean squared error per method.
yearly_report_rows = (
    ('Simple || Yearly MAE:', 'Yearly_SExp_Err'),
    ('Double || Yearly MAE:', 'Yearly_DExp_Err'),
    ('Triple || Yearly MAE:', 'Yearly_TExp_Err'),
)
for mae_label, err_column in yearly_report_rows:
    residuals = data[err_column]
    print(mae_label, residuals.abs().mean(),
          '| Yearly RMSE', np.sqrt(np.mean(residuals**2)))

Simple || Yearly MAE: 0.5691354951439933 | Yearly RMSE 0.8706606735564397
Double || Yearly MAE: 0.5679979388174334 | Yearly RMSE 0.8701589896736884
Triple || Yearly MAE: 0.5678914853886949 | Yearly RMSE 0.8693057913749753
