# Time Series Modeling (Smoothing Methods) Assignment

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import plotly.express as px

  import pandas.util.testing as tm


### Import the Walmart stock prices data set.

In [2]:
wal_df = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%203/walmart_stock_prices.csv')

In [3]:
# Parse the date strings into timestamps, then keep only the date and the
# closing price under the display names used by the rest of the notebook.
wal_df['date'] = pd.to_datetime(wal_df['date'])
wal_df = wal_df[['date', 'close']].rename(
    columns={'date': 'Date', 'close': 'Observed'}
)

In [4]:
wal_df.head()

Unnamed: 0,Date,Observed
0,2013-02-08,71.48
1,2013-02-11,71.4
2,2013-02-12,71.4
3,2013-02-13,71.39
4,2013-02-14,70.82


### Plot a line chart with the observed values (the daily closing prices).

In [5]:
px.line(wal_df, 'Date', 'Observed', title='Daily Closing Prices', template='none')

### Choose 4 moving average time windows and add columns to the data set containing the values for each of them.

In [6]:
# Simple (equally weighted) moving averages of the closing price, one column
# per trailing window length.
sma_windows = {
    '20day_window': 20,
    '50day_window': 50,
    '100day_window': 100,
    '200day_window': 200,
}
for sma_column, span in sma_windows.items():
    wal_df[sma_column] = wal_df['Observed'].rolling(window=span).mean()

### Plot a multi-line chart that compares the 4 different simple moving averages.

In [7]:
melted_windows = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '20day_window', '50day_window', '100day_window', '200day_window'],
                         var_name='Variable', value_name='Value')

In [8]:
px.line(melted_windows, 'Date', 'Value', color='Variable', 
        title='20, 50, 100, and 200 day simple moving avgs', template='none')

### Write a function that calculates a weighted moving average. Take the weighted moving average of the observed values and store it in a new column in the dataframe.

In [9]:
def wma(df, field, window):
    """Return the linearly weighted moving average of ``df[field]``.

    Inside each rolling window the oldest observation is weighted 1 and the
    newest is weighted ``window``; the weighted sum is divided by the sum of
    the weights.

    Args:
        df: DataFrame that holds the series to smooth.
        field: Name of the column in ``df`` to smooth.
        window: Number of consecutive observations per window (at least 1).

    Returns:
        The result of the rolling apply on ``df[field]``: one value per row,
        NaN for rows whose window is not yet full.

    Raises:
        ValueError: If ``window`` is smaller than 1, because an empty weight
            vector would otherwise produce a 0/0 division.
    """
    if window < 1:
        raise ValueError(f'window must be at least 1, got {window}')

    weights = np.arange(1, window + 1)
    # The normaliser is the same for every window, so compute it once here
    # instead of once per row inside the rolling callback.
    weight_total = weights.sum()

    # raw=True hands each window to the callback as a plain ndarray.
    return df[field].rolling(window).apply(
        lambda values: np.dot(values, weights) / weight_total, raw=True
    )

In [10]:
# Weighted moving averages of the closing price, one column per window length.
wma_windows = {
    '20day_WMA': 20,
    '50day_WMA': 50,
    '100day_WMA': 100,
    '200day_WMA': 200,
}
for wma_column, span in wma_windows.items():
    wal_df[wma_column] = wma(wal_df, 'Observed', span)

### Plot the weighted moving average in a multi-line plot with its simple moving average of the same length and evaluate the differences.

In [11]:
melted_20day_window = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '20day_window', '20day_WMA'],
                         var_name='Variable', value_name='Value')

px.line(melted_20day_window, 'Date', 'Value', color='Variable', 
        title='20 day simple and weighted moving avgs', template='none')

In [12]:
melted_50day_window = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '50day_window', '50day_WMA'],
                         var_name='Variable', value_name='Value')

px.line(melted_50day_window, 'Date', 'Value', color='Variable', 
        title='50 day simple and weighted moving avgs', template='none')

In [13]:
melted_100day_window = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '100day_window', '100day_WMA'],
                         var_name='Variable', value_name='Value')

px.line(melted_100day_window, 'Date', 'Value', color='Variable', 
        title='100 day simple and weighted moving avgs', template='none')

In [14]:
melted_200day_window = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '200day_window', '200day_WMA'],
                         var_name='Variable', value_name='Value')

px.line(melted_200day_window, 'Date', 'Value', color='Variable', 
        title='200 day simple and weighted moving avgs', template='none')

### Apply a simple exponential smoothing model to the observed values and store the results in a new column.

In [15]:
# Exponential smoothing with neither a trend nor a seasonal component, i.e.
# simple exponential smoothing; fit() is called with its defaults.
model = sm.tsa.ExponentialSmoothing(wal_df['Observed']).fit()

# NOTE(review): per the statsmodels API the first positional argument of
# predict() is `start` (the zero-based observation at which prediction
# begins), not a smoothing window. These four columns therefore presumably
# hold the same in-sample predictions, just beginning at row 20/50/100/200 --
# confirm this is intended, since the column names suggest window lengths.
sexp_starts = {
    '20day_SExp': 20,
    '50day_SExp': 50,
    '100day_SExp': 100,
    '200day_SExp': 200,
}
for sexp_column, start in sexp_starts.items():
    wal_df[sexp_column] = model.predict(start)

### Add the simple exponential smoothing values to your multi-line plot containing the simple and weighted moving average values and evaluate how they differ.

In [16]:
# Reshape to long format (one row per date and series) so plotly can draw
# every series as its own coloured line.
melted_20day_window = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '20day_window', '20day_WMA', '20day_SExp'],
                         var_name='Variable', value_name='Value')

# The title names all plotted series, including the exponential smoothing one.
px.line(melted_20day_window, 'Date', 'Value', color='Variable',
        title='20 day simple and weighted moving avgs vs. exponential smoothing',
        template='none')

In [17]:
# Reshape to long format (one row per date and series) so plotly can draw
# every series as its own coloured line.
melted_50day_window = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '50day_window', '50day_WMA', '50day_SExp'],
                         var_name='Variable', value_name='Value')

# The title names all plotted series, including the exponential smoothing one.
px.line(melted_50day_window, 'Date', 'Value', color='Variable',
        title='50 day simple and weighted moving avgs vs. exponential smoothing',
        template='none')

In [18]:
# Reshape to long format (one row per date and series) so plotly can draw
# every series as its own coloured line.
melted_100day_window = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '100day_window', '100day_WMA', '100day_SExp'],
                         var_name='Variable', value_name='Value')

# The title names all plotted series, including the exponential smoothing one.
px.line(melted_100day_window, 'Date', 'Value', color='Variable',
        title='100 day simple and weighted moving avgs vs. exponential smoothing',
        template='none')

In [19]:
# Reshape to long format (one row per date and series) so plotly can draw
# every series as its own coloured line.
melted_200day_window = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '200day_window', '200day_WMA', '200day_SExp'],
                         var_name='Variable', value_name='Value')

# The title names all plotted series, including the exponential smoothing one.
px.line(melted_200day_window, 'Date', 'Value', color='Variable',
        title='200 day simple and weighted moving avgs vs. exponential smoothing',
        template='none')

### Perform double and triple exponential smoothing and store their respective values in new columns.

In [20]:
# Exponential smoothing with an additive trend and no seasonal component
# (double exponential smoothing); fit() is called with its defaults.
model = sm.tsa.ExponentialSmoothing(wal_df['Observed'], trend='add').fit()

# NOTE(review): the positional argument of predict() is `start` (the
# zero-based observation at which prediction begins), not a window length.
# The four columns presumably hold the same in-sample predictions beginning
# at row 20/50/100/200 -- confirm this is intended.
dexp_starts = {
    '20day_DExp': 20,
    '50day_DExp': 50,
    '100day_DExp': 100,
    '200day_DExp': 200,
}
for dexp_column, start in dexp_starts.items():
    wal_df[dexp_column] = model.predict(start)

In [59]:
# Exponential smoothing with an additive trend and an additive seasonal
# component (triple exponential smoothing); fit() is called with its defaults.
# NOTE(review): seasonal_periods=10 assumes the pattern repeats every 10
# observations -- confirm that this cycle length suits daily closing prices.
model = sm.tsa.ExponentialSmoothing(
    wal_df['Observed'],
    trend='add',
    seasonal='add',
    seasonal_periods=10,
).fit()

# NOTE(review): the positional argument of predict() is `start` (the
# zero-based observation at which prediction begins), not a window length.
# The four columns presumably hold the same in-sample predictions beginning
# at row 20/50/100/200 -- confirm this is intended.
texp_starts = {
    '20day_TExp': 20,
    '50day_TExp': 50,
    '100day_TExp': 100,
    '200day_TExp': 200,
}
for texp_column, start in texp_starts.items():
    wal_df[texp_column] = model.predict(start)

### Create a new multi-line plot showing the results of the three exponential smoothing methods.

In [65]:
melted_20day = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '20day_SExp', '20day_DExp', '20day_TExp'],
                         var_name='Variable', value_name='Value')

px.line(melted_20day, 'Date', 'Value', color='Variable', 
        title='20 day three exponential smoothing', template='none')

In [66]:
melted_50day = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '50day_SExp', '50day_DExp', '50day_TExp'],
                         var_name='Variable', value_name='Value')

px.line(melted_50day, 'Date', 'Value', color='Variable', 
        title='50 day three exponential smoothing', template='none')

In [67]:
melted_100day = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '100day_SExp', '100day_DExp', '100day_TExp'],
                         var_name='Variable', value_name='Value')

px.line(melted_100day, 'Date', 'Value', color='Variable', 
        title='100 day three exponential smoothing', template='none')

In [68]:
melted_200day = pd.melt(wal_df, id_vars='Date',
                         value_vars=['Observed', '200day_SExp', '200day_DExp', '200day_TExp'],
                         var_name='Variable', value_name='Value')

px.line(melted_200day, 'Date', 'Value', color='Variable', 
        title='200 day three exponential smoothing', template='none')

### Evaluate the performances of the three exponential smoothing methods by calculating their mean absolute error and their root mean squared error. Which one modeled the data best?

In [54]:
def mae_rmse(data):
    """Print the mean absolute error and root mean squared error of ``data``.

    ``data`` is treated as a collection of residuals and must support
    ``.abs()``, ``.mean()`` and ``** 2`` (for example a pandas Series).
    Nothing is returned; both metrics are written to stdout on a single line.
    """
    mean_abs_error = data.abs().mean()
    root_mean_sq_error = np.sqrt(np.mean(data ** 2))
    print(f'MAE: {mean_abs_error} RMSE: {root_mean_sq_error}')

In [60]:
# Residuals (observed minus predicted) for each exponential smoothing column.
# Rows where the prediction column is NaN yield NaN residuals.
residual_sources_20 = {
    'SExp_Diff_20day': '20day_SExp',
    'DExp_Diff_20day': '20day_DExp',
    'TExp_Diff_20day': '20day_TExp',
}
for diff_column, predicted_column in residual_sources_20.items():
    wal_df[diff_column] = wal_df['Observed'] - wal_df[predicted_column]

# Report MAE and RMSE for each method under its heading.
for heading, diff_column in (
    ('Simple 20 Day:', 'SExp_Diff_20day'),
    ('Double 20 Day:', 'DExp_Diff_20day'),
    ('Triple 20 Day:', 'TExp_Diff_20day'),
):
    print(heading)
    mae_rmse(wal_df[diff_column])


Simple 20 Day:
MAE: 0.546891889183947 RMSE: 0.8247611707580098
Double 20 Day:
MAE: 0.5459363250078834 RMSE: 0.8243900666157444
Triple 20 Day:
MAE: 0.5449423292315478 RMSE: 0.8220993455456554


In [61]:
# Residuals (observed minus predicted) for each exponential smoothing column.
# Rows where the prediction column is NaN yield NaN residuals.
residual_sources_50 = {
    'SExp_Diff_50day': '50day_SExp',
    'DExp_Diff_50day': '50day_DExp',
    'TExp_Diff_50day': '50day_TExp',
}
for diff_column, predicted_column in residual_sources_50.items():
    wal_df[diff_column] = wal_df['Observed'] - wal_df[predicted_column]

# Report MAE and RMSE for each method under its heading.
for heading, diff_column in (
    ('Simple 50 Day:', 'SExp_Diff_50day'),
    ('Double 50 Day:', 'DExp_Diff_50day'),
    ('Triple 50 Day:', 'TExp_Diff_50day'),
):
    print(heading)
    mae_rmse(wal_df[diff_column])

Simple 50 Day:
MAE: 0.5493812166778933 RMSE: 0.8299220308996149
Double 50 Day:
MAE: 0.5485425618992016 RMSE: 0.8296629052942958
Triple 50 Day:
MAE: 0.5474956981925638 RMSE: 0.8272837509823826


In [63]:
# Residuals (observed minus predicted) for each exponential smoothing column.
# Rows where the prediction column is NaN yield NaN residuals.
residual_sources_100 = {
    'SExp_Diff_100day': '100day_SExp',
    'DExp_Diff_100day': '100day_DExp',
    'TExp_Diff_100day': '100day_TExp',
}
for diff_column, predicted_column in residual_sources_100.items():
    wal_df[diff_column] = wal_df['Observed'] - wal_df[predicted_column]

# Report MAE and RMSE for each method under its heading.
for heading, diff_column in (
    ('Simple 100 Day:', 'SExp_Diff_100day'),
    ('Double 100 Day:', 'DExp_Diff_100day'),
    ('Triple 100 Day:', 'TExp_Diff_100day'),
):
    print(heading)
    mae_rmse(wal_df[diff_column])

Simple 100 Day:
MAE: 0.5490454029917842 RMSE: 0.836465698696353
Double 100 Day:
MAE: 0.5482148875796643 RMSE: 0.8360934357714518
Triple 100 Day:
MAE: 0.5473534994368037 RMSE: 0.8337189574081788


In [64]:
# Residuals (observed minus predicted) for each exponential smoothing column.
# Rows where the prediction column is NaN yield NaN residuals.
residual_sources_200 = {
    'SExp_Diff_200day': '200day_SExp',
    'DExp_Diff_200day': '200day_DExp',
    'TExp_Diff_200day': '200day_TExp',
}
for diff_column, predicted_column in residual_sources_200.items():
    wal_df[diff_column] = wal_df['Observed'] - wal_df[predicted_column]

# Report MAE and RMSE for each method under its heading.
for heading, diff_column in (
    ('Simple 200 Day:', 'SExp_Diff_200day'),
    ('Double 200 Day:', 'DExp_Diff_200day'),
    ('Triple 200 Day:', 'TExp_Diff_200day'),
):
    print(heading)
    mae_rmse(wal_df[diff_column])

Simple 200 Day:
MAE: 0.5621068297747482 RMSE: 0.8577642387344923
Double 200 Day:
MAE: 0.5610482929207913 RMSE: 0.8574485942323363
Triple 200 Day:
MAE: 0.5603136353003347 RMSE: 0.8554878548261433


# Lecture Notes

In [71]:
df = pd.read_csv('/content/LA_weather.csv')

In [72]:
df['Date'] = pd.to_datetime(df['Date'])

In [73]:
# Collapse the readings to one row per calendar day.
df['Day'] = df['Date'].dt.date
# Average only the 'Observed' column rather than every column in the frame:
# on recent pandas versions a whole-frame mean() fails as soon as the file
# contains a non-numeric column, and the next cell keeps just 'Day' and
# 'Observed' anyway. (.last() would instead keep each day's final reading.)
df = df.groupby('Day', as_index=False)['Observed'].mean() #.last()

In [74]:
df = df[['Day', 'Observed']]

In [75]:
px.line(df, 'Day', 'Observed', template='none')

In [76]:
df['20day_window'] = df['Observed'].rolling(window=20).mean()
df['50day_window'] = df['Observed'].rolling(window=50).mean()
df['100day_window'] = df['Observed'].rolling(window=100).mean()
df['200day_window'] = df['Observed'].rolling(window=200).mean()

In [78]:
melted = pd.melt(df, id_vars='Day',
                         value_vars=['Observed', '20day_window', '50day_window', '100day_window', '200day_window'],
                         var_name='Variable', value_name='Value')

In [81]:
px.line(melted, x='Day', y='Value', color='Variable', template='none')

In [None]:
# def wma(df, field, window):
#   weights= np.arange(1, window+1)
#   ma = df[field].rolling(window)
#   wma = ma.apply(lambda x: np.dot(x, weights)/weights.sum(), raw=True)
#   return wma

In [82]:
df['200day_WMA'] = wma(df, 'Observed', 200)

In [83]:
melted = pd.melt(df, id_vars='Day',
                         value_vars=['Observed', '200day_window', '200day_WMA'],
                         var_name='Variable', value_name='Value')

In [84]:
px.line(melted, x='Day', y='Value', color='Variable', template='none')

In [85]:
model = sm.tsa.ExponentialSmoothing(df['Observed']).fit()
df['200day_SExp'] = model.predict(200)

In [86]:
melted = pd.melt(df, id_vars='Day',
                         value_vars=['Observed', '200day_window', '200day_WMA', '200day_SExp'],
                         var_name='Variable', value_name='Value')

px.line(melted, x='Day', y='Value', color='Variable', template='none')

In [89]:
model = sm.tsa.ExponentialSmoothing(df['Observed'], trend='add').fit()
df['200day_DExp'] = model.predict(200)

In [90]:
model = sm.tsa.ExponentialSmoothing(df['Observed'], trend='add',
                                    seasonal='add',
                                    seasonal_periods=4).fit()
df['200day_TExp'] = model.predict(200)

In [91]:
melted = pd.melt(df, id_vars='Day',
                         value_vars=['Observed', '200day_TExp', '200day_DExp', '200day_SExp'],
                         var_name='Variable', value_name='Value')

px.line(melted, x='Day', y='Value', color='Variable', template='none')

In [93]:
df['SExp_Diff_200day'] = df['Observed'] - df['200day_SExp']
df['DExp_Diff_200day'] = df['Observed'] - df['200day_DExp']
df['TExp_Diff_200day'] = df['Observed'] - df['200day_TExp']

print('Simple:')
mae_rmse(df['SExp_Diff_200day'])

print('Double:')
mae_rmse(df['DExp_Diff_200day'])

print('Triple:')
mae_rmse(df['TExp_Diff_200day'])

Simple:
MAE: 1.234780089584224 RMSE: 1.6420854733303023
Double:
MAE: 1.2348159554350198 RMSE: 1.642082999781356
Triple:
MAE: 1.3154465870840863 RMSE: 1.7350758787328517
