# Time Series Modeling (Smoothing Methods) Assignment

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import plotly.express as px

### Import the walmart stock prices data set.

In [2]:
#read data
walmart = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%203/walmart_stock_prices.csv')
print(walmart.shape)
walmart.head()

(1259, 7)


Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,71.2,71.64,71.07,71.48,5906823,WMT
1,2013-02-11,71.25,71.51,70.53,71.4,6202534,WMT
2,2013-02-12,71.49,71.66,71.1,71.4,4761910,WMT
3,2013-02-13,71.29,71.7,71.21,71.39,3969807,WMT
4,2013-02-14,71.1,71.23,70.755,70.82,6820952,WMT


In [4]:
#change date column to datetime
walmart['date'] = pd.to_datetime(walmart['date'])
walmart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1259 non-null   datetime64[ns]
 1   open    1259 non-null   float64       
 2   high    1259 non-null   float64       
 3   low     1259 non-null   float64       
 4   close   1259 non-null   float64       
 5   volume  1259 non-null   int64         
 6   Name    1259 non-null   object        
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 69.0+ KB


### Plot a line chart with the observed values (the daily closing prices).

In [18]:
walmart = walmart[['date', 'close']]
walmart.head()

Unnamed: 0,date,close
0,2013-02-08,71.48
1,2013-02-11,71.4
2,2013-02-12,71.4
3,2013-02-13,71.39
4,2013-02-14,70.82


In [5]:
#line chart function
def ilinechart(df, x, y, groups=None, title=''):
    fig = px.line(df, x=x, y=y, color=groups, title=title, 
                  template='none')
    fig.show()

In [19]:
ilinechart(walmart, 'date', 'close', title='Walmart Observations over time')

### Choose 4 moving average time windows and add columns to the data set containing the values for each of them.

In [20]:
#adding columns for avg time of certain windows
walmart['20_Day'] = walmart['close'].rolling(window=20).mean()
walmart['50_Day'] = walmart['close'].rolling(window=50).mean()
walmart['100_Day'] = walmart['close'].rolling(window=100).mean()
walmart['200_Day'] = walmart['close'].rolling(window=200).mean()
walmart.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

Unnamed: 0,date,close,20_Day,50_Day,100_Day,200_Day
0,2013-02-08,71.48,,,,
1,2013-02-11,71.4,,,,
2,2013-02-12,71.4,,,,
3,2013-02-13,71.39,,,,
4,2013-02-14,70.82,,,,


### Plot a multi-line chart that compares the 4 different simple moving averages.

In [21]:
walmart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   date     1259 non-null   datetime64[ns]
 1   close    1259 non-null   float64       
 2   20_Day   1240 non-null   float64       
 3   50_Day   1210 non-null   float64       
 4   100_Day  1160 non-null   float64       
 5   200_Day  1060 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 59.1 KB


In [23]:
#melt data in order to plot
melted = pd.melt(walmart,
                 id_vars='date',
                 value_vars=['close', '20_Day', '50_Day', '100_Day', '200_Day'],
                 var_name='Variables',
                 value_name='Values')

In [25]:

#graph melted df
ilinechart(melted, 'date', 'Values', groups='Variables', 
           title='Simple Moving Average Comparison')

### Write a function that calculates a weighted moving average. Take the weighted moving average of the observed values and store it in a new column in the dataframe.

In [26]:
#weighted moving avg function
def wma(df, field, window):
    weights = np.arange(1, window + 1)
    ma = df[field].rolling(window)
    wma = ma.apply(lambda x: np.dot(x, weights)/weights.sum(), raw=True)
    return wma

In [27]:
#using 100_Day column
walmart['100_Day_WMA'] = wma(walmart, 'close', 100)

### Plot the weighted moving average in a multi-line plot with its simple moving average of the same length and evaluate the differences.

In [28]:
#melt both 100 day columns to plot
melted_100 = pd.melt(walmart,
                     id_vars='date',
                     value_vars=['100_Day', '100_Day_WMA'],
                     var_name='Variables',
                     value_name='Values')

#graph
ilinechart(melted_100, 'date', 'Values', groups='Variables', 
           title='WMA vs. Simple Moving Avg (100 days)')

### Apply a simple exponential smoothing model to the observed values and store the results in a new column.

In [29]:
#single/simple Exp smoothing model
s_model = sm.tsa.ExponentialSmoothing(walmart['close']).fit()

#column for results
walmart['100_Day_SExp'] = s_model.predict(100)

### Add the simple exponential smoothing values to your multi-line plot containing the simple and weighted moving average values and evaluate how they differ.

In [32]:
#melt for plot
melted_sw = pd.melt(walmart,
                    id_vars='date',
                    value_vars=['100_Day', '100_Day_WMA', '100_Day_SExp'],
                    var_name='Variables',
                    value_name='Values')

#graph
ilinechart(melted_sw, 'date', 'Values', groups='Variables', 
           title='Smoothing Method Comparisons')

### Perform double and triple exponential smoothing and store their respective values in new columns.

In [33]:
#double exp
d_model = sm.tsa.ExponentialSmoothing(walmart['close'], trend='add').fit()
#triple exp
t_model = sm.tsa.ExponentialSmoothing(walmart['close'], 
                                      trend='add', 
                                      seasonal='add', 
                                      seasonal_periods=4).fit()

#new columns for dbl and trpl
walmart['100_Day_DExp'] = d_model.predict(100)
walmart['100_Day_TExp'] = d_model.predict(100)

### Create a new multi-line plot showing the results of the three exponential smoothing methods.

In [34]:
#melt 3 exp smoothing
melted_exp = pd.melt(walmart,
                     id_vars='date',
                     value_vars=['100_Day_SExp', '100_Day_DExp', '100_Day_TExp'],
                     var_name='Variables',
                     value_name='Values')

#graph
ilinechart(melted_exp, 'date', 'Values', groups='Variables', 
           title='The 3 Exponential smoothing methods Comparison')

### Evaluate the performances of the three exponential smoothing methods by calculating their mean absolute error and their root mean squared error. Which one modeled the data best?

In [35]:
#MAE & RMSE
walmart['SExp_Diff'] = walmart['close'] - walmart['100_Day_SExp'] #calculate error
walmart['DExp_Diff'] = walmart['close'] - walmart['100_Day_DExp']
walmart['TExp_Diff'] = walmart['close'] - walmart['100_Day_TExp']

#printing errors
print('Simple MAE:', walmart['SExp_Diff'].abs().mean(),
      'RMSE:', np.sqrt(np.mean(walmart['SExp_Diff']**2)))

print('Double MAE:', walmart['DExp_Diff'].abs().mean(),
      'RMSE:', np.sqrt(np.mean(walmart['DExp_Diff']**2)))

print('Triple MAE:', walmart['TExp_Diff'].abs().mean(),
      'RMSE:', np.sqrt(np.mean(walmart['TExp_Diff']**2)))

Simple MAE: 0.5490454100702317 RMSE: 0.8364656957126937
Double MAE: 0.548214562094865 RMSE: 0.8360934381614685
Triple MAE: 0.548214562094865 RMSE: 0.8360934381614685


In [None]:
#very similar numbers for all 3 exponential smoothing as shown as well in the graph above