In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import plotly.express as px

  import pandas.util.testing as tm


In [2]:
# Load the price data over HTTPS (the file name indicates Walmart stock prices).
# Later cells read its 'date' and 'close' columns.
DATA_URL = 'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%203/walmart_stock_prices.csv'
df = pd.read_csv(DATA_URL)

In [4]:
#Plot a line chart with the observed values (the daily closing prices).

def ilinechart(df, x, y, groups=None, title=''):
    """Build an interactive Plotly line chart and display it.

    Parameters
    ----------
    df : data handed to ``px.line`` (a DataFrame everywhere in this notebook).
    x, y : names of the columns plotted on the horizontal and vertical axes.
    groups : optional column name, forwarded as ``color`` so that each
        distinct value is drawn as its own line.
    title : chart title; it is centred horizontally (layout ``title.x = 0.5``).

    Returns
    -------
    None. The figure is rendered through ``fig.show()``.
    """
    centred_title = dict(title=dict(x=0.5))

    fig = px.line(df, x=x, y=y, color=groups, title=title, template='none')
    fig.update(layout=centred_title)
    fig.show()

# Observed series only: one line of closing price against date.
ilinechart(df, x='date', y='close', title='Daily Closing Prices Over Time')

In [5]:
# Choose 4 moving average time windows and add columns to the data set containing the values for each of them.

# Simple moving averages of the closing price, one column per window size.
# Each column is named '<window>_day' and is added to df in ascending window order.
for n_days in (7, 30, 60, 90):
    df[f'{n_days}_day'] = df['close'].rolling(window=n_days).mean()

In [6]:
# Plot a multi-line chart that compares the 4 different simple moving averages.

# Reshape to long format (one row per date and series) so that the series name
# in 'variable' can be used to colour the lines.
sma_series = ['close', '7_day', '30_day', '60_day', '90_day']
melted = pd.melt(
    df,
    id_vars='date',
    value_vars=sma_series,
    var_name='variable',
    value_name='value',
)

ilinechart(melted, x='date', y='value', groups='variable', title='Moving Average Comparison')

In [7]:
#Write a function that calculates a weighted moving average. Take the weighted moving average of the observed values and store it in a new column in the dataframe.

def wma(df, field, window):
    """Return the linearly weighted moving average of one column.

    Within every rolling window of ``window`` rows the values are weighted
    1, 2, ..., ``window`` in row order, so the last row of the window carries
    the largest weight. The weighted sum is divided by the sum of the weights.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the column to smooth.
    field : str
        Name of the column in ``df``.
    window : int
        Number of consecutive rows in each window.

    Returns
    -------
    pandas.Series
        Same index as ``df[field]``. The first ``window - 1`` entries are NaN
        because the rolling window is not yet full there.
    """
    weights = np.arange(1, window + 1)
    # The normaliser is the same for every window, so compute it once instead
    # of once per window inside the lambda.
    weight_total = weights.sum()

    # raw=True hands each window to the lambda as a plain ndarray.
    return df[field].rolling(window).apply(
        lambda values: np.dot(values, weights) / weight_total, raw=True
    )

# 90-row weighted moving average of the closing price, stored next to the 90-day simple average.
df['90_day_wma'] = wma(df, field='close', window=90)

In [8]:
#Plot the weighted moving average in a multi-line plot with its simple moving average of the same length and evaluate the differences.

# Long format again: observed close plus the two 90-row averages.
sma_vs_wma_series = ['close', '90_day', '90_day_wma']
melted = pd.melt(
    df,
    id_vars='date',
    value_vars=sma_vs_wma_series,
    var_name='variable',
    value_name='value',
)

ilinechart(melted, x='date', y='value', groups='variable', title='Simple vs. Weighted Moving Averages')

In [9]:
#Apply a simple exponential smoothing model to the observed values and store the results in a new column.

# No trend or seasonal arguments are passed, so this fits the single (simple)
# exponential smoothing form of the model.
observed_close = df['close']
model = sm.tsa.ExponentialSmoothing(observed_close).fit()

# 90 is the `start` position of the prediction, not a window length: values are
# produced from row 90 onward. NOTE(review): the earlier rows are expected to
# stay NaN after index alignment on assignment - confirm this is intended.
df['90_Day_SExp'] = model.predict(start=90)

In [10]:
#Add the simple exponential smoothing values to your multi-line plot containing the simple and weighted moving average values and evaluate how they differ.

# Long format: observed close, both 90-row moving averages and the simple smoothing values.
ses_vs_ma_series = ['close', '90_day', '90_day_wma', '90_Day_SExp']
melted = pd.melt(
    df,
    id_vars='date',
    value_vars=ses_vs_ma_series,
    var_name='variable',
    value_name='value',
)

ilinechart(melted, x='date', y='value', groups='variable', title='Single Smoothing vs Moving Averages')

In [11]:
#Perform double and triple exponential smoothing and store their respective values in new columns.

# Double exponential smoothing: additive trend, no seasonal component.
double_options = dict(trend='add')
model = sm.tsa.ExponentialSmoothing(df['close'], **double_options).fit()
df['90_Day_DExp'] = model.predict(start=90)  # values from row 90 onward

# Triple exponential smoothing: additive trend plus an additive seasonal
# component that repeats every 4 observations.
triple_options = dict(trend='add', seasonal='add', seasonal_periods=4)
model = sm.tsa.ExponentialSmoothing(df['close'], **triple_options).fit()
df['90_Day_TExp'] = model.predict(start=90)  # values from row 90 onward

In [12]:
# Create a new multi-line plot showing the results of the three exponential smoothing methods.

# Long format: observed close against the single, double and triple smoothing values.
smoothing_series = ['close', '90_Day_SExp', '90_Day_DExp', '90_Day_TExp']
melted = pd.melt(
    df,
    id_vars='date',
    value_vars=smoothing_series,
    var_name='variable',
    value_name='value',
)

ilinechart(melted, x='date', y='value', groups='variable', title='Smoothing Method Comparisons')

In [13]:
#Evaluate the performances of the three exponential smoothing methods by calculating their mean absolute error and their root mean squared error. Which one modeled the data best?

# Score each smoothing model with MAE and RMSE.
# (printed label, column-name fragment) for the three models, in print order.
smoothing_models = (('Simple', 'SExp'), ('Double', 'DExp'), ('Triple', 'TExp'))

# Error = observed close minus the model's value; stored as '<fragment>_Diff'.
for _, fragment in smoothing_models:
    df[f'{fragment}_Diff'] = df['close'] - df[f'90_Day_{fragment}']

for label, fragment in smoothing_models:
    errors = df[f'{fragment}_Diff']
    mae = errors.abs().mean()
    rmse = np.sqrt(np.mean(errors**2))
    # A blank line separates consecutive reports, as in the source layout.
    print(f'{label} MAE:', mae,
          'RMSE: ', rmse)

Simple MAE: 0.5494136134448612 RMSE:  0.8357065182296808
Double MAE: 0.5485027707730344 RMSE:  0.8353679950687614
Triple MAE: 0.5479742687210215 RMSE:  0.8341088863338174
