In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm 
import plotly.express as px


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



In [None]:
# Load the weather observations CSV into the working DataFrame.
# NOTE(review): '/content/...' is an absolute path (looks like a Colab
# location) — adjust it when running in another environment.
df = pd.read_csv('/content/LA_weather.csv')

In [None]:
# Convert the 'Date' column to datetime64 so the .dt accessor can be used on it.
df['Date'] = pd.to_datetime(df['Date'])
# Print column dtypes and non-null counts to confirm the conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45099 entries, 0 to 45098
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      45099 non-null  datetime64[ns]
 1   Observed  45099 non-null  float64       
 2   Humidity  45099 non-null  float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 1.0 MB


In [None]:
# Preview the first rows of the loaded data.
df.head()

Unnamed: 0,Date,Observed,Humidity
0,2012-10-01 13:00:00,291.87,88.0
1,2012-10-01 14:00:00,291.868186,88.0
2,2012-10-01 15:00:00,291.862844,88.0
3,2012-10-01 16:00:00,291.857503,88.0
4,2012-10-01 17:00:00,291.852162,88.0


In [None]:
# Keep only the calendar date (time of day removed) of each timestamp;
# this column is the key for the daily aggregation.
df['Day'] = df['Date'].dt.date

In [None]:
# Collapse the readings into one row per calendar day (daily mean).
# The numeric columns are selected explicitly: older pandas silently dropped
# the datetime 'Date' column from mean(), newer releases do not, so relying
# on that behaviour makes the result depend on the installed version.
df = df.groupby('Day', as_index=False)[['Observed', 'Humidity']].mean()

In [None]:
def ilinechart(df, x, y, groups=None, title=''):
    """Draw and display an interactive Plotly Express line chart.

    Parameters
    ----------
    df : DataFrame handed to ``px.line`` as the data source.
    x, y : column names used for the horizontal and vertical axes.
    groups : optional column name forwarded as ``color`` so that each
        distinct value gets its own line; ``None`` draws a single line.
    title : chart title.

    Returns
    -------
    None. The figure is shown via ``fig.show()`` and not returned.
    """
    line_options = dict(x=x, y=y, color=groups, title=title, template='none')
    px.line(df, **line_options).show()

In [None]:
# Plot the 'Observed' column against 'Day' as a single line.
ilinechart(df, 'Day', 'Observed', title='Observations over time')

In [None]:
# Simple moving averages of 'Observed' for several window lengths.
# Each '<n>_Day' column stays NaN until n observations are available.
for _n_days in (20, 50, 100, 200):
    df['{}_Day'.format(_n_days)] = df['Observed'].rolling(window=_n_days).mean()

df.head()

Unnamed: 0,Day,Observed,Humidity,20_Day,50_Day,100_Day,200_Day
0,2012-10-01,291.846501,88.0,,,,
1,2012-10-02,295.89045,64.916667,,,,
2,2012-10-03,299.008542,44.875,,,,
3,2012-10-04,295.997917,66.625,,,,
4,2012-10-05,292.948333,70.25,,,,


In [None]:
# Show the first ten rows; the moving-average columns are still NaN here
# because even the shortest window (20 rows) has not filled yet.
df.head(10)

Unnamed: 0,Day,Observed,Humidity,20_Day,50_Day,100_Day,200_Day
0,2012-10-01,291.846501,88.0,,,,
1,2012-10-02,295.89045,64.916667,,,,
2,2012-10-03,299.008542,44.875,,,,
3,2012-10-04,295.997917,66.625,,,,
4,2012-10-05,292.948333,70.25,,,,
5,2012-10-06,292.36875,75.041667,,,,
6,2012-10-07,292.92875,74.583333,,,,
7,2012-10-08,293.91875,73.25,,,,
8,2012-10-09,292.440208,76.208333,,,,
9,2012-10-10,291.342708,70.416667,,,,


In [None]:
# Reshape to long format (one row per Day/Variable pair) so that every
# series can be drawn as its own coloured line.
melted = df.melt(
    id_vars='Day',
    value_vars=['Observed', '20_Day', '50_Day', '100_Day', '200_Day'],
    var_name='Variable',
    value_name='Value',
)

ilinechart(melted, 'Day', 'Value', groups='Variable', title='Moving Average Comparison')

In [None]:
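# Illustration of the weights a 20-row weighted moving average uses:
# linearly increasing values 1..20, normalised so that they sum to 1.
# weights = np.arange(1, 20+1)
# weights/weights.sum()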

In [None]:
def wma(df, field, window):
    """Linearly weighted moving average of ``df[field]``.

    Within each rolling window the oldest value gets weight 1 and the most
    recent value gets weight ``window``; the weighted sum is divided by the
    sum of the weights.

    Parameters
    ----------
    df : DataFrame containing the column to smooth.
    field : name of the column to smooth.
    window : number of consecutive rows in each window.

    Returns
    -------
    Series aligned with ``df``; rows before the first full window are NaN.
    """
    weights = np.arange(1, window + 1)
    # Loop-invariant: compute the normaliser once, not once per window.
    weight_total = weights.sum()
    # The result is returned directly so no local name shadows the function.
    return df[field].rolling(window).apply(
        lambda values: np.dot(values, weights) / weight_total, raw=True
    )

In [None]:
# Weighted moving averages of 'Observed' over 200-row and 20-row windows.
# The statement order is kept because it fixes the column order in df.
df['200_Day_WMA'] = wma(df, 'Observed', 200)
df['20_Day_WMA'] = wma(df, 'Observed', 20)


In [None]:
# Preview the frame with the weighted-moving-average columns added.
df.head()

Unnamed: 0,Day,Observed,Humidity,20_Day,50_Day,100_Day,200_Day,200_Day_WMA,20_Day_WMA
0,2012-10-01,291.846501,88.0,,,,,,
1,2012-10-02,295.89045,64.916667,,,,,,
2,2012-10-03,299.008542,44.875,,,,,,
3,2012-10-04,295.997917,66.625,,,,,,
4,2012-10-05,292.948333,70.25,,,,,,


In [None]:
# Long-format view of the raw series next to its 20-row simple and
# weighted moving averages, one coloured line per variable.
melted = df.melt(
    id_vars='Day',
    value_vars=['Observed', '20_Day', '20_Day_WMA'],
    var_name='Variable',
    value_name='Value',
)

ilinechart(melted, 'Day', 'Value', groups='Variable', title='Simple vs. Weighted Moving Averages')

In [None]:
# Simple Exponential Smoothing (no trend or seasonal component)
model = sm.tsa.ExponentialSmoothing(df['Observed']).fit()
# The prediction starts at observation 200, so after index alignment the
# rows before that position are NaN in the new column.
df['200_Day_SExp'] = model.predict(start=200)

In [None]:
# Long-format view comparing the raw series with the 200-row simple,
# weighted and exponentially smoothed versions.
melted = df.melt(
    id_vars='Day',
    value_vars=['Observed', '200_Day', '200_Day_WMA', '200_Day_SExp'],
    var_name='Variable',
    value_name='Value',
)

ilinechart(melted, 'Day', 'Value', groups='Variable', title='Smoothing Method Comparisons')

In [None]:
# Double Exponential Smoothing: level plus an additive trend component.
model = sm.tsa.ExponentialSmoothing(
    df['Observed'],
    trend='add',
).fit()
# Predictions begin at observation 200; earlier rows of the column are NaN.
df['200_Day_DExp'] = model.predict(start=200)

In [None]:
# Triple Exponential Smoothing: level, additive trend and additive seasonality.
# NOTE(review): seasonal_periods=4 means a 4-row cycle; the rows here are
# daily means, so confirm that this is the intended season length.
model = sm.tsa.ExponentialSmoothing(
    df['Observed'],
    trend='add',
    seasonal='add',
    seasonal_periods=4,
).fit()
# Predictions begin at observation 200; earlier rows of the column are NaN.
df['200_Day_TExp'] = model.predict(start=200)

In [None]:
# Long-format view comparing the raw series with the simple, double and
# triple exponential smoothing predictions.
melted = df.melt(
    id_vars='Day',
    value_vars=['Observed', '200_Day_SExp', '200_Day_DExp', '200_Day_TExp'],
    var_name='Variable',
    value_name='Value',
)

ilinechart(melted, 'Day', 'Value', groups='Variable', title='Smoothing Method Comparisons')

In [None]:
# Evaluate each exponential smoothing model with MAE and RMSE.
# Each entry: (printed label, error column to create, prediction column).
smoothing_models = [
    ('Simple MAE: ', 'SExp_Diff', '200_Day_SExp'),
    ('Double MAE: ', 'DExp_Diff', '200_Day_DExp'),
    ('Triple MAE: ', 'TExp_Diff', '200_Day_TExp'),
]

# First store every error column (observed minus predicted) ...
for mae_label, error_col, prediction_col in smoothing_models:
    df[error_col] = df['Observed'] - df[prediction_col]

# ... then report the metrics. Rows without a prediction are NaN and are
# skipped by the pandas mean.
for mae_label, error_col, prediction_col in smoothing_models:
    residuals = df[error_col]
    print(mae_label, residuals.abs().mean(),
          'RMSE: ', np.sqrt(np.mean(residuals**2)))

Simple MAE:  1.234780089584224 RMSE:  1.6420854733303023
Double MAE:  1.2348159554350198 RMSE:  1.642082999781356
Triple MAE:  1.3154465870840863 RMSE:  1.7350758787328517
