# Smooth time series
- Moving Average
- Savitzky-Golay Filter

## Loading the Time-Series

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.signal import savgol_filter
import plotly.express as px
from statsforecast import StatsForecast

  from tqdm.autonotebook import tqdm


In [4]:
# it has a recurrent (seasonal) yet not smooth behavior.
train = pd.read_csv('https://auto-arima-results.s3.amazonaws.com/M4-Hourly.csv')
test = pd.read_csv('https://auto-arima-results.s3.amazonaws.com/M4-Hourly-test.csv').rename(columns={'y': 'y_test'})
uid = np.array(['H386'])
df_train = train.query('unique_id in @uid')
df_test = test.query('unique_id in @uid')
StatsForecast.plot(df_train, df_test, plot_random = False, engine='plotly')

## Smoothing the Time-Series

In [8]:
computed_features = [] # I will need this list to plot later the smoothed series
for window_size in [10, 25]:
    df_train.loc[:,f'moving_average_{window_size}'] = df_train['y'].rolling(window=window_size, center=True).mean()
    df_train.loc[:,f'savgol_filter_{window_size}'] = savgol_filter(df_train['y'], window_size, 2)
    computed_features.append(f'moving_average_{window_size}')
    computed_features.append(f'savgol_filter_{window_size}')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [13]:
# window size 10
fig = px.line(df_train[df_train.ds>500], x='ds', y=['y'] + computed_features[:2], title='Different moving average estimators',
              labels={'Value': 'y', 'Date': 'Date'},
              line_shape='linear')

# Improve layout
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Sensor Value',
    hovermode='x'
)

fig.show()

In [15]:
# window size 25
fig = px.line(df_train[df_train.ds>500], x='ds', y=['y'] + computed_features[2:4], title='Different moving average estimators',
              labels={'Value': 'y', 'Date': 'Date'},
              line_shape='linear')

# Improve layout
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Sensor Value',
    hovermode='x'
)

fig.show()

#### Reference
1. [Stop using Moving Average to smooth your Time Series](https://medium.com/bip-xtech/stop-using-moving-average-to-smooth-your-time-series-2179af9ed59b)
2. [Savitzky–Golay filter](https://www.wikiwand.com/en/Savitzky%E2%80%93Golay_filter)