In this notebook I present the simplest approach to time series: reducing the problem to curve fitting. The content was presented live on Abhishek Thakur's YouTube channel, so it is probably best to watch the recording while following along in the notebook: https://www.youtube.com/watch?v=kAI67Sz92-s


In [None]:
import numpy as np 
import pandas as pd
import os

import matplotlib.pyplot as plt
import seaborn as sns


import statsmodels.api as sm

# Apply matplotlib's 'fivethirtyeight' style sheet to every figure drawn below.
plt.style.use('fivethirtyeight')

In [None]:
class CFG:
    """Notebook-wide plotting configuration.

    ``img_dim1`` and ``img_dim2`` are passed together as the
    ``(width, height)`` pair for matplotlib figure sizes.
    """

    img_dim1, img_dim2 = 20, 10
    
# plt.rcParams.update({'figure.figsize': (CFG.img_dim1,CFG.img_dim2)})    

# Data and EDA

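\begin{equation}
X_t = T_t + S_t + C_t + \epsilon_t
\end{equation}

Here $X_t$ is the observed series, $T_t$ the trend, $S_t$ the seasonal component, $C_t$ the cyclical component and $\epsilon_t$ the residual noise.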

Dedicated notebook: https://www.kaggle.com/konradb/practical-time-series-pt-1-the-basics


In [None]:
# Load the US energy series from the Kaggle input directory and preview it.
df = pd.read_csv(filepath_or_buffer='../input/tsdata-1/us_energy.csv')
df.head(n=3)

In [None]:
# Parse 'date' into timestamps and promote it to the index, then draw a quick
# overview plot of the series.
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')
df.plot()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# From here on every figure uses the dimensions configured in CFG.
plt.rcParams['figure.figsize'] = (CFG.img_dim1, CFG.img_dim2)

# Decompose the frame with a 12-observation period; no `model` is passed, so
# statsmodels' default (additive) decomposition is used.
decomposition = seasonal_decompose(df, period=12)
figure = decomposition.plot()
plt.show()

In [None]:
# Multiplicative decomposition of 'value' with a period of 11.
# NOTE(review): the neighbouring cells use period=12; presumably 11 is a
# deliberately mis-specified period shown for comparison. Confirm.
decomposition = seasonal_decompose(
    df['value'],
    model='multiplicative',
    period=11,
)
figure = decomposition.plot()
plt.show()

In [None]:
# Multiplicative decomposition of 'value' with a 12-observation period.
decomposition = seasonal_decompose(
    df['value'],
    model='multiplicative',
    period=12,
)
figure = decomposition.plot()
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Autocorrelation of the series. plot_acf returns the Figure it draws, so the
# cell ends with plt.show() (as the decomposition cells do) to render it
# without the Figure also becoming the cell's output value.
plot_acf(df['value'])
plt.show()

In [None]:
# Partial autocorrelation of the series. plt.show() renders the figure without
# the returned Figure also becoming the cell's output value.
plot_pacf(df['value'])
plt.show()

# Smoothing methods

Dedicated notebook: https://www.kaggle.com/konradb/practical-time-series-pt-2-smoothing-methods

In [None]:
# training / validation split
cutoff_date = '2005-12-31'

# Rescale into a new frame instead of `df['value'] /= 100`: the in-place form
# divides the shared frame again each time this cell is re-run, silently
# changing the data the split is taken from.
df_scaled = df.assign(value=df['value'] / 100)

# Rows up to and including the cutoff are training data; later rows are validation.
xtrain = df_scaled.loc[df_scaled.index <= cutoff_date]
xvalid = df_scaled.loc[df_scaled.index > cutoff_date]
print(xtrain.shape, xvalid.shape)

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing


# Holt-Winters specification: multiplicative trend and multiplicative
# seasonality with a 12-observation season, built on the raw training values
# (a NumPy array, so the date index is not carried into the model).
fit1 = ExponentialSmoothing(
    xtrain['value'].to_numpy(),
    trend='mul',
    seasonal='mul',
    seasonal_periods=12,
)

In [None]:
# Estimate the model. `fit1` is rebound from the unfitted model to the object
# returned by .fit(), so re-running this cell on its own would call .fit() on
# that result rather than on the model; re-run the previous cell first.
fit1 = fit1.fit()

In [None]:
# Forecast one step per validation row.
prediction = fit1.forecast(steps=len(xvalid)).copy()

# Validation residuals: actual value minus forecast, element by element.
xresiduals = xvalid['value'].sub(prediction)

In [None]:
# Display the fitted model's parameters as the cell output.
fit1.params_formatted

In [None]:
# Autocorrelation of the validation residuals up to lag 25. plt.show() renders
# the figure without the returned Figure also becoming the cell's output value.
plot_acf(xresiduals, lags = 25)
plt.show()

In [None]:
# Partial autocorrelation of the validation residuals up to lag 25. plt.show()
# renders the figure without the returned Figure also becoming the cell's
# output value.
plot_pacf(xresiduals, lags = 25)
plt.show()

# Prophet

Dedicated notebook:  https://www.kaggle.com/konradb/practical-time-series-pt-4-prophet

Dataset: https://www.kaggle.com/piantic/autonomous-greenhouse-challengeagc-2nd-2019

In [None]:
# The package was renamed from `fbprophet` to `prophet` with the 1.0 release.
# Try the current name first and fall back to the legacy one so the notebook
# runs in both old and new environments.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

In [None]:
# Load the greenhouse climate measurements and preview the first rows.
xdat = pd.read_csv(filepath_or_buffer='../input/greenhouse-dataset/GreenhouseClimate1.csv')
xdat.head(n=3)

In [None]:
# 'time' holds a day count; turn it into timestamps counted from 1899-12-30.
xdat = xdat.assign(
    time=pd.to_datetime(xdat['time'], origin="1899-12-30", unit='D')
)
xdat.head(n=3)

In [None]:
# Timestamp plus the 'Tair' column that is modelled further down.
list1 = ['time', 'Tair']

# Setpoint ('_sp') columns, used later as candidate regressors.
list2 = [
    'co2_sp',
    'dx_sp',
    't_rail_min_sp',
    't_grow_min_sp',
    'assim_sp',
    'scr_enrg_sp',
    'scr_blck_sp',
    't_heat_sp',
    't_vent_sp',
    'window_pos_lee_sp',
    'water_sup_intervals_sp_min',
    'int_blue_sp',
    'int_red_sp',
    'int_farred_sp',
    'int_white_sp',
]

# Restrict the frame to those columns, in that order.
xdat = xdat.loc[:, list1 + list2]

In [None]:
# Aggregate to hourly means keyed on 'time', turn 'time' back into a regular
# column and keep the list1 + list2 column order.
xdat2 = xdat.resample('H', on = 'time').mean().reset_index()[list1 + list2]

In [None]:
# Summarise, per column, how many hourly values are missing: np.isnan yields a
# boolean frame and describe() reports its counts and most frequent value.
np.isnan(xdat2[['Tair'] + list2]).describe()


In [None]:
# Drop the hours for which 'Tair' is missing. The missing-value summary is
# shown by the preceding cell; repeating it here mid-cell would only compute
# it without displaying it. The frame is rebound rather than mutated in place.
xdat2 = xdat2.dropna(subset = ['Tair'])

In [None]:
# Plot the first 1000 hourly 'Tair' values (selected by position).
xdat2['Tair'].iloc[:1000].plot()

In [None]:
# Build the two-column frame handed to Prophet below, renaming the timestamp to
# 'ds' and the target to 'y'. This rebinds `df`, which held the energy series
# earlier in the notebook.
df = xdat2[['time', 'Tair']].rename(columns={"time": "ds", "Tair": "y"})

In [None]:
# Prophet model with weekly seasonality switched off, 95% uncertainty intervals
# and mcmc_samples=10 (per the Prophet docs, a value above 0 requests full
# Bayesian inference with that many MCMC samples instead of MAP estimation).
m = Prophet(weekly_seasonality=False, 
            interval_width = 0.95,
            mcmc_samples = 10)

In [None]:
# Fit the model on the 'ds'/'y' frame.
m.fit(df)

In [None]:
# Extend the frame by 24 hourly steps, predict over it and plot the fitted
# components. plt.show() renders the figure without the Figure returned by
# plot_components also becoming the cell's output value.
future = m.make_future_dataframe(periods = 24, freq = 'H')
forecast = m.predict(future)
m.plot_components(forecast)
plt.show()

In [None]:
# Same model as above but without mcmc_samples, refit on the same frame.
m = Prophet(weekly_seasonality=False, interval_width = 0.95)

m.fit(df)

# Forecast 24 hourly steps ahead and plot the components at the configured
# figure size. plt.show() renders the figure without the Figure returned by
# plot_components also becoming the cell's output value.
future = m.make_future_dataframe(periods= 24, freq = 'H')
forecast = m.predict(future)
m.plot_components(forecast, figsize=(CFG.img_dim1, CFG.img_dim2))
plt.show()

In [None]:
# The package was renamed from `fbprophet` to `prophet` with the 1.0 release.
# Try the current name first and fall back to the legacy one.
try:
    from prophet.diagnostics import cross_validation, performance_metrics
except ImportError:
    from fbprophet.diagnostics import cross_validation, performance_metrics

In [None]:
# Cross-validate the fitted model: 3700 hours of initial training data, a new
# cutoff every 24 hours, each forecasting 24 hours ahead.
df_cv = cross_validation(
    m,
    initial='3700 hours',
    period='24 hours',
    horizon='24 hours',
)
df_cv.head(n=10)

In [None]:
# Error metrics for the model without extra regressors; preview the first rows.
df_p1 = performance_metrics(df_cv)
df_p1.head(n=10)

In [None]:
from sklearn.linear_model import Ridge
from sklearn.feature_selection import RFE
# incorporate the climate and irrigation setpoints

# Keep only the rows in which every column is present.
xdat2 = xdat2.dropna()

# Recursive feature elimination around a Ridge regressor: one feature is
# removed per step until three setpoint columns remain.
estimator = Ridge()
selector = RFE(estimator, n_features_to_select=3, step=1).fit(
    xdat2[list2], xdat2['Tair']
)

# Names of the setpoint columns the selector retained.
to_keep = xdat2[list2].columns[selector.support_]

print(to_keep)

In [None]:
# Frame for Prophet: 'ds', 'y' and, after them, the selected setpoint columns.
df = xdat2[['time', 'Tair'] + list(to_keep)].rename(
    columns={"time": "ds", "Tair": "y"}
)

m = Prophet(weekly_seasonality=False, interval_width = 0.95)

# register every selected column as an additional regressor
for f in to_keep:
    m.add_regressor(f)

# the rest proceeds as before.
m.fit(df)

In [None]:
# we repeat the same evaluation tactic as before
df_cv = cross_validation(
    m,
    initial='3700 hours',
    period='24 hours',
    horizon='24 hours',
)
# Error metrics for the model that includes the extra regressors.
df_p2 = performance_metrics(df_cv)

In [None]:
# Average every error metric over the forecast horizons, once for the plain
# model ('raw') and once for the model with regressors ('covariates').
# 'horizon' is removed by name before averaging: a positional `[1:]` slice on
# the averaged result only drops the right entry if 'horizon' comes first.
comparison = pd.DataFrame()
comparison['raw'] = df_p1.drop(columns='horizon').mean(axis = 0)
comparison['covariates'] = df_p2.drop(columns='horizon').mean(axis = 0)

print(comparison)

The neural-network component of NeuralProphet (AR-Net) is described in this paper: https://www.arxiv-vanity.com/papers/1911.12436/


In [None]:
!pip install neuralprophet

In [None]:
from neuralprophet import NeuralProphet

In [None]:
# Rebuild the plain 'ds'/'y' frame (no regressor columns) for NeuralProphet.
df = xdat2.loc[:, ['time', 'Tair']].rename(columns={"time": "ds", "Tair": "y"})

model = NeuralProphet(weekly_seasonality = False)
model.fit(df, freq="H")

# forecast: build a frame 24 periods ahead, predict on it and plot the result
df_predict = model.predict(model.make_future_dataframe(df, periods= 24))
fig = model.plot(df_predict)