In [2]:
# Imports and one-time notebook setup for the whole analysis.
import pandas as pd
# Wildcard import: later cells use Scatter and Figure without any other import,
# so they come from here.
from plotly.graph_objects import *
from plotly.offline import init_notebook_mode,iplot
# Presumably prepares plotly's offline rendering for the iplot calls below — TODO confirm
# what connected=True implies for the installed plotly version.
init_notebook_mode(connected=True)
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose
from plotly.subplots import make_subplots
import warnings
# mean_absolute_error is not used in the cells visible here; r2_score is used at the end.
from sklearn.metrics import mean_absolute_error, r2_score
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

# Exponential smoothing model classes used in the forecasting cells.
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt, ExponentialSmoothing



In [4]:
# Airline-passengers CSV; later cells read its 'Month' and 'Passengers' columns.
# NOTE(review): the URL carries a `token=` query parameter, i.e. an access
# credential hardcoded in the notebook. Confirm whether it is still valid or
# needed, and avoid committing it.
DATA_URL = 'https://raw.githubusercontent.com/robot-dreams-code/Machine-Learning-2/main/datasets/airline-passengers.csv?token=GHSAT0AAAAAACBLQFT2SNJAKDAQP7LD7TNQZCIB4JQ'
df = pd.read_csv(DATA_URL)
# Preview the first rows.
df.head()



Unnamed: 0,Month,Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


In [13]:
# Line chart of the raw passenger counts over the 'Month' column.
passengers_fig = px.line(df, x='Month', y='Passengers')
passengers_fig


In [15]:
# Convert the 'Month' column to datetime values.
df['Month'] = pd.to_datetime(df['Month'])

# Passenger counts as an unnamed Series indexed by month.
passenger_counts = df['Passengers'].to_numpy()
s = pd.Series(passenger_counts, index=df['Month'])
s


Month
1949-01-01    112
1949-02-01    118
1949-03-01    132
1949-04-01    129
1949-05-01    121
             ... 
1960-08-01    606
1960-09-01    508
1960-10-01    461
1960-11-01    390
1960-12-01    432
Length: 144, dtype: int64

In [20]:
# Decompose the series into trend, seasonal and residual components.
# model='additive' is used here; 'multiplicative' is the alternative.
result = seasonal_decompose(s, model='additive')

# Print the components in the order: trend, residual, seasonal.
for component in (result.trend, result.resid, result.seasonal):
    print(component)


Month
1949-01-01   NaN
1949-02-01   NaN
1949-03-01   NaN
1949-04-01   NaN
1949-05-01   NaN
              ..
1960-08-01   NaN
1960-09-01   NaN
1960-10-01   NaN
1960-11-01   NaN
1960-12-01   NaN
Name: trend, Length: 144, dtype: float64
Month
1949-01-01   NaN
1949-02-01   NaN
1949-03-01   NaN
1949-04-01   NaN
1949-05-01   NaN
              ..
1960-08-01   NaN
1960-09-01   NaN
1960-10-01   NaN
1960-11-01   NaN
1960-12-01   NaN
Name: resid, Length: 144, dtype: float64
Month
1949-01-01   -24.748737
1949-02-01   -36.188131
1949-03-01    -2.241162
1949-04-01    -8.036616
1949-05-01    -4.506313
                ...    
1960-08-01    62.823232
1960-09-01    16.520202
1960-10-01   -20.642677
1960-11-01   -53.593434
1960-12-01   -28.619949
Name: seasonal, Length: 144, dtype: float64


In [21]:
# Plot the decomposition components as three stacked subplots (one per row).
fig = make_subplots(rows=3, cols=1)

# (subplot row, legend name, component series)
decomposition_panels = [
    (1, 'Trend', result.trend),
    (2, 'Seasonal', result.seasonal),
    (3, 'Residual', result.resid),
]
for panel_row, panel_name, component in decomposition_panels:
    # add_trace with row/col replaces the deprecated Figure.append_trace.
    fig.add_trace(
        Scatter(x=component.index, y=component.values, name=panel_name),
        row=panel_row,
        col=1,
    )
fig.show()

In [23]:
# Histogram of the residual component from the decomposition.
residual_values = result.resid.values
px.histogram(x=residual_values)


In [24]:
# Naive baseline: the forecast for each row is the previous row's value.
previous_value = df['Passengers'].shift(periods=1)
df['yhat_simple'] = previous_value
df




Unnamed: 0,Month,Passengers,yhat_simple,yhat_year
0,1949-01-01,112,,
1,1949-02-01,118,112.0,
2,1949-03-01,132,118.0,
3,1949-04-01,129,132.0,
4,1949-05-01,121,129.0,
...,...,...,...,...
139,1960-08-01,606,622.0,559.0
140,1960-09-01,508,606.0,463.0
141,1960-10-01,461,508.0,407.0
142,1960-11-01,390,461.0,362.0


In [11]:
# Actual values against the one-step-ahead (one month) naive forecast.
naive_columns = ['Passengers', 'yhat_simple']
px.line(df, x='Month', y=naive_columns)




In [25]:
# Lag-12 baseline: each value is forecast by the value 12 rows earlier.
# Original author's note: apply this to the seasonal component.
value_12_rows_back = df['Passengers'].shift(periods=12)
df['yhat_year'] = value_12_rows_back

yearly_columns = ['Passengers', 'yhat_year']
px.line(df, x='Month', y=yearly_columns)

In [30]:
from sklearn.model_selection import TimeSeriesSplit

# Each test fold holds 12 observations, i.e. a one-year forecast window.
TEST_FOLD_SIZE = 12
tscv = TimeSeriesSplit(test_size=TEST_FOLD_SIZE)



132
12


In [33]:
# Draw one figure per cross-validation fold: the full series in grey with the
# fold's train and test segments on top.
trace0 = Scatter(x=s.index,
                 y=s.values,
                 line={'color': 'grey'},
                 name='Original data')

for i, (train_index, test_index) in enumerate(tscv.split(s)):
    # split() yields integer positions, so select with .iloc. Plain s[...]
    # with integer keys on a datetime index relies on a positional fallback
    # that pandas has deprecated.
    train = s.iloc[train_index]
    test = s.iloc[test_index]

    trace1 = Scatter(x=train.index,
                     y=train.values,
                     name='Train')
    trace2 = Scatter(x=test.index,
                     y=test.values,
                     name='test')

    # Label each figure with its fold number so the plots can be told apart.
    fold_layout = {'title': {'text': f'Fold {i + 1}'}}
    iplot(Figure(data=[trace0, trace1, trace2], layout=fold_layout))






In [34]:
# Hold out the last 12 observations as the test set; everything before is train.
train = s.iloc[:-12]
test = s.iloc[-12:]

# Simple exponential smoothing, fitted with smoothing_level=0.8.
ses = SimpleExpSmoothing(train)
ses_fitted = ses.fit(smoothing_level=0.8)

# In-sample fitted values of the model.
ses_fitted.fittedvalues



Month
1949-01-01    113.728445
1949-02-01    112.345689
1949-03-01    116.869138
1949-04-01    128.973828
1949-05-01    128.994766
                 ...    
1959-08-01    530.524340
1959-09-01    553.304868
1959-10-01    481.060974
1959-11-01    421.812195
1959-12-01    373.962439
Length: 132, dtype: float64

In [35]:
# Training data against the in-sample fitted values of the SES model.
ses_in_sample = ses_fitted.fittedvalues
ses_train_trace = Scatter(x=train.index, y=train.values, name='Train')
ses_fit_trace = Scatter(x=ses_in_sample.index,
                        y=ses_in_sample.values,
                        name='Prediction')
iplot(Figure(data=[ses_train_trace, ses_fit_trace]))

In [36]:
# Forecast as many steps ahead as there are test observations.
n_test_steps = len(test)
ses_fitted.forecast(n_test_steps)

1960-01-01    398.792488
1960-02-01    398.792488
1960-03-01    398.792488
1960-04-01    398.792488
1960-05-01    398.792488
1960-06-01    398.792488
1960-07-01    398.792488
1960-08-01    398.792488
1960-09-01    398.792488
1960-10-01    398.792488
1960-11-01    398.792488
1960-12-01    398.792488
Freq: MS, dtype: float64

In [37]:
# Holt's exponential smoothing, fitted with smoothing_level=0.8 and
# smoothing_trend=0.8. Holt's method smooths level and trend (double
# exponential smoothing), not triple as the earlier comment said; the
# seasonal variant is fitted further below with ExponentialSmoothing.
holt = Holt(train)
holt_fitted = holt.fit(smoothing_level=0.8, smoothing_trend=0.8)

# Training data against the model's in-sample fitted values.
holt_in_sample = holt_fitted.fittedvalues
holt_train_trace = Scatter(x=train.index, y=train.values, name='Train')
holt_fit_trace = Scatter(x=holt_in_sample.index,
                         y=holt_in_sample.values,
                         name='Prediction')
iplot(Figure(data=[holt_train_trace, holt_fit_trace]))


In [38]:
# Holt forecast over a horizon equal to the number of test observations.
holt_horizon = len(test)
holt_fitted.forecast(holt_horizon)




1960-01-01    392.767693
1960-02-01    401.871883
1960-03-01    410.976072
1960-04-01    420.080262
1960-05-01    429.184451
1960-06-01    438.288641
1960-07-01    447.392830
1960-08-01    456.497020
1960-09-01    465.601209
1960-10-01    474.705398
1960-11-01    483.809588
1960-12-01    492.913777
Freq: MS, dtype: float64

In [40]:
# Exponential smoothing with additive trend and additive seasonality,
# using a seasonal period of 12 observations.
smoothing_options = {
    'trend': 'add',
    'seasonal': 'add',
    'seasonal_periods': 12,
}
mod = ExponentialSmoothing(train, **smoothing_options)
mod_fit = mod.fit()

# Forecast one step for every observation in the test set.
preds = mod_fit.forecast(len(test))
preds


1960-01-01    415.454504
1960-02-01    397.087978
1960-03-01    457.470087
1960-04-01    445.367944
1960-05-01    466.186561
1960-06-01    520.730604
1960-07-01    592.422802
1960-08-01    598.842971
1960-09-01    498.828449
1960-10-01    442.382473
1960-11-01    396.515020
1960-12-01    424.291227
Freq: MS, dtype: float64

In [41]:
# Held-out test data against the forecast stored in `preds`.
# The trailing re-import of r2_score was dropped: the top import cell
# already provides it.
test_trace = Scatter(x=test.index,
                     y=test.values,
                     name='Test data')
forecast_trace = Scatter(x=preds.index,
                         y=preds.values,
                         name='Prediction')
iplot(Figure([test_trace, forecast_trace]))


In [42]:
# R^2 of the forecast on the held-out test set (true values first, then predictions).
forecast_r2 = r2_score(test, preds)
forecast_r2

0.9479522235035107