## Preparation

In [None]:
!pip install sktime

In [None]:
import numpy as np
import pandas as pd
from sktime.utils.plotting import plot_series
from warnings import simplefilter

data = pd.read_csv("../input/all-stocks-5yr/all_stocks_5yr.csv")

simplefilter("ignore", FutureWarning)
%matplotlib inline

In [None]:
data.shape

In [None]:
data.info()

In [None]:
# sort_values returns a new frame; assign it so the chronological order is
# actually kept for the time-ordered splits further down (a stable sort
# keeps the existing row order for equal dates).
data = data.sort_values('date', kind='mergesort')
data

In [None]:
data["date"] = pd.to_datetime(data["date"])
data.set_index("date", inplace=True)
data.head()

In [None]:
data = data.fillna(0)#.resample("D").last()

data

In [None]:
data = data.loc[data['Name'].values == "AAPL",:]

In [None]:
data['open']

In [None]:
data['open'].plot.line()

In [None]:
 plot_series(data['open'])

## Декомпозиція часового ряду

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
plt.figure(figsize=(12,8))
plt.plot(data['open'])
plt.show()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# `period` (observations per seasonal cycle) replaces the deprecated `freq`
# keyword of seasonal_decompose.
# NOTE(review): 365 is kept from the original call; if the index only holds
# trading days, a yearly cycle would be shorter than 365 rows - confirm.
result_mul = seasonal_decompose(data['open'], model='multiplicative', period=365)
result_add = seasonal_decompose(data['open'], model='additive', period=365)

# This changes the default figure size for every later plot in the session.
plt.rcParams.update({'figure.figsize': (10,20)})
result_mul.plot().suptitle('Multiplicative Decompose', fontsize=22)
result_add.plot().suptitle('Additive Decompose', fontsize=22)
plt.show()

## TimeSeriesSplit

In [None]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

X = data['open']
tscv = TimeSeriesSplit(n_splits=5)

In [None]:
# tscv.split yields positional row numbers, so select with .iloc; plain
# X[...] on a datetime-indexed Series only works through a deprecated
# positional fallback. After the loop X_train / X_test hold the last fold.
for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]

## Naive

In [None]:
X_train.index

In [None]:
# Upsample to a daily index and forward-fill the inserted days (at most 10
# in a row); ffill(limit=...) replaces the deprecated Resampler.pad.
X_train = X_train.resample('D').ffill(limit=10)

In [None]:
# Upsample to a daily index and forward-fill the inserted days (at most 10
# in a row); ffill(limit=...) replaces the deprecated Resampler.pad.
X_test = X_test.resample('D').ffill(limit=10)

In [None]:
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import sMAPE, smape_loss

# Baseline: repeat the last observed values (sp=10 is passed as the
# seasonal periodicity of the naive forecaster).
forecaster = NaiveForecaster(strategy="last", sp=10)
forecaster.fit(X_train)

# Relative forecasting horizon 1..len(X_test): one step per test-set row.
fh = np.arange(len(X_test)) + 1

y_pred = forecaster.predict(fh)

plot_series(X_train, X_test, y_pred, labels=["y_train", "y_test", "y_pred"])

# Ground truth first, forecast second - the argument order used by every
# other smape_loss call in this notebook.
smape_loss(X_test, y_pred)

In [None]:
len(fh)

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor

from sktime.forecasting.compose import (
    EnsembleForecaster,
    MultiplexForecaster,
    TransformedTargetForecaster,
    make_reduction,
)
from sktime.transformations.series.detrend import Deseasonalizer, Detrender
from sktime.forecasting.trend import PolynomialTrendForecaster

regressor = KNeighborsRegressor(n_neighbors=5)

#regressor = RandomForestRegressor(max_depth=10)

forecaster = TransformedTargetForecaster(
    [
        ("deseasonalize", Deseasonalizer(model="additive", sp=7)),
        ("detrend", Detrender(forecaster=PolynomialTrendForecaster(degree=1))),
        ("forecast",
            make_reduction(
                regressor,
                scitype="tabular-regressor",
                window_length=42,
                strategy="recursive",
            ),
        ),
    ]
)

forecaster.fit(X_train)
y_pred = forecaster.predict(fh)
plot_series(X_train, X_test, y_pred, labels=["y_train", "y_test", "y_pred"])
smape_loss(X_test, y_pred)

## AR

In [None]:
X_train

In [None]:
# AR example
from statsmodels.tsa.ar_model import AR

model = AR(X_train, missing='drop', freq='D')

model_fit = model.fit(maxlag=2, method='mle', disp=-1)

In [None]:
model_fit.predict(len(X_train), len(X_train)+10).plot.line()

In [None]:
def mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values. A zero entry makes its term infinite or NaN,
        because each error is divided by the observed value.
    y_pred : array-like
        Forecast values, one per observation (or a single value that is
        compared against every observation).

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Raises
    ------
    ValueError
        If the two inputs hold a different number of values.
    """
    # Flatten both inputs: a column vector (n, 1) against a flat vector (n,)
    # would otherwise broadcast to an (n, n) matrix and yield a wrong score.
    actual = np.asarray(y_true, dtype=float).ravel()
    forecast = np.asarray(y_pred, dtype=float).ravel()
    if actual.size != forecast.size and 1 not in (actual.size, forecast.size):
        raise ValueError(
            "y_true and y_pred must hold the same number of values, got "
            f"{actual.size} and {forecast.size}"
        )
    return np.mean(np.abs((actual - forecast) / actual)) * 100

y_pred = model_fit.predict(len(X_train), len(X_train)+len(X_test)-1)

mape(X_test, y_pred), smape_loss(X_test, y_pred)

In [None]:
plot_series(X_train, X_test, y_pred, labels=["y_train", "y_test", "y_pred"])

In [None]:
model_fit.predict(len(X_train), len(X_train)+19), X_test[0:20]

## MA

In [None]:
# MA example
from statsmodels.tsa.arima_model import ARMA

model = ARMA(X_train, order=(0, 5), freq='D', missing='drop')

model_fit = model.fit(disp=False)

In [None]:
yhat = model_fit.predict(len(X_train), len(X_train)+10)
print(yhat)

In [None]:
y_pred = model_fit.predict(len(X_train), len(X_train)+len(X_test)-1)

mape(X_test,y_pred), smape_loss(X_test,y_pred)

In [None]:
plot_series(X_train, X_test, y_pred, labels=["y_train", "y_test", "y_pred"])

## ARMA

In [None]:
# ARMA example
from statsmodels.tsa.arima_model import ARMA
model = ARMA(X_train, order=(3, 3), freq='D', missing='drop')
model_fit = model.fit(method='mle', disp=-1)

In [None]:
yhat = model_fit.predict(len(X_train), len(X_train)+19)
print(yhat)

In [None]:
y_pred = model_fit.predict(len(X_train), len(X_train)+len(X_test)-1)

mape(X_test,y_pred), smape_loss(X_test,y_pred)

In [None]:
plot_series(X_train, X_test, y_pred, labels=["y_train", "y_test", "y_pred"])

## ARIMA

In [None]:
# ARIMA example
from statsmodels.tsa.arima_model import ARIMA
model = ARIMA(X_train.astype('float64'), order=(15, 1, 5), freq='D', missing='drop')
model_fit = model.fit(disp=False)

In [None]:
yhat = model_fit.predict(len(X_train), len(X_train)+19, typ='levels')
print(yhat)

In [None]:
y_pred = model_fit.predict(len(X_train), len(X_train)+len(X_test)-1, typ='levels')

mape(X_test,y_pred), smape_loss(X_test,y_pred)

In [None]:
plot_series(X_train, X_test, y_pred, labels=["y_train", "y_test", "y_pred"])

In [None]:
len(X_test)

In [None]:
# Walk-forward evaluation of ARIMA(2, 1, 2): one MAPE value per fold.
results = []
for train_index, test_index in tscv.split(X):
    # Split indices are positional -> .iloc. Both parts are upsampled to a
    # daily index and forward-filled (at most 10 consecutive days);
    # ffill(limit=...) replaces the deprecated Resampler.pad.
    X_train = X.iloc[train_index].resample('D').ffill(limit=10)
    X_test = X.iloc[test_index].resample('D').ffill(limit=10)
    model = ARIMA(X_train, order=(2, 1, 2), freq='D', missing='drop')
    model_fit = model.fit(disp=False)
    # Forecast exactly len(X_test) steps past the end of the training data.
    forecast = model_fit.predict(len(X_train), len(X_train) + len(X_test) - 1, typ='levels')
    results.append(mape(X_test, forecast))

np.mean(np.array(results))

In [None]:
np.mean(np.array(results))

In [None]:
np.array(results)

In [None]:
!pip install pmdarima

In [None]:
from sktime.forecasting.arima import AutoARIMA

forecaster = AutoARIMA(sp=12, suppress_warnings=True)
forecaster.fit(X_train)
y_pred = forecaster.predict(fh)
plot_series(X_train, X_test, y_pred, labels=["y_train", "y_test", "y_pred"])
smape_loss(X_test, y_pred)

## SARIMA

In [None]:
# SARIMA example
from statsmodels.tsa.statespace.sarimax import SARIMAX
model = SARIMAX(X_train, order=(2, 1, 2), freq='D', seasonal_order=(2, 1, 2, 6))
model_fit = model.fit(disp=False)

In [None]:
# make prediction
yhat = model_fit.predict(len(X_train), len(X_train)+19)
print(yhat)

In [None]:
X_test.shape

In [None]:
y_pred = model_fit.predict(len(X_train), len(X_train)+len(X_test)-1, typ='levels')

mape(X_test,y_pred), smape_loss(X_test,y_pred)

In [None]:
plot_series(X_train, X_test, y_pred, labels=["y_train", "y_test", "y_pred"])

In [None]:
# Walk-forward evaluation of SARIMAX(2, 1, 2)x(2, 1, 2, 6): one MAPE per fold.
results = []
for train_index, test_index in tscv.split(X):
    # Split indices are positional, so select rows with .iloc; plain X[...]
    # on a datetime-indexed Series relies on a deprecated fallback.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    # The model is fit on the bare values (no date index, no resampling).
    model = SARIMAX(X_train.values, order=(2, 1, 2),
                seasonal_order=(2, 1, 2, 6))
    model_fit = model.fit(disp=False)
    # Forecast exactly len(X_test) steps past the end of the training data.
    forecast = model_fit.predict(len(X_train), len(X_train) + len(X_test) - 1, typ='levels')
    results.append(mape(X_test, forecast))

In [None]:
np.var(np.array(results))

In [None]:
results

In [None]:
model_fit.summary()

## VAR

In [None]:
# Multivariate input for VAR: the four price columns.
X = data.loc[:,['open', 'high', 'low', 'close']]

# Only the last fold survives the loop. Both parts are upsampled to a daily
# index and forward-filled (at most 10 consecutive days); ffill(limit=...)
# replaces the deprecated Resampler.pad.
for train_index, test_index in tscv.split(X):
    X_train = X.iloc[train_index, :].resample('D').ffill(limit=10)
    X_test = X.iloc[test_index, :].resample('D').ffill(limit=10)

# VAR example
from statsmodels.tsa.vector_ar.var_model import VAR
# fit model
model = VAR(X_train, freq="D", missing='drop') #

model_fit = model.fit(maxlags=10)

In [None]:
# make prediction
yhat = model_fit.forecast(model_fit.y, steps=X_test.shape[0])
print(yhat)

In [None]:
y_pred

In [None]:
X_test.iloc[:, 0].values

In [None]:
y_pred = model_fit.forecast(model_fit.y, steps=X_test.shape[0])

pred = pd.Series(y_pred[:, 0])#.reindex(X_test.index)
pred.index = X_test.index

mape(X_test.iloc[:, 0].values,y_pred[:, 0]), smape_loss(X_test.iloc[:, 0], pred)

In [None]:
plot_series(X_train.iloc[:, 0], X_test.iloc[:, 0], pred, labels=["y_train", "y_test", "y_pred"])

## LSTM

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense, Activation, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

In [None]:
data

In [None]:
# Scale the first data column to [0, 1]; reshape((-1,1)) gives the single
# column 2-D shape that the scaler is called with here.
# NOTE(review): the scaler is fit on every row, i.e. before the train/test
# split made in the next cell, so its min/max also reflect rows that end up
# in the test portion - confirm this is intended.
scaler = MinMaxScaler(feature_range = (0, 1))
dataset = scaler.fit_transform(data.iloc[:,0].values.reshape((-1,1)))

# Print a few values.
dataset[0:5]

In [None]:
TRAIN_SIZE = 0.60

train_size = int(len(dataset) * TRAIN_SIZE)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
print("Number of entries (training set, test set): " + str((len(train), len(test))))

In [None]:
def create_dataset(dataset, window_size = 100):
    """Cut a 2-D array into sliding windows and their next-step targets.

    Sample ``k`` is ``dataset[k:k + window_size, 0]`` and its target is
    ``dataset[k + window_size, 0]``. ``len(dataset) - window_size - 1``
    samples are produced, so the final row of ``dataset`` is never used
    as a target.

    Returns a tuple ``(windows, targets)`` of numpy arrays.
    """
    n_samples = len(dataset) - window_size - 1
    windows = [dataset[start:start + window_size, 0] for start in range(n_samples)]
    targets = [dataset[start + window_size, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

In [None]:
window_size = 500
train_X, train_Y = create_dataset(train, window_size)
test_X, test_Y = create_dataset(test, window_size)
print("Original training data shape:")
print(train_X.shape)

# Reshape the input data into appropriate form for Keras.
train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))
print("New training data shape:")
print(train_X.shape)

In [None]:
train_X

In [None]:
train_Y

In [None]:
def fit_model(train_X, train_Y, window_size = 500):
    """Build and train a small LSTM regressor.

    The network is an LSTM layer with 4 units, fed inputs of shape
    ``(1, window_size)``, followed by a single-unit Dense layer. It is
    compiled with mean-squared-error loss and the Adam optimizer, then
    trained on ``train_X`` / ``train_Y`` for 20 epochs with batch size 1.

    Returns the fitted model.
    """
    network = Sequential()
    network.add(LSTM(4, input_shape = (1, window_size)))
    network.add(Dense(1))

    network.compile(optimizer = "adam", loss = "mean_squared_error")

    # verbose = 2 is passed through to Keras' fit unchanged.
    network.fit(train_X, train_Y, batch_size = 1, epochs = 20, verbose = 2)

    return network

# Fit the first model.
model1 = fit_model(train_X, train_Y, window_size)

In [None]:
import math

def predict_and_score(model, X, Y):
    """Predict with ``model`` and score the result in original units.

    Both the predictions for ``X`` and the targets ``Y`` are mapped back
    through the module-level ``scaler`` before scoring.

    Returns a tuple ``(score, pred)``: ``score`` is the MAPE in percent and
    ``pred`` is the inverse-transformed prediction array.
    """
    pred = scaler.inverse_transform(model.predict(X))
    # inverse_transform needs 2-D input, so Y is wrapped as a single row.
    orig_data = scaler.inverse_transform([Y])
    # Return the MAPE itself: the callers print this value as "MAPE", and a
    # square root is only meaningful on top of a squared-error metric.
    score = mape(orig_data[0], pred[:, 0])
    return(score, pred)

rmse_train, train_predict = predict_and_score(model1, train_X, train_Y)
rmse_test, test_predict = predict_and_score(model1, test_X, test_Y)

print("Training data score: %.2f MAPE" % rmse_train)
print("Test data score: %.2f MAPE" % rmse_test)

In [None]:
# Start with training predictions.
train_predict_plot = np.empty_like(dataset)
train_predict_plot[:, :] = np.nan
train_predict_plot[window_size:len(train_predict) + window_size, :] = train_predict

# Add test predictions.
test_predict_plot = np.empty_like(dataset)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict) + (window_size * 2) + 1:len(dataset) - 1, :] = test_predict

# Create the plot.
plt.figure(figsize = (15, 10))
plt.plot(scaler.inverse_transform(dataset), label = "True value")
plt.plot(train_predict_plot, label = "Training set prediction")
#plt.plot(test_predict_plot, label = "Test set prediction")
plt.xlabel("Months")
plt.ylabel("1000 International Airline Passengers")
plt.title("Comparison true vs. predicted training / test")
plt.legend()
plt.show()

In [None]:
# univariate multi-step vector-output stacked lstm example
from numpy import array
from tensorflow.keras.models import Sequential
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense, Activation, Dropout

def split_sequence(sequence, n_steps_in, n_steps_out):
    """Slice ``sequence`` into (input window, output window) training pairs.

    For each start position the input is the next ``n_steps_in`` items and
    the output is the ``n_steps_out`` items right after them. Generation
    stops at the first start position whose output window would run past
    the end of ``sequence``.

    Returns two numpy arrays ``(inputs, outputs)``.
    """
    span = n_steps_in + n_steps_out
    inputs, outputs = [], []
    for start in range(len(sequence)):
        if start + span > len(sequence):
            break
        cut = start + n_steps_in
        inputs.append(sequence[start:cut])
        outputs.append(sequence[cut:start + span])
    return array(inputs), array(outputs)

# Normalise all columns by the single largest value in the training frame.
raw_seq = X_train.values/np.max(X_train.values)
n_steps_in, n_steps_out = 100, 50
X, y = split_sequence(raw_seq, n_steps_in, n_steps_out)
# Number of columns in X_train; the reshape below fails if it does not match.
n_features = 4
X = X.reshape((X.shape[0], X.shape[1], n_features))

model = Sequential([
    LSTM(10, activation='relu', return_sequences=True, input_shape=(n_steps_in, n_features)),
    Dropout(0.3),
    LSTM(5, activation='relu'),
    Dense(n_steps_out)
])

model.compile(optimizer='adam', loss='mape')
# fit model: the target is column 0 of each output window
model.fit(X, y[:, :, 0], epochs=20)
# demonstrate prediction: feed the last n_steps_in rows in the same
# normalised scale the network was trained on (raw values would be off by
# the normalisation factor)
x_input = raw_seq[-n_steps_in:]
x_input = x_input.reshape((1, n_steps_in, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)

In [None]:
X.shape, y.shape

In [None]:
# The network was trained on values divided by the training maximum, so the
# input window must be scaled the same way before predicting; the output is
# then multiplied back into the original units.
x_input = X_train.values[-n_steps_in:] / np.max(X_train.values)
x_input = x_input.reshape((1, n_steps_in, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat*np.max(X_train.values))

In [None]:
X_test

## Prophet

In [None]:
from sktime.forecasting.fbprophet import Prophet

# Univariate target: first column of the (multi-column) training frame.
y = X_train.iloc[:, 0]

forecaster = Prophet(
    seasonality_mode='multiplicative',
    n_changepoints=int(len(y) / 12),
    # The series is the AAPL subset of the data, so use the US holiday
    # calendar rather than the German one.
    add_country_holidays={'country_name': 'US'},
    yearly_seasonality=True)

forecaster.fit(y)

# `fh` is the relative forecasting horizon built in an earlier cell.
y_pred = forecaster.predict(fh=fh)

In [None]:
y_pred

In [None]:
plot_series(X_train.iloc[:, 0], X_test.iloc[:, 0], y_pred, labels=["y_train", "y_test", "y_pred"])
smape_loss(X_test.iloc[:, 0], y_pred)