## GOOGLE

In [2]:
import numpy as np
import pickle as pkl
import pandas as pd
from neuralprophet import NeuralProphet, set_log_level
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import warnings
warnings.filterwarnings('ignore')

In [3]:
df=pd.read_csv("Google stocks.csv")
df.columns = ['ds', 'y']
df.shape

(5138, 2)

In [4]:
quantiles = [0.015, 0.985]

params = {
    "n_lags": 24,
    "n_forecasts": 7,
    "n_changepoints": 20,
    "learning_rate": 0.01,
    "ar_layers": [32, 16, 16, 32],
    "epochs": 50,
    "batch_size": 64,
    "quantiles": quantiles,
}


m = NeuralProphet(**params)
m.set_plotting_backend("plotly-static")
set_log_level("ERROR")

df_train, df_test = m.split_df(df, valid_p=0.1, local_split=True)
print(f"Train shape: {df_train.shape}")
print(f"Test shape: {df_test.shape}")

Train shape: (4792, 2)
Test shape: (559, 2)


In [5]:
from tensorflow.keras.models import load_model
lstm_model = load_model("lstm_goog.keras")
lstm_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 75)                23100     
                                                                 
 dense (Dense)               (None, 1)                 76        
                                                                 
Total params: 23176 (90.53 KB)
Trainable params: 23176 (90.53 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [5]:
df_train.shape,df_test.shape

((4792, 2), (559, 2))

In [87]:
from keras.preprocessing.sequence import TimeseriesGenerator

test_generator = TimeseriesGenerator(df.y.iloc[-583:].values,
                                df.y.iloc[-583:].values,
                                length=24,
                                batch_size=1)

X, y = test_generator[0]
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y}')

Given the Array: 
[100.20993805  98.81494904  98.45623779  97.73881531 100.37932587
  97.73881531  95.80575562  98.9444809  102.04335022 101.85402679
 101.87395477  99.21351624  98.3565979   97.69896698  97.94807434
  99.35301208  96.83207703 100.41918182 101.0269928   99.93093872
 100.17007446 101.11668396 102.60134888 104.55432129]
Predict this y: 
 [94.48052216]


In [6]:
lstm_preds = lstm_model.predict(test_generator).reshape(-1)

NameError: name 'test_generator' is not defined

In [7]:
from statsmodels.tsa.arima.model import ARIMA

with open("best_order_goog.pkl", "rb") as f:
    loaded_order = pkl.load(f)

print("Loaded best order:", loaded_order)
def rolling_arima_predictions(train_data, test_data, order):
    predictions = []
    history = list(train_data)
    for t in range(len(test_data)):
        model = ARIMA(history, order=order)
        model_fit = model.fit()
        pred = model_fit.forecast(steps=1)[0]
        predictions.append(pred)
        history.append(test_data[t])
    return np.array(predictions)

Loaded best order: (2, 1, 2)


In [None]:
arima_preds = rolling_arima_predictions(df_train['y'].values, df_test['y'].values, loaded_order) #9 minutes

In [None]:
from hmmlearn import hmm

with open("opt_no_states_goog.pkl", "rb") as f:
    opt_states = pkl.load(f)


def rolling_hmm(original_dataset, original_test_dataset, opt_states, NUM_ITERS):
    train_data = original_dataset[1:]-original_dataset[:original_dataset.shape[0]-1]
    test_data = original_test_dataset[1:]-original_test_dataset[:original_test_dataset.shape[0]-1]
    predictions = []
    history = train_data
    for t in range(len(test_data)):
        model = hmm.GaussianHMM(n_components=opt_states, covariance_type='full', tol=0.0001, n_iter=NUM_ITERS)
        model.fit(history)
        hidden_states = model.predict(history)
        last_hidden_state = hidden_states[-1]
        next_state_probs = model.transmat_[last_hidden_state]
        predicted_state = np.argmax(next_state_probs)
        predicted_change = model.means_[predicted_state][0] # change prediction
        pred = original_dataset[-1]+predicted_change # calculation of new price from previous price
        predictions.append(pred)
        history = np.append(history, test_data[t]).reshape(-1,1)
        original_dataset = np.append(original_dataset, original_test_dataset[t])

    model = hmm.GaussianHMM(n_components=opt_states, covariance_type='full', tol=0.0001, n_iter=NUM_ITERS)
    model.fit(history)
    hidden_states = model.predict(history)
    last_hidden_state = hidden_states[-1]
    next_state_probs = model.transmat_[last_hidden_state]
    predicted_state = np.argmax(next_state_probs)
    predicted_change = model.means_[predicted_state][0]
    pred = original_dataset[-1]+predicted_change 
    predictions.append(pred)

    return predictions

In [153]:
train_array = np.array(df_train['y']).reshape(-1,1)
test_array = np.array(df_test['y']).reshape(-1,1)
train_array.shape, test_array.shape 

((4792, 1), (559, 1))

In [154]:
hmm_preds = rolling_hmm(train_array, test_array, opt_states, 100)
hmm_preds[0]= hmm_preds[0][0]

In [160]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import math
print(f'R2 Score : {r2_score(df_test.y[:-1],hmm_preds)}')
print(f'RMSE : {math.sqrt(mean_squared_error(df_test.y[:-1],hmm_preds))}')
print(f'MAE : {mean_absolute_error(df_test.y[:-1],hmm_preds)}')

R2 Score : 0.9929138951075159
RMSE : 2.5167859387849294
MAE : 1.7876579229778453


# Now we have the errors

In [20]:
def get_errors(df_test, out) -> np.float64:    
    # Compares previous days predictions (max 10) to real values and returns mean_absolute_error
    prev_days = 1
    if len(out) < prev_days:
        return (mean_absolute_error(df_test, out)) #returns error for situations where we only have readings less than prev_days
    else:
        return (mean_absolute_error(df_test[-prev_days:], out[-prev_days:]))

In [None]:
hmm_error = get_errors(np.array(df_test.y), hmm_preds)
arima_error = get_errors(np.array(df_test.y), arima_preds)
lstm_error = get_errors(np.array(df_test.y), lstm_preds)

print(f'ARIMA error  = {arima_error}')
print(f'HMM error = {hmm_error}')
print(f'LSTM error = {lstm_error}')

# Weighting

## Normalized inverse of errors

In [None]:
def normalized_inverse_of_errors_weighting(model1, model2, model3):
    errors = np.array([model1, model2, model3])
    weights = (1 / errors) / np.sum(1 / errors)
    return weights

weights = normalized_inverse_of_errors_weighting(arima_error, lstm_error, hmm_error)
weights

## Softmax or Exponential weighting

In [105]:
# Assign weights exponentially based on the errors. This gives more emphasis to models with significantly lower errors.

def softmax_weighting(model1, model2, model3):
    errors = np.array([model1, model2, model3])
    gamma = 1
    weights = np.exp(-gamma * errors) / np.sum(np.exp(-gamma * errors))
    return weights

weights = softmax_weighting(arima_error, lstm_error, hmm_error)
weights

array([0.6827003 , 0.01665177, 0.30064793])

## Error-Based Proportional Weighting

In [106]:
# Assign weights proportional to the inverse of the squared errors (or another power of errors). 
# More aggressive in penalizing higher errors.

def proportional_weighting(model1, model2, model3, k):
    errors = np.array([model1, model2, model3])
    weights = (1 / errors**k) / np.sum(1 / errors**k)
    return weights

weights = proportional_weighting(arima_error, lstm_error, hmm_error, 2)
weights

array([0.59464225, 0.08515515, 0.32020261])

## Rank based Weighting

In [107]:
def rank_based_weighting(model1, model2, model3):
    errors = np.array([model1, model2, model3])
    ranks = np.argsort(np.argsort(errors)) + 1
    weights = (1 / ranks) / np.sum(1 / ranks)
    return weights

weights = rank_based_weighting(arima_error, lstm_error, hmm_error)
weights

array([0.54545455, 0.18181818, 0.27272727])

In [139]:
train = np.array(df_train.y)
train_hmm = train.reshape(-1,1)
train_hmm = train_hmm[1:]-train_hmm[:train_hmm.shape[0]-1]
train_hmm

array([[ 0.19779491],
       [ 0.02705097],
       [-0.11242294],
       ...,
       [-1.46474457],
       [ 2.48108673],
       [-0.21920776]])

In [21]:
from hmmlearn import hmm
from statsmodels.tsa.arima.model import ARIMA
from utils import softmax_weighting

def dynamic_ensemble_prediction(train, test):
    train_hmm = train.reshape(-1,1)
    train_hmm = train_hmm[1:]-train_hmm[:train_hmm.shape[0]-1]
    test_hmm = test.reshape(-1,1)
    test_hmm = test_hmm[1:]-test_hmm[:test_hmm.shape[0]-1]
    hmm_history = train_hmm
    history = np.array(train)

    predictions = []
    truth_values = []
    lstm_preds = []
    hmm_preds = []
    arima_preds = []

    for i in range(len(test)):
        print(f'{i+1}/{len(test)}')
        truth_values.append(test[i])
        # LSTM
        lstm_pred = lstm_model.predict(history[-24:].reshape(1,24))[0][0]
        lstm_preds.append(lstm_pred)
        # ARIMA
        arima_model = ARIMA(history, order=loaded_order)
        arima_fit = arima_model.fit()
        arima_pred = arima_fit.forecast(steps=1)[0]
        arima_preds.append(arima_pred)
        # HMM
        hmm_model = hmm.GaussianHMM(n_components=opt_states, covariance_type='full', tol=0.0001, n_iter=100)
        hmm_model.fit(hmm_history)
        hidden_states = hmm_model.predict(hmm_history)
        last_hidden_state = hidden_states[-1]
        next_state_probs = hmm_model.transmat_[last_hidden_state]
        predicted_state = np.argmax(next_state_probs)
        predicted_change = hmm_model.means_[predicted_state][0]
        hmm_pred = history[-1]+predicted_change 
        hmm_preds.append(hmm_pred)

        #Error Measurement
        arima_error = get_errors(arima_preds, truth_values)
        hmm_error = get_errors(hmm_preds, truth_values)
        lstm_error = get_errors(lstm_preds, truth_values) 

        weights = softmax_weighting(arima_error, lstm_error, hmm_error) # Weighting algorithm

        predictions.append(weights[0]*arima_pred + weights[1]*lstm_pred + weights[2]*hmm_pred)
        history = np.append(history,test[i])
        
        if i != len(test)-1:
            hmm_history = np.append(hmm_history,test_hmm[i]).reshape(-1,1)
    
    return predictions, arima_preds, hmm_preds, lstm_preds
        

In [22]:
de_preds, arima_preds, hmm_preds, lstm_preds = dynamic_ensemble_prediction(np.array(df_train.y), np.array(df_test.y))

1/559
2/559
3/559
4/559
5/559
6/559
7/559
8/559
9/559
10/559
11/559
12/559
13/559
14/559
15/559
16/559
17/559
18/559
19/559
20/559
21/559
22/559
23/559
24/559
25/559
26/559
27/559
28/559
29/559
30/559
31/559
32/559
33/559
34/559
35/559
36/559
37/559
38/559
39/559
40/559
41/559
42/559
43/559
44/559
45/559
46/559
47/559
48/559
49/559
50/559
51/559
52/559
53/559
54/559
55/559
56/559
57/559
58/559
59/559
60/559
61/559
62/559
63/559
64/559
65/559
66/559
67/559
68/559
69/559
70/559
71/559
72/559
73/559
74/559
75/559
76/559
77/559
78/559
79/559
80/559
81/559
82/559
83/559
84/559
85/559
86/559
87/559
88/559
89/559
90/559
91/559
92/559
93/559
94/559
95/559
96/559
97/559
98/559
99/559
100/559
101/559
102/559
103/559
104/559
105/559
106/559
107/559
108/559
109/559
110/559
111/559
112/559
113/559
114/559
115/559
116/559
117/559
118/559
119/559
120/559
121/559
122/559
123/559
124/559
125/559
126/559
127/559
128/559
129/559
130/559
131/559
132/559
133/559
134/559
135/559
136/559
137/559
138/559
139/

In [23]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import math
print('Dynamic Ensemble')
print(f'R2 Score : {r2_score(df_test.y,de_preds)}')
print(f'RMSE : {math.sqrt(mean_squared_error(df_test.y,de_preds))}')
print(f'MAE : {mean_absolute_error(df_test.y,de_preds)}')
print('ARIMA')
print(f'R2 Score : {r2_score(df_test.y,arima_preds)}')
print(f'RMSE : {math.sqrt(mean_squared_error(df_test.y,arima_preds))}')
print(f'MAE : {mean_absolute_error(df_test.y,arima_preds)}')
print('HMM')
print(f'R2 Score : {r2_score(df_test.y,hmm_preds)}')
print(f'RMSE : {math.sqrt(mean_squared_error(df_test.y,hmm_preds))}')
print(f'MAE : {mean_absolute_error(df_test.y,hmm_preds)}')
print('LSTM')
print(f'R2 Score : {r2_score(df_test.y,lstm_preds)}')
print(f'RMSE : {math.sqrt(mean_squared_error(df_test.y, lstm_preds))}')
print(f'MAE : {mean_absolute_error(df_test.y,lstm_preds)}')


Dynamic Ensemble
R2 Score : 0.9958052189862974
RMSE : 1.9408424921960412
MAE : 1.2922420484386572
ARIMA
R2 Score : 0.9929063105081235
RMSE : 2.5238976096530474
MAE : 1.7929693887362192
HMM
R2 Score : 0.9929445010981824
RMSE : 2.5170944345896813
MAE : 1.7887162881517884
LSTM
R2 Score : 0.9639267315633713
RMSE : 5.691521645972573
MAE : 4.915551125896638


In [24]:
fig = go.Figure()
fig.update_layout(title="Dynamic Ensemble with GOOG")
fig.add_trace(go.Scatter(x=df_test['ds'], y=df_test['y'], mode='lines', name='Real Data'))
fig.add_trace(go.Scatter(x=df_test['ds'], y=de_preds, mode='lines', name='Proposed Method'))
fig.add_trace(go.Scatter(x=df_test['ds'], y=arima_preds, mode='lines', name='ARIMA'))
fig.add_trace(go.Scatter(x=df_test['ds'], y=hmm_preds, mode='lines', name='HMM'))
fig.add_trace(go.Scatter(x=df_test['ds'], y=lstm_preds, mode='lines', name='LSTM'))
fig.add_trace(go.Scatter(x=df_train['ds'], y=df_train['y'], mode='lines', name='Training'))
fig.show()