## GOOGLE

In [80]:
import numpy as np
import pickle as pkl
import pandas as pd
from neuralprophet import NeuralProphet, set_log_level
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import warnings
warnings.filterwarnings('ignore')

In [2]:
df=pd.read_csv("Google stocks.csv")
df.columns = ['ds', 'y']
df.shape

(5138, 2)

In [3]:
quantiles = [0.015, 0.985]

params = {
    "n_lags": 24,
    "n_forecasts": 7,
    "n_changepoints": 20,
    "learning_rate": 0.01,
    "ar_layers": [32, 16, 16, 32],
    "epochs": 50,
    "batch_size": 64,
    "quantiles": quantiles,
}


m = NeuralProphet(**params)
m.set_plotting_backend("plotly-static")
set_log_level("ERROR")

df_train, df_test = m.split_df(df, valid_p=0.1, local_split=True)
print(f"Train shape: {df_train.shape}")
print(f"Test shape: {df_test.shape}")

Train shape: (4792, 2)
Test shape: (559, 2)


In [4]:
from tensorflow.keras.models import load_model
lstm_model = load_model("lstm_goog.keras")
lstm_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 75)                23100     
                                                                 
 dense (Dense)               (None, 1)                 76        
                                                                 
Total params: 23176 (90.53 KB)
Trainable params: 23176 (90.53 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [66]:
df_train.shape,df_test.shape

((4792, 2), (559, 2))

In [87]:
from keras.preprocessing.sequence import TimeseriesGenerator

test_generator = TimeseriesGenerator(df.y.iloc[-583:].values,
                                df.y.iloc[-583:].values,
                                length=24,
                                batch_size=1)

X, y = test_generator[0]
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y}')

Given the Array: 
[100.20993805  98.81494904  98.45623779  97.73881531 100.37932587
  97.73881531  95.80575562  98.9444809  102.04335022 101.85402679
 101.87395477  99.21351624  98.3565979   97.69896698  97.94807434
  99.35301208  96.83207703 100.41918182 101.0269928   99.93093872
 100.17007446 101.11668396 102.60134888 104.55432129]
Predict this y: 
 [94.48052216]


In [89]:
lstm_preds = lstm_model.predict(test_generator).reshape(-1)



In [50]:
from statsmodels.tsa.arima.model import ARIMA

with open("best_order_goog.pkl", "rb") as f:
    loaded_order = pkl.load(f)

print("Loaded best order:", loaded_order)
def rolling_arima_predictions(train_data, test_data, order):
    predictions = []
    history = list(train_data)
    for t in range(len(test_data)):
        model = ARIMA(history, order=order)
        model_fit = model.fit()
        pred = model_fit.forecast(steps=1)[0]
        predictions.append(pred)
        history.append(test_data[t])
    return np.array(predictions)

Loaded best order: (2, 1, 2)


In [None]:
arima_preds = rolling_arima_predictions(df_train['y'].values, df_test['y'].values, loaded_order) #9 minutes

In [55]:
from hmmlearn import hmm

with open("opt_no_states_goog.pkl", "rb") as f:
    opt_states = pkl.load(f)


def rolling_hmm(original_dataset, original_test_dataset, opt_states, NUM_ITERS):
    train_data = original_dataset[1:]-original_dataset[:original_dataset.shape[0]-1]
    test_data = original_test_dataset[1:]-original_test_dataset[:original_test_dataset.shape[0]-1]
    predictions = []
    history = train_data
    for t in range(len(test_data)):
        model = hmm.GaussianHMM(n_components=opt_states, covariance_type='full', tol=0.0001, n_iter=NUM_ITERS)
        model.fit(history)
        hidden_states = model.predict(history)
        last_hidden_state = hidden_states[-1]
        next_state_probs = model.transmat_[last_hidden_state]
        predicted_state = np.argmax(next_state_probs)
        predicted_change = model.means_[predicted_state][0] # change prediction
        pred = original_dataset[-2]+predicted_change # calculation of new price from previous price
        predictions.append(pred)
        history = np.append(history, test_data[t]).reshape(-1,1)
        original_dataset = np.append(original_dataset, original_test_dataset[t])

    model = hmm.GaussianHMM(n_components=opt_states, covariance_type='full', tol=0.0001, n_iter=NUM_ITERS)
    model.fit(history)
    hidden_states = model.predict(history)
    last_hidden_state = hidden_states[-1]
    next_state_probs = model.transmat_[last_hidden_state]
    predicted_state = np.argmax(next_state_probs)
    predicted_change = model.means_[predicted_state][0]
    pred = original_dataset[-2]+predicted_change 
    predictions.append(pred)

    return predictions

In [56]:
train_array = np.array(df_train['y']).reshape(-1,1)
test_array = np.array(df_test['y']).reshape(-1,1)
train_array.shape, test_array.shape 

((4792, 1), (559, 1))

In [None]:
hmm_preds = rolling_hmm(train_array, test_array, opt_states, 100)
hmm_preds[0]= hmm_preds[0][0]

# Now we have the errors

In [None]:
def get_errors(df_test, out) -> np.float64:    
    # Compares previous days predictions (max 10) to real values and returns mean_absolute_error
    prev_days = 10
    if len(out) < prev_days:
        return (mean_absolute_error(df_test, out)) #returns error for situations where we only have readings less than prev_days
    else:
        return (mean_absolute_error(df_test[-prev_days:], out[-prev_days:]))

In [90]:
hmm_error = get_errors(np.array(df_test.y), hmm_preds)
arima_error = get_errors(np.array(df_test.y), arima_preds)
lstm_error = get_errors(np.array(df_test.y), lstm_preds)

print(f'ARIMA error  = {arima_error}')
print(f'HMM error = {hmm_error}')
print(f'LSTM error = {lstm_error}')

ARIMA error  = 2.2608443965641074
HMM error = 3.0809604466142675
LSTM error = 5.9743835449218805


# Weighting

## Normalized inverse of errors

In [104]:
def normalized_inverse_of_errors_weighting(model1, model2, model3):
    errors = np.array([model1, model2, model3])
    weights = (1 / errors) / np.sum(1 / errors)
    return weights

weights = normalized_inverse_of_errors_weighting(arima_error, lstm_error, hmm_error)
weights

array([0.47343226, 0.17915768, 0.34741007])

## Softmax or Exponential weighting

In [105]:
# Assign weights exponentially based on the errors. This gives more emphasis to models with significantly lower errors.

def softmax_weighting(model1, model2, model3):
    errors = np.array([model1, model2, model3])
    gamma = 1
    weights = np.exp(-gamma * errors) / np.sum(np.exp(-gamma * errors))
    return weights

weights = softmax_weighting(arima_error, lstm_error, hmm_error)
weights

array([0.6827003 , 0.01665177, 0.30064793])

## Error-Based Proportional Weighting

In [106]:
# Assign weights proportional to the inverse of the squared errors (or another power of errors). 
# More aggressive in penalizing higher errors.

def proportional_weighting(model1, model2, model3, k):
    errors = np.array([model1, model2, model3])
    weights = (1 / errors**k) / np.sum(1 / errors**k)
    return weights

weights = proportional_weighting(arima_error, lstm_error, hmm_error, 2)
weights

array([0.59464225, 0.08515515, 0.32020261])

## Rank based Weighting

In [107]:
def rank_based_weighting(model1, model2, model3):
    errors = np.array([model1, model2, model3])
    ranks = np.argsort(np.argsort(errors)) + 1
    weights = (1 / ranks) / np.sum(1 / ranks)
    return weights

weights = rank_based_weighting(arima_error, lstm_error, hmm_error)
weights

array([0.54545455, 0.18181818, 0.27272727])

In [132]:
np.array(df_train.y.iloc[-24:]).reshape(1,24)

array([[ 95.09830475, 101.08678436, 100.91739655, 100.4690094 ,
         99.51244354,  96.96160126,  94.80934143,  93.6136322 ,
         92.73678589,  93.22502899,  95.50683594,  94.96876526,
         90.87348175,  90.53469849,  88.83082581,  89.30910492,
         89.92688751,  87.9440155 ,  89.48845673,  88.55182648,
         87.61519623,  86.15045166,  88.63153839,  88.41233063]])

In [135]:
lstm_model.predict(np.array(df_train.y.iloc[-24:]).reshape(1,24))[0][0]



91.75459

In [None]:
def dynamic_ensemble_prediction(train, test):
    history = np.array(train)
    predictions = np.array([])
    lstm_pred = lstm_model.predict(history[-24:].reshape(1,24))[0][0]
    

    
