In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

from datetime import datetime
from matplotlib import pyplot as plt

import seaborn as sns

from tensorflow.keras.models import Model, load_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import GRU
import tensorflow as tf


from tensorflow.keras.optimizers import Adam, Nadam, AdamW
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

2024-10-25 08:30:10.387179: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-25 08:30:10.510673: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-25 08:30:10.570690: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-25 08:30:10.571135: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-25 08:30:10.663990: I tensorflow/core/platform/cpu_feature_gua

In [3]:
def mase(y_true, y_pred):

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    mae_model = np.mean(np.abs(y_true - y_pred))
    mae_naive = np.mean(np.abs(y_true[1:] - y_true[:-1]))
    
    return mae_model / mae_naive if mae_naive != 0 else float('inf')



def printRegressionMetrics(y_true, y_pred):
    
    MAPE = metrics.mean_absolute_percentage_error(y_true, y_pred)
    MSE = metrics.mean_squared_error(y_true, y_pred)
    R2 = metrics.r2_score(y_true, y_pred)
    MASE = mase(y_true, y_pred)
    
    print('MSE:')
    print(MSE)
    print('MAPE:')
    print(MAPE)
    print('MASE:')
    print(MASE)
    print('R2:')
    print(R2)

NEW_MODEL = True
MODEL_NAME = 'MickaNet-GRU-V2'

In [4]:
WINDOW_SIZE = 24
x_frames = []
y_frames = []

In [5]:
data = pd.read_csv("./Dataset/dataOld.csv")
data = data.head(25000)

In [6]:
teste = data.copy()
teste["timestamp"] = teste["timestamp"] / 1000
teste["timestamp"] = teste['timestamp'].apply(lambda x: datetime.fromtimestamp(x))
teste.reset_index(drop=True, inplace=True)

In [7]:
scaler_price = MinMaxScaler(feature_range=(0,1))
scaler_volume= MinMaxScaler(feature_range=(0,1))
scaler_trades = MinMaxScaler(feature_range=(0,1))

price_scaled = np.array(teste["price"]).copy()
volume_scaled = np.array(teste["volume"]).copy()
trades_scaled = np.array(teste["trades"]).copy()


teste['price'] = scaler_price.fit_transform(price_scaled.reshape(-1,1)).flatten()
teste['volume'] = scaler_volume.fit_transform(volume_scaled.reshape(-1,1)).flatten()
teste['trades'] = scaler_trades.fit_transform(trades_scaled.reshape(-1,1)).flatten()

In [8]:
teste = teste.set_index('timestamp')
teste

Unnamed: 0_level_0,volume,trades,price
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-01 00:00:00,0.009178,0.015846,0.391867
2020-01-01 00:15:00,0.005096,0.009680,0.392880
2020-01-01 00:30:00,0.006455,0.011474,0.391020
2020-01-01 00:45:00,0.003912,0.006773,0.391458
2020-01-01 01:00:00,0.004412,0.010274,0.390765
...,...,...,...
2020-09-17 08:45:00,0.035172,0.059518,0.819201
2020-09-17 09:00:00,0.026115,0.054485,0.817022
2020-09-17 09:15:00,0.039881,0.059079,0.813955
2020-09-17 09:30:00,0.034600,0.059787,0.815495


In [9]:
teste.reset_index(drop=True, inplace=True)
teste

Unnamed: 0,volume,trades,price
0,0.009178,0.015846,0.391867
1,0.005096,0.009680,0.392880
2,0.006455,0.011474,0.391020
3,0.003912,0.006773,0.391458
4,0.004412,0.010274,0.390765
...,...,...,...
24995,0.035172,0.059518,0.819201
24996,0.026115,0.054485,0.817022
24997,0.039881,0.059079,0.813955
24998,0.034600,0.059787,0.815495


In [10]:
x_names = teste.columns.to_list()

x_data = teste[x_names].to_numpy()
y_data = teste['price'].to_numpy()

num_samples = len(teste) - WINDOW_SIZE

x_frames = np.zeros((num_samples, WINDOW_SIZE, len(x_names)))
y_frames = np.zeros((num_samples, 1))

for i in range(num_samples):
    x_frames[i] = x_data[i:i+WINDOW_SIZE] 
    y_frames[i] = y_data[i+WINDOW_SIZE] 

In [11]:
x_train, x_test, y_train, y_test = train_test_split(x_frames, y_frames, test_size=0.3, shuffle=False)

In [12]:
model_trained = Sequential()
model_trained.add(GRU(75, return_sequences=True,input_shape=(x_train.shape[1], x_train.shape[2])))
model_trained.add(GRU(units=30, return_sequences=True))
model_trained.add(GRU(units=30))
model_trained.add(Dense(units=1))

model_trained.compile(loss='mae', optimizer='adam')


2024-10-25 08:30:12.421861: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-25 08:30:12.520178: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-25 08:30:12.524216: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [13]:
model_trained.summary()

In [14]:
if (NEW_MODEL == True):
          
    es = EarlyStopping(monitor='loss', mode='min', patience=50)


    cp = ModelCheckpoint(filepath=f'models/{MODEL_NAME}.weights.h5', 
                           save_weights_only=True, 
                           monitor='loss', 
                           mode='min', 
                           save_best_only=True)
    
    #Adamax(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, clipnorm=1)
    #Nadam(learning_rate=0.0001, beta_1=0.85, beta_2=0.989, epsilon=1e-06, clipnorm=1)
    
    opt = Nadam(learning_rate=0.0001, beta_1=0.85, beta_2=0.989, epsilon=1e-06, clipnorm=1)
    
    model_trained.compile(optimizer = opt, loss = 'mean_squared_error')

    history = model_trained.fit(x_train, 
                               y_train, 
                               validation_split=0.2, 
                               epochs=100, 
                               verbose=1, 
                               callbacks=[cp, es], 
                               batch_size=64, 
                               shuffle=False)
    
    np.save('models/history_model.npy', history.history)
    model_trained.load_weights(f'models/{MODEL_NAME}.weights.h5')
    model_trained.save(f'models/{MODEL_NAME}.keras')
else:
    model_trained = load_model(f'models/{MODEL_NAME}.keras')

Epoch 1/100


2024-10-25 08:30:14.471371: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - loss: 0.0833 - val_loss: 2.5309e-04
Epoch 2/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 0.0015 - val_loss: 5.5578e-04
Epoch 3/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 7.6884e-04 - val_loss: 5.6412e-04
Epoch 4/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 4.6761e-04 - val_loss: 3.9448e-04
Epoch 5/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 3.1713e-04 - val_loss: 2.7821e-04
Epoch 6/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 2.2742e-04 - val_loss: 2.2299e-04
Epoch 7/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 1.7596e-04 - val_loss: 1.9373e-04
Epoch 8/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 1.4698e-04 - v

In [None]:
y_hat =  model_trained.predict(x_test)

[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [None]:
y_test2 = scaler_price.inverse_transform(y_test.flatten().reshape(-1, 1))
y_hat2 = scaler_price.inverse_transform(y_hat.flatten().reshape(-1, 1))

In [None]:
sns.set_theme(rc={'figure.figsize':(15, 4)})
sns.set_style('whitegrid')
sns.set_context('paper')

ax = df_forecast.plot(x_compat=True, style='-')
ax.set_xlabel("Data")
ax.legend(["Predicted", "Real"])
ax.set_title("Real vs Predicted price chart")

plt.grid(True)

plt.show()

plt.close()

NameError: name 'data' is not defined

In [None]:
printRegressionMetrics(y_test2, y_hat2)

MSE:
73422.39996962006
MAPE:
0.020217669530031953
MASE:
13.095600261195287
R2:
0.9331085355785695
