In [19]:
# %pip install tensorflow[and-cuda]
# %pip install nbformat
# %pip install dash
# % pip install matplotlib
# % pip install plotly
# %pip install imbalanced-learn
# %pip install scikit-learn
# %pip install tqdm
# %pip install pandas
# %pip install python-binance

In [2]:
import tensorflow as tf
print("GPUs Available: ", tf.config.list_physical_devices('GPU'))

GPUs Available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2023-11-19 21:49:33.727471: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-19 21:49:33.730099: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-19 21:49:33.730185: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [8]:
import os
import math
import numpy as np
import pandas as pd
from scipy.signal import find_peaks
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from tensorflow import keras
from tensorflow.keras import layers
import joblib
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import tensorflow.keras.backend as K 
from tensorflow.keras.callbacks import ModelCheckpoint
import os
import sys
sys.path.append("../")

import stockDataHandler



def rolling_window_normalization(data, column_name, window_size):
    """
    Rullende vindu normalisering.
    """
    rolling_mean = data[column_name].rolling(window=window_size).mean()
    rolling_std = data[column_name].rolling(window=window_size).std()
    
    normalized_column = (data[column_name] - rolling_mean) / rolling_std
    return normalized_column

def min_max_normalization(data, column_name):
    """
    Min-Max normalisering.
    """
    scaler = MinMaxScaler()
    normalized_column = scaler.fit_transform(data[[column_name]])
    return normalized_column

#filename = 'btc_august2020_november2023_5min.csv'
filename = 'btc_2017_november2023_5min.csv'

# Les inn data

stock_data = stockDataHandler.LoadDataCurrentDirectory(filename)

stockDataHandler.SetEMA(stock_data, 50, 'EMA50')
stockDataHandler.SetEMA(stock_data, 100, 'EMA100')
stockDataHandler.SetMacd(stock_data, 50)

window_size = 20  # Valgfri vindustørrelse
stock_data['Close_normalized'] = rolling_window_normalization(stock_data, 'Original_Close', window_size)
stock_data['Open_normalized'] = rolling_window_normalization(stock_data, 'Original_Open', window_size)
stock_data['High_normalized'] = rolling_window_normalization(stock_data, 'Original_High', window_size)
stock_data['Low_normalized'] = rolling_window_normalization(stock_data, 'Original_Low', window_size)

stock_data['Original_Taker_buy__base_asset_volume_normalized'] = rolling_window_normalization(stock_data, 'Original_Taker_buy__base_asset_volume', window_size)
stock_data['Original_Taker_buy__quote_asset_volume_normalized'] = rolling_window_normalization(stock_data, 'Original_Taker_buy__quote_asset_volume', window_size)

stock_data['Volume_normalized'] = rolling_window_normalization(stock_data, 'Volume', window_size)
stock_data['Number_of_trades_normalized'] = rolling_window_normalization(stock_data, 'Original_Number_of_trades', window_size) 

# stock_data['Close_normalized'] = min_max_normalization(stock_data, 'Original_Close')
# stock_data['Volume_normalized'] = min_max_normalization(stock_data, 'Volume')

# Min-Max normalisering
stock_data['EMA50_normalized'] = min_max_normalization(stock_data, 'EMA50')
stock_data['EMA100_normalized'] = min_max_normalization(stock_data, 'EMA100')
stock_data['MACD_normalized'] = min_max_normalization(stock_data, 'MACD')

# Fjern NaN-verdier som kan oppstå etter rullende vindu normalisering
stock_data.dropna(inplace=True)


stockDataHandler.CleanData(stock_data)



def generate_target(df, column_name, steps_ahead=1):
    """
    Genererer en 'Target'-kolonne basert på fremtidig pris.
    Prisen 'steps_ahead' punkter frem i tid vil være målverdien.
    """
    df['Target'] = df[column_name].shift(-steps_ahead)
    df.dropna(inplace=True)  # Fjerner NaN-verdier som kan oppstå på grunn av tidsforskyvningen
    return df

# Bruk funksjonen for å generere 'Target'-kolonnen basert på 'Original_Close'-kolonnen
stock_data = generate_target(stock_data, 'Close_normalized',1)

def create_sequences(X, y, sequence_length):  
    X_sequences = []  
    y_sequences = []  
  
    for i in range(len(X) - sequence_length):  
        X_sequences.append(X[i:i + sequence_length])  
        y_sequences.append(y[i + sequence_length])  
  
    X_sequences = np.array(X_sequences)  
    y_sequences = np.array(y_sequences)  
  
    return X_sequences, y_sequences  

# Prepare data for sequences  
X = stock_data[['Close_normalized','Open_normalized','High_normalized', 'Low_normalized', 'Number_of_trades_normalized', 'Volume_normalized','Original_Taker_buy__base_asset_volume_normalized','Original_Taker_buy__quote_asset_volume_normalized', 'EMA50_normalized', 'EMA100_normalized', 'MACD_normalized']]  
y = stock_data['Target']  
  
sequence_length = 1  # Antall tidssteg du ønsker å bruke som input  
  
# Create sequences before reshaping and splitting  
X_sequences, y_sequences = stockDataHandler.create_sequences(X.values, y.values, sequence_length)  
  
# Forme data for LSTM (samples, timesteps, features)  
X_sequences = np.reshape(X_sequences, (X_sequences.shape[0], sequence_length, X_sequences.shape[2]))  
  
# Splitte data i trening og testsett  
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.3, random_state=42)  



model = keras.Sequential()
model.add(layers.LSTM(500, return_sequences=True, input_shape=(X_train.shape[1], 11))) 
model.add(layers.LSTM(500, return_sequences=False))
model.add(layers.Dense(50))
model.add(layers.Dense(1))
# model.add(layers.Dense(y.shape[1], activation='softmax'))
model.summary()

def root_mean_squared_error(y_true, y_pred):  
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

#model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])  # endre til loss='categorical_crossentropy' for klassifisering
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[root_mean_squared_error])  # endre til loss='categorical_crossentropy' for klassifisering


# Trene modellen
#model.fit(X_train, y_train, epochs=1000, batch_size=8)

# Definer antall epoker for lagring av modellen  
save_every_n_epochs = 1  # Endre dette tallet etter ønske  
  
# Opprett en katalog for å lagre modellene  
models_directory = 'saved_models_train_v4'  
if not os.path.exists(models_directory):  
    os.makedirs(models_directory)  
  
# # Definer filnavnformatet for å reflektere antall epoker modellen er trent på  

  
# # Bruk ModelCheckpoint for å lagre modellen for hver n'te epoke  

  
# # Trene modellen med ModelCheckpoint som en callback  



# Definer filnavnformatet for å reflektere antall epoker modellen er trent på  
filename_format = os.path.join(models_directory, 'lstm_model_2017-06_2023_train_v4_epoch-{epoch:03d}_500_500_50_1.h5')  

# Bruk ModelCheckpoint for å lagre modellen for hver n'te epoke  
checkpoint = ModelCheckpoint(filepath=filename_format, save_freq=save_every_n_epochs * (len(y_train) // 8))  

# Trene modellen med ModelCheckpoint som en callback  
model.fit(X_train, y_train, epochs=30, batch_size=15, callbacks=[checkpoint])  

# Lagre modellen
# modelName = stockDataHandler.get_full_path('lstm_model_2018-06_2023_train_v2.h5')
# model.save(modelName)

print('#############################')
# Vurdere modellen på testdata
loss = model.evaluate(X_test, y_test)
print(f'Test loss: {loss}')

predictions = model.predict(X_test)
r2 = r2_score(y_test, predictions)
print(f'R2 Score: {r2}')



Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 1, 500)            1024000   
                                                                 
 lstm_13 (LSTM)              (None, 500)               2002000   
                                                                 
 dense_12 (Dense)            (None, 50)                25050     
                                                                 
 dense_13 (Dense)            (None, 1)                 51        
                                                                 
Total params: 3051101 (11.64 MB)
Trainable params: 3051101 (11.64 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/30
 1626/30531 [>.............................] - ETA: 51s - loss: 0.8070 - root_mean_squared_error: 0.8722

KeyboardInterrupt: 

In [4]:
from keras.callbacks import Callback
from sklearn.metrics import r2_score

class R2ScoreCallback(Callback):
    def __init__(self, x_test, y_test, save_every_n_epochs):
        super().__init__()
        self.x_test = x_test
        self.y_test = y_test
        self.save_every_n_epochs = save_every_n_epochs

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_every_n_epochs == 0:
            predictions = self.model.predict(self.x_test)
            r2 = r2_score(self.y_test, predictions)
            print(f'Epoch {epoch + 1}: R2 Score: {r2}')

In [5]:
import os
import math
import numpy as np
import pandas as pd
from scipy.signal import find_peaks
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from tensorflow import keras
from tensorflow.keras import layers
import joblib
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import tensorflow.keras.backend as K 
from tensorflow.keras.callbacks import ModelCheckpoint
import os
import sys
sys.path.append("../")

import stockDataHandler



def rolling_window_normalization(data, column_name, window_size):
    """
    Rullende vindu normalisering.
    """
    rolling_mean = data[column_name].rolling(window=window_size).mean()
    rolling_std = data[column_name].rolling(window=window_size).std()
    
    normalized_column = (data[column_name] - rolling_mean) / rolling_std
    return normalized_column

def min_max_normalization(data, column_name):
    """
    Min-Max normalisering.
    """
    scaler = MinMaxScaler()
    normalized_column = scaler.fit_transform(data[[column_name]])
    return normalized_column

#filename = 'btc_august2020_november2023_5min.csv'
filename = 'btc_2017_november2023_5min.csv'

# Les inn data

stock_data = stockDataHandler.LoadDataCurrentDirectory(filename)

stockDataHandler.SetEMA(stock_data, 50, 'EMA50')
stockDataHandler.SetEMA(stock_data, 100, 'EMA100')
stockDataHandler.SetMacd(stock_data, 50)

window_size = 20  # Valgfri vindustørrelse
stock_data['Close_normalized'] = rolling_window_normalization(stock_data, 'Original_Close', window_size)
stock_data['Open_normalized'] = rolling_window_normalization(stock_data, 'Original_Open', window_size)
stock_data['High_normalized'] = rolling_window_normalization(stock_data, 'Original_High', window_size)
stock_data['Low_normalized'] = rolling_window_normalization(stock_data, 'Original_Low', window_size)

stock_data['Original_Taker_buy__base_asset_volume_normalized'] = rolling_window_normalization(stock_data, 'Original_Taker_buy__base_asset_volume', window_size)
stock_data['Original_Taker_buy__quote_asset_volume_normalized'] = rolling_window_normalization(stock_data, 'Original_Taker_buy__quote_asset_volume', window_size)

stock_data['Volume_normalized'] = rolling_window_normalization(stock_data, 'Volume', window_size)
stock_data['Number_of_trades_normalized'] = rolling_window_normalization(stock_data, 'Original_Number_of_trades', window_size) 

# stock_data['Close_normalized'] = min_max_normalization(stock_data, 'Original_Close')
# stock_data['Volume_normalized'] = min_max_normalization(stock_data, 'Volume')

# Min-Max normalisering
stock_data['EMA50_normalized'] = min_max_normalization(stock_data, 'EMA50')
stock_data['EMA100_normalized'] = min_max_normalization(stock_data, 'EMA100')
stock_data['MACD_normalized'] = min_max_normalization(stock_data, 'MACD')

# Fjern NaN-verdier som kan oppstå etter rullende vindu normalisering
stock_data.dropna(inplace=True)


stockDataHandler.CleanData(stock_data)

def generate_target(df, column_name, steps_ahead=1):
    """
    Genererer en 'Target'-kolonne basert på fremtidig pris.
    Prisen 'steps_ahead' punkter frem i tid vil være målverdien.
    """
    df['Target'] = df[column_name].shift(-steps_ahead)
    df.dropna(inplace=True)  # Fjerner NaN-verdier som kan oppstå på grunn av tidsforskyvningen
    return df

# Bruk funksjonen for å generere 'Target'-kolonnen basert på 'Original_Close'-kolonnen
stock_data = generate_target(stock_data, 'Close_normalized',1)

def create_sequences(X, y, sequence_length):  
    X_sequences = []  
    y_sequences = []  
  
    for i in range(len(X) - sequence_length):  
        X_sequences.append(X[i:i + sequence_length])  
        y_sequences.append(y[i + sequence_length])  
  
    X_sequences = np.array(X_sequences)  
    y_sequences = np.array(y_sequences)  
  
    return X_sequences, y_sequences  

# Prepare data for sequences  
X = stock_data[['Close_normalized','Open_normalized','High_normalized', 'Low_normalized', 'Number_of_trades_normalized', 'Volume_normalized','Original_Taker_buy__base_asset_volume_normalized','Original_Taker_buy__quote_asset_volume_normalized', 'EMA50_normalized', 'EMA100_normalized', 'MACD_normalized']]  
y = stock_data['Target']  
  
sequence_length = 1  # Antall tidssteg du ønsker å bruke som input  
  
# Create sequences before reshaping and splitting  
X_sequences, y_sequences = stockDataHandler.create_sequences(X.values, y.values, sequence_length)  
  
# Forme data for LSTM (samples, timesteps, features)  
X_sequences = np.reshape(X_sequences, (X_sequences.shape[0], sequence_length, X_sequences.shape[2]))  
  
# Splitte data i trening og testsett  
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.3, random_state=42)  



model = keras.Sequential()
model.add(layers.LSTM(500, return_sequences=True, input_shape=(X_train.shape[1], 11))) 
model.add(layers.LSTM(500, return_sequences=False))
model.add(layers.Dense(50))
model.add(layers.Dense(1))
# model.add(layers.Dense(y.shape[1], activation='softmax'))
model.summary()

def custom_root_mean_squared_error(y_true, y_pred, percent_threshold=1.0):
    # Beregn prosentavviket mellom y_true og y_pred
    percent_error = 100 * np.abs((y_pred - y_true) / y_true)
    
    # Beregn RMSE bare for de eksemplene der prosentavviket er innenfor det angitte prosentintervallet
    squared_errors = np.square(y_pred - y_true)
    filtered_squared_errors = np.where(percent_error <= percent_threshold, squared_errors, 0)
    
    # Beregn RMSE for de filtrerte feilene
    rmse = np.sqrt(np.mean(filtered_squared_errors))
    return rmse

#model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])  # endre til loss='categorical_crossentropy' for klassifisering
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[root_mean_squared_error])  # endre til loss='categorical_crossentropy' for klassifisering


# Trene modellen
#model.fit(X_train, y_train, epochs=1000, batch_size=8)

# Definer antall epoker for lagring av modellen  
save_every_n_epochs = 1  # Endre dette tallet etter ønske  
  
# Opprett en katalog for å lagre modellene  
models_directory = 'saved_models_train_v4'  
if not os.path.exists(models_directory):  
    os.makedirs(models_directory)  
  
# Definer filnavnformatet for å reflektere antall epoker modellen er trent på  
filename_format = os.path.join(models_directory, 'lstm_model_2017-06_2023_train_v4_epoch-{epoch:03d}_500_500_50_1.h5')  
  
r2_callback = R2ScoreCallback(X_test, y_test, save_every_n_epochs)

# Legge til både checkpoint og R2ScoreCallback i callbacks-listen
model.fit(X_train, y_train, epochs=30, batch_size=15, callbacks=[checkpoint, r2_callback])




Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 1, 500)            1024000   
                                                                 
 lstm_7 (LSTM)               (None, 500)               2002000   
                                                                 
 dense_6 (Dense)             (None, 50)                25050     
                                                                 
 dense_7 (Dense)             (None, 1)                 51        
                                                                 
Total params: 3051101 (11.64 MB)
Trainable params: 3051101 (11.64 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/30
Epoch 1: R2 Score: 0.5209791110685491
Epoch 2/30

  saving_api.save_model(


Epoch 2: R2 Score: 0.5231380591485574
Epoch 3/30
Epoch 3: R2 Score: 0.5224530026279866
Epoch 4/30

  saving_api.save_model(


Epoch 4: R2 Score: 0.5213790879582907
Epoch 5/30
Epoch 5: R2 Score: 0.5228176314523746
Epoch 6/30

  saving_api.save_model(


Epoch 6: R2 Score: 0.5229614603675607
Epoch 7/30
Epoch 7: R2 Score: 0.5226765574060173
Epoch 8/30
 5418/30531 [====>.........................] - ETA: 42s - loss: 0.7661 - root_mean_squared_error: 0.8501

  saving_api.save_model(


Epoch 8: R2 Score: 0.5231607304215922
Epoch 9/30
Epoch 9: R2 Score: 0.5226320762855394
Epoch 10/30
 1588/30531 [>.............................] - ETA: 50s - loss: 0.7592 - root_mean_squared_error: 0.8456

  saving_api.save_model(


Epoch 10: R2 Score: 0.5232117732636867
Epoch 11/30

  saving_api.save_model(


Epoch 11: R2 Score: 0.5235437566543961
Epoch 12/30
Epoch 12: R2 Score: 0.523646247206973
Epoch 13/30

  saving_api.save_model(


Epoch 13: R2 Score: 0.5237910659789158
Epoch 14/30
Epoch 14: R2 Score: 0.5229110153211951
Epoch 15/30

  saving_api.save_model(


Epoch 15: R2 Score: 0.5234045322278422
Epoch 16/30
Epoch 16: R2 Score: 0.5234700177851543
Epoch 17/30

  saving_api.save_model(


Epoch 17: R2 Score: 0.5232813739147675
Epoch 18/30
Epoch 18: R2 Score: 0.5234275265662977
Epoch 19/30

  saving_api.save_model(


Epoch 19: R2 Score: 0.523261688417757
Epoch 20/30
Epoch 20: R2 Score: 0.5231388374327631
Epoch 21/30

  saving_api.save_model(


Epoch 21: R2 Score: 0.5230218134555817
Epoch 22/30
Epoch 22: R2 Score: 0.523754937985941
Epoch 23/30
 5398/30531 [====>.........................] - ETA: 43s - loss: 0.7586 - root_mean_squared_error: 0.8461

  saving_api.save_model(


Epoch 23: R2 Score: 0.5234349866763549
Epoch 24/30
Epoch 24: R2 Score: 0.5236809416666024
Epoch 25/30
 1591/30531 [>.............................] - ETA: 49s - loss: 0.7654 - root_mean_squared_error: 0.8497

  saving_api.save_model(


Epoch 25: R2 Score: 0.5235119493705538
Epoch 26/30
 4515/30531 [===>..........................] - ETA: 45s - loss: 0.7637 - root_mean_squared_error: 0.8484

KeyboardInterrupt: 