In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
import numpy as np

        

In [62]:
df = pd.read_parquet('../data/btcusd_precrd_regression.parquet')
df.dropna(inplace=True)

In [63]:
df.describe()
df.isna().sum()

open              0
high              0
low               0
close             0
BB_up_diff        0
BB_down_diff      0
OBV_Z             0
MACD_Histogram    0
RSI               0
CCI               0
predict_trend     0
dtype: int64

In [64]:
def preprocess_data(train_df):
    # Log-transform prices
    for col in ['open', 'high', 'low', 'close']:
        train_df[f'{col}_log_return'] = np.log(train_df[col] / train_df[col].shift(1))
    
    # Robust scaling for Bollinger features
    robust_cols = ['BB_up_diff', 'BB_down_diff']
    robust_scaler = RobustScaler().fit(train_df[robust_cols])
    train_df[robust_cols] = robust_scaler.transform(train_df[robust_cols])
    
    # Standard scaling for MACD and CCI
    std_cols = ['MACD_Histogram', 'CCI']
    std_scaler = StandardScaler().fit(train_df[std_cols])
    train_df[std_cols] = std_scaler.transform(train_df[std_cols])
    
    # MinMax for RSI (focus on 30–70 range)
    train_df['RSI'] = np.clip(train_df['RSI'], 30, 70)
    rsi_scaler = MinMaxScaler(feature_range=(0, 1)).fit(train_df[['RSI']])
    train_df['RSI'] = rsi_scaler.transform(train_df[['RSI']])
    
    return train_df

In [65]:
df = preprocess_data(df)

In [66]:
y = df['predict_trend']
x = df.drop(columns=['predict_trend'])
x_train = x[:int(len(x)*0.6)]
y_train = y[:int(len(x)*0.6)]
x_val = x[int(len(x)*0.6):int(len(x)*0.8)]
y_val = y[int(len(x)*0.6):int(len(x)*0.8)]
x_test = x[int(len(x)*0.8):]
y_test = y[int(len(x)*0.8):]

In [67]:
import numpy as np

print(np.isnan(x_train).sum(), np.isinf(x_train).sum())  # Check x_train
print(np.isnan(y_train).sum(), np.isinf(y_train).sum())  # Check y_train
print(np.isnan(x_val).sum(), np.isinf(x_val).sum())      # Check x_val
print(np.isnan(y_val).sum(), np.isinf(y_val).sum())      # Check y_val

open                0
high                0
low                 0
close               0
BB_up_diff          0
BB_down_diff        0
OBV_Z               0
MACD_Histogram      0
RSI                 0
CCI                 0
open_log_return     1
high_log_return     1
low_log_return      1
close_log_return    1
dtype: int64 open                0
high                0
low                 0
close               0
BB_up_diff          0
BB_down_diff        0
OBV_Z               0
MACD_Histogram      0
RSI                 0
CCI                 0
open_log_return     0
high_log_return     0
low_log_return      0
close_log_return    0
dtype: int64
0 0
open                0
high                0
low                 0
close               0
BB_up_diff          0
BB_down_diff        0
OBV_Z               0
MACD_Histogram      0
RSI                 0
CCI                 0
open_log_return     0
high_log_return     0
low_log_return      0
close_log_return    0
dtype: int64 open                0
high       

In [68]:
x_train = np.nan_to_num(x_train, nan=0.0, posinf=0.0, neginf=0.0)
y_train = np.nan_to_num(y_train, nan=0.0, posinf=0.0, neginf=0.0)
x_val = np.nan_to_num(x_val, nan=0.0, posinf=0.0, neginf=0.0)
y_val = np.nan_to_num(y_val, nan=0.0, posinf=0.0, neginf=0.0)

In [69]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(df.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')


  super().__init__(**kwargs)


In [None]:
model.fit(x_train, y_train, epochs=100, batch_size=64, validation_data=(x_val, y_val), verbose=1)

Epoch 1/100
[1m4149/4149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 28ms/step - loss: 1123.4368 - val_loss: 34664.5625
Epoch 2/100
[1m4149/4149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 20ms/step - loss: 1092.4630 - val_loss: 34665.9609
Epoch 3/100
[1m4149/4149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 19ms/step - loss: 1081.8931 - val_loss: 34665.7305
Epoch 4/100
[1m4149/4149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 20ms/step - loss: 1066.9144 - val_loss: 34666.3711
Epoch 5/100
[1m4149/4149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 19ms/step - loss: 1102.3198 - val_loss: 34667.3320
Epoch 6/100
[1m4149/4149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 19ms/step - loss: 1116.5245 - val_loss: 34666.4883
Epoch 7/100
[1m4149/4149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 19ms/step - loss: 1128.3428 - val_loss: 34665.6172
Epoch 8/100
[1m4149/4149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79