**Data Loading**

In [None]:
import pandas as pd
import talib
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, MultiHeadAttention, LayerNormalization, Flatten
from tensorflow.keras.models import Model
from kerastuner import HyperModel
from kerastuner.tuners import RandomSearch
from sklearn.preprocessing import MinMaxScaler

# Read the raw BTC/USDT CSV (path is relative to the working directory) into a DataFrame
crypto_data = pd.read_csv(filepath_or_buffer="data/raw/crypto_data_btc_usdt.csv")

**Data Preprocessing**

Convert Timestamps

In [None]:
# Parse the raw 'timestamp' values into pandas datetimes (column is replaced in place)
crypto_data['timestamp'] = crypto_data['timestamp'].pipe(pd.to_datetime)

Feature Engineering

In [None]:
# Average True Range over 14 periods (computed from high / low / close)
crypto_data['ATR'] = talib.ATR(
    crypto_data['high'].values,
    crypto_data['low'].values,
    crypto_data['close'].values,
    timeperiod=14,
)

# Bollinger Bands over 20 periods; BBANDS returns (upper, middle, lower) in that order
band_columns = ('upper_band', 'middle_band', 'lower_band')
for band_column, band_values in zip(band_columns, talib.BBANDS(crypto_data['close'], timeperiod=20)):
    crypto_data[band_column] = band_values

# Simple moving averages of the close (short and long window)
sma_windows = {'MA_7': 7, 'MA_50': 50}
for sma_column, sma_window in sma_windows.items():
    crypto_data[sma_column] = talib.SMA(crypto_data['close'], timeperiod=sma_window)

# On-Balance Volume (OBV)
crypto_data['OBV'] = talib.OBV(crypto_data['close'], crypto_data['volume'])

# Cumulative Volume Delta (CVD), approximated from candle direction:
# a row's volume counts as buying when the close rose versus the previous row and
# as selling when it fell; unchanged rows (and the first row) contribute 0.
previous_close = crypto_data['close'].shift(1)
buy_volume = crypto_data['volume'].where(crypto_data['close'] > previous_close, 0)
sell_volume = crypto_data['volume'].where(crypto_data['close'] < previous_close, 0)
crypto_data['CVD'] = (buy_volume - sell_volume).cumsum()

# Lagged copies of 'close': close_lag_k holds the close from k rows earlier
num_lags = 3
for lag in range(1, num_lags + 1):
    crypto_data[f'close_lag_{lag}'] = crypto_data['close'].shift(lag)

# The indicators and lags are undefined for their warm-up rows; drop every row containing a NaN
crypto_data.dropna(inplace=True)

Splitting Data

In [None]:
# Positional 80/20 split without shuffling: the first 80% of rows train, the rest test
train_size = int(0.8 * len(crypto_data))
train_data, test_data = crypto_data.iloc[:train_size], crypto_data.iloc[train_size:]

Normalize the Data

In [None]:
# Two independent MinMax scalers: one for the whole feature matrix and one for the
# 'close' target alone, so predictions can be inverse-transformed on their own.
feature_scaler, target_scaler = MinMaxScaler(), MinMaxScaler()

# Everything except the columns listed here is fed to the model as a feature
non_feature_cols = ['timestamp']
feature_cols = [name for name in crypto_data.columns if name not in non_feature_cols]

# Scaling parameters are learned from the training rows only and then reused for the test rows
train_data_scaled = feature_scaler.fit(train_data[feature_cols]).transform(train_data[feature_cols])
test_data_scaled = feature_scaler.transform(test_data[feature_cols])

# Same procedure for the target: a single-column (n_rows, 1) array of 'close' prices
train_close_scaled = target_scaler.fit_transform(train_data[['close']].to_numpy())
test_close_scaled = target_scaler.transform(test_data[['close']].to_numpy())

Create Sequences

In [None]:
def create_sequences(data, target, seq_length):
    """Slice a series into overlapping fixed-length windows with next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``target[i + seq_length]``, i.e. the value immediately after the window.

    Args:
        data: Array-like whose first axis is the time axis.
        target: Array-like aligned with ``data`` along the first axis.
        seq_length: Number of consecutive rows per window (must be >= 1).

    Returns:
        A tuple ``(sequences, sequence_targets)`` of NumPy arrays.
        ``sequences`` has shape ``(n_windows, seq_length, *data.shape[1:])`` and
        ``sequence_targets`` has shape ``(n_windows, *target.shape[1:])``, where
        ``n_windows = max(len(data) - seq_length, 0)``. When there are too few
        rows for a single window the arrays are empty but keep their trailing
        dimensions, so downstream shape lookups still work.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1 or ``target`` has fewer
            rows than ``data``.
    """
    data = np.asarray(data)
    target = np.asarray(target)

    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")
    if len(target) < len(data):
        raise ValueError(
            f"target has {len(target)} rows but data has {len(data)}; "
            "every window needs a target row"
        )

    n_windows = max(len(data) - seq_length, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + seq_length - 1]; fancy
    # indexing with it gathers all windows in one copy instead of a Python loop.
    window_index = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    sequences = data[window_index]

    # The target of window i sits at position i + seq_length.
    sequence_targets = target[seq_length:seq_length + n_windows].copy()

    return sequences, sequence_targets

# Each sample is a window of 30 consecutive scaled rows; its label is the scaled
# 'close' of the row right after the window. Train and test are windowed separately.
seq_length = 30
X_train, y_train = create_sequences(
    data=train_data_scaled,
    target=train_close_scaled,
    seq_length=seq_length,
)
X_test, y_test = create_sequences(
    data=test_data_scaled,
    target=test_close_scaled,
    seq_length=seq_length,
)

**Model Building**

In [None]:
class TransformerHyperModel(HyperModel):
    """KerasTuner hypermodel that builds and compiles a transformer regressor.

    The search space covers the model width (``d_model``), the number of
    attention heads, the feed-forward width (``ff_dim``), the dropout rate and
    the Adam learning rate. The model is compiled with mean-squared-error loss.

    NOTE(review): ``build_transformer_model`` is not defined in the visible part
    of this notebook. It must be defined in an earlier cell (accepting the
    keyword arguments passed in ``build``) before the search is run.
    """

    def __init__(self, seq_length, n_features):
        """Store the fixed input dimensions of the model.

        Args:
            seq_length: Number of time steps in each input window.
            n_features: Number of features per time step.
        """
        # Let the HyperModel base class initialise its own state first.
        super().__init__()
        self.seq_length = seq_length
        self.n_features = n_features

    def build(self, hp):
        """Build and compile one candidate model from the sampled hyperparameters.

        Args:
            hp: KerasTuner ``HyperParameters`` object used to sample values.

        Returns:
            The compiled Keras model.
        """
        model = build_transformer_model(
            seq_length=self.seq_length,
            n_features=self.n_features,
            d_model=hp.Int('d_model', min_value=32, max_value=128, step=32),
            num_heads=hp.Choice('num_heads', values=[2, 4, 8]),
            ff_dim=hp.Int('ff_dim', min_value=64, max_value=256, step=32),
            dropout_rate=hp.Float('dropout_rate', min_value=0.1, max_value=0.5, step=0.1)
        )
        # Learning rate is sampled on a log scale between 1e-5 and 1e-2.
        learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='LOG', default=1e-3)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
                      loss='mse')
        return model

# Initialize the tuner: 5 random hyperparameter configurations, each trained
# 3 times, ranked by validation loss.
tuner = RandomSearch(
    TransformerHyperModel(seq_length=seq_length, n_features=X_train.shape[2]),
    objective='val_loss',
    max_trials=5,
    executions_per_trial=3,
    directory='random_search',
    project_name='transformer'
)

# Search for the best model.
# Validation uses a hold-out taken from the training windows instead of the test
# set: choosing hyperparameters on (X_test, y_test) would leak test information
# into model selection and make the final test MSE optimistically biased.
# Keras takes the validation_split fraction from the end of the arrays (before
# shuffling), so the hold-out is the most recent 20% of the training windows.
tuner.search(X_train, y_train, epochs=10, validation_split=0.2, verbose=1)

# Retrieve the best model (the trial with the lowest validation loss)
best_model = tuner.get_best_models(num_models=1)[0]

**Training**

Train the model

In [None]:
# Continue training the tuned model. The original call used `model`, which is never
# defined in this notebook (NameError on a fresh kernel); `best_model` is the model
# returned by the tuner and the one evaluated below.
history = best_model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test), verbose=1)

**Evaluation & Visualization**

In [None]:
import plotly.graph_objects as go

# Score the best model on the test windows (the loss is MSE on the scaled target)
mse = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Mean Squared Error on Test Data: {mse:.4f}")

# Predict the scaled close for every test window
y_pred = best_model.predict(X_test)

# Undo the target scaling so both series are back in price units
y_pred_original = target_scaler.inverse_transform(y_pred)
y_test_original = target_scaler.inverse_transform(y_test)

# Actual vs. predicted close, plotted against the window position in the test set
fig = go.Figure(
    data=[
        go.Scatter(
            x=list(range(len(y_test_original))),
            y=y_test_original.flatten(),
            mode='lines',
            name='Actual',
            line={'color': 'blue'},
        ),
        go.Scatter(
            x=list(range(len(y_pred_original))),
            y=y_pred_original.flatten(),
            mode='lines',
            name='Predicted',
            line={'color': 'red', 'dash': 'dot'},
        ),
    ]
)

# Dark theme, axis labels and a fixed figure height
fig.update_layout(
    template="plotly_dark",
    title="Bitcoin Price Prediction using Transformers",
    xaxis_title="Time",
    yaxis_title="Price (in USDT)",
    height=800,
)

# Render the figure
fig.show()