**Data Loading**

In [20]:
import pandas as pd
import talib

# Read the raw BTC/USDT candle CSV into a DataFrame (path is relative to the
# notebook's working directory)
raw_csv_path = "data/raw/crypto_data_btc_usdt.csv"
crypto_data = pd.read_csv(raw_csv_path)


**Data Preprocessing**

Convert Timestamps

In [21]:
# Parse the 'timestamp' column into pandas datetime values and store the
# result back under the same column name
parsed_timestamps = pd.to_datetime(crypto_data['timestamp'])
crypto_data['timestamp'] = parsed_timestamps

Feature Engineering: Add the Average True Range (ATR)

In [22]:
# Add ATR feature.
# TA-Lib's function API works on float64 ("double") NumPy arrays, so the price
# columns are cast explicitly; a column that pandas parsed as integers would
# otherwise make talib.ATR raise instead of computing the indicator.
atr_period = 14
crypto_data['ATR'] = talib.ATR(crypto_data['high'].to_numpy(dtype='float64'),
                               crypto_data['low'].to_numpy(dtype='float64'),
                               crypto_data['close'].to_numpy(dtype='float64'),
                               timeperiod=atr_period)

# Handle NaN values: drop every row that contains a NaN in any column
# (presumably this is mainly the indicator's warm-up rows at the start).
# NOTE(review): this cell is not idempotent - re-running it recomputes ATR on
# the already-trimmed frame and drops further leading rows. Re-run from the
# data-loading cell to get back to a clean state.
crypto_data = crypto_data.dropna()

Splitting Data

In [23]:
# Hold-out split by row order (no shuffling): the first 80% of the rows become
# the training set and the remaining 20% the test set
train_size = int(len(crypto_data) * 0.8)
train_data = crypto_data.iloc[:train_size]
test_data = crypto_data.iloc[train_size:]

Normalize the Data

In [24]:
from sklearn.preprocessing import MinMaxScaler

# Two independent min-max scalers: one for the model's input columns and one
# dedicated to 'close', so predictions can later be mapped back to prices
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()

# Learn the feature ranges from the training rows only, then reuse those ranges
# for the test rows
feature_columns = ['open', 'high', 'low', 'close', 'volume', 'ATR']
train_data_scaled = feature_scaler.fit_transform(train_data[feature_columns])
test_data_scaled = feature_scaler.transform(test_data[feature_columns])

# Same procedure for the target: fit on the training 'close' prices (as a single
# column), then apply the fitted scaler to the test 'close' prices
train_close = train_data['close'].values.reshape(-1, 1)
test_close = test_data['close'].values.reshape(-1, 1)
train_close_scaled = target_scaler.fit_transform(train_close)
test_close_scaled = target_scaler.transform(test_close)

Create Sequences

In [25]:
import numpy as np

def create_sequences(data, target, seq_length):
    """Build sliding-window samples for next-step prediction.

    Window ``i`` covers rows ``data[i : i + seq_length]`` and is paired with
    ``target[i + seq_length]``, i.e. the target row right after the window.

    Args:
        data: Array-like of feature rows, indexed along axis 0.
        target: Array-like of target rows, row-aligned with ``data``.
        seq_length: Number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(sequences, sequence_targets)`` of NumPy arrays with shapes
        ``(n, seq_length, *data.shape[1:])`` and ``(n, *target.shape[1:])``,
        where ``n = max(len(data) - seq_length, 0)``.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be a positive integer, got {seq_length!r}")

    data = np.asarray(data)
    target = np.asarray(target)
    n_windows = len(data) - seq_length

    if n_windows <= 0:
        # Too few rows for even one window: return empty arrays that still
        # carry the trailing dimensions, so callers can read e.g. shape[2]
        # instead of hitting a shapeless (0,) array.
        empty_sequences = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_targets = np.empty((0,) + target.shape[1:], dtype=target.dtype)
        return empty_sequences, empty_targets

    sequences = [data[start:start + seq_length] for start in range(n_windows)]
    sequence_targets = [target[start + seq_length] for start in range(n_windows)]

    return np.array(sequences), np.array(sequence_targets)

# Look-back window: every sample holds 30 consecutive rows and is paired with
# the scaled 'close' value of the row that follows the window
seq_length = 30
X_train, y_train = create_sequences(data=train_data_scaled,
                                    target=train_close_scaled,
                                    seq_length=seq_length)
X_test, y_test = create_sequences(data=test_data_scaled,
                                  target=test_close_scaled,
                                  seq_length=seq_length)

**Model Building**

In [26]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, MultiHeadAttention, LayerNormalization, Flatten
from tensorflow.keras.models import Model

def build_transformer_model(seq_length, n_features, d_model=64, num_heads=4, ff_dim=128, dropout_rate=0.1):
    """Assemble and compile a single-block Transformer-encoder regressor.

    Pipeline: per-timestep linear projection to ``d_model`` -> multi-head
    self-attention with residual connection and layer normalisation ->
    feed-forward sub-layer with residual connection and layer normalisation ->
    flatten -> one linear output unit.

    Args:
        seq_length: Number of timesteps in each input window.
        n_features: Number of features per timestep.
        d_model: Width of the projected representation; also passed as
            ``key_dim`` to the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward sub-layer.
        dropout_rate: Dropout rate applied to the attention output and to the
            feed-forward output.

    Returns:
        A Keras ``Model`` compiled with the ``'adam'`` optimizer and ``'mse'`` loss.
    """
    window_input = Input(shape=(seq_length, n_features))

    # Lift the raw features to d_model channels so the residual additions
    # below operate on tensors of matching width
    embedded = Dense(d_model)(window_input)

    # --- Self-attention sub-layer (query and value are the same tensor) ---
    self_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
    attended = self_attention(embedded, embedded)
    attended = Dropout(dropout_rate)(attended)
    attention_block = LayerNormalization(epsilon=1e-6)(embedded + attended)

    # --- Feed-forward sub-layer ---
    hidden = Dense(ff_dim, activation='relu')(attention_block)
    hidden = Dense(d_model)(hidden)
    hidden = Dropout(dropout_rate)(hidden)
    encoder_output = LayerNormalization(epsilon=1e-6)(attention_block + hidden)

    # --- Regression head: collapse the (time, channel) axes, emit one value ---
    flattened = Flatten()(encoder_output)
    prediction = Dense(1, activation='linear')(flattened)

    transformer = Model(inputs=window_input, outputs=prediction)
    transformer.compile(optimizer='adam', loss='mse')
    return transformer

# Instantiate the network for the prepared windows; the per-timestep feature
# count is read from axis 2 of X_train
model = build_transformer_model(seq_length=seq_length, n_features=X_train.shape[2])

**Training**

Train the model

In [27]:
# Fit for 20 epochs with mini-batches of 64 windows, logging per-epoch progress.
# NOTE(review): the test split is passed as validation data here and is also
# the split scored in the evaluation cell, so the validation loss shown during
# training is not an independent estimate.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=(X_test, y_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


**Evaluation & Visualization**

In [28]:
import plotly.graph_objects as go

# Evaluate the model. The compiled loss is MSE, computed on the min-max-scaled
# 'close' values, so this number is in scaled units rather than in price units.
mse = model.evaluate(X_test, y_test, verbose=0)
print(f"Mean Squared Error on Test Data: {mse:.4f}")

# Predict on test data
y_pred = model.predict(X_test)

# Convert predictions back to original scale
y_pred_original = target_scaler.inverse_transform(y_pred)
y_test_original = target_scaler.inverse_transform(y_test)

# Also report the error in price units, which is easier to interpret than the
# scaled MSE above
rmse_original = float(((y_test_original - y_pred_original) ** 2).mean() ** 0.5)
print(f"Root Mean Squared Error on Test Data (in USDT): {rmse_original:.2f}")

# Target i of the test sequences is the 'close' of test row i + seq_length, so
# the matching timestamps start seq_length rows into the test split. Using them
# on the x-axis makes the "Time" axis show real dates instead of positions.
test_timestamps = test_data['timestamp'].iloc[seq_length:seq_length + len(y_test_original)]

# Create a Plotly figure
fig = go.Figure()

# Add traces for actual and predicted values
fig.add_trace(go.Scatter(x=test_timestamps,
                         y=y_test_original.flatten(),
                         mode='lines',
                         name='Actual',
                         line=dict(color='blue')))
fig.add_trace(go.Scatter(x=test_timestamps,
                         y=y_pred_original.flatten(),
                         mode='lines',
                         name='Predicted',
                         line=dict(color='red', dash='dot')))

# Update layout for dark mode and set height
fig.update_layout(template="plotly_dark",
                  title="Bitcoin Price Prediction using Transformers",
                  xaxis_title="Time",
                  yaxis_title="Price (in USDT)",
                  height=800)

# Show the plot
fig.show()

Mean Squared Error on Test Data: 0.0005
