In [1]:
import hopsworks

# Log in to Hopsworks; the returned project handle is reused by later cells
# (feature store access here, model registry access further down).
project = hopsworks.login()

# Feature store handle of that project, used to look up feature groups and feature views.
fs = project.get_feature_store()

  from .autonotebook import tqdm as notebook_tqdm


Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556181
Connected. Call `.close()` to terminate connection gracefully.


In [2]:
# Fetch version 2 of the feature group that holds the bitcoin price-movement features.
bitcoin_fg = fs.get_feature_group(name='bitcoin_price_movement', version=2)

In [3]:
# Query over the whole feature group (presumably every feature column — confirm against the
# Hopsworks docs); it is passed as `query` when the feature view is created.
training_data = bitcoin_fg.select_all()

In [4]:
# Version number under which the training feature view is looked up / registered.
version = 2

# Fetch the feature view with this name and version, creating it from the
# feature-group query when it does not exist yet.
feature_view_training = fs.get_or_create_feature_view(
    name='bitcoin_price_movement_training_fv', version=version, query=training_data,
)

In [5]:
# Pull the training data through the feature view. Only the first element of the
# returned pair is kept; the second one is discarded.
training_description = 'Training data for the prediction system from the historically available data.'
X, _ = feature_view_training.training_data(description=training_description)

Finished: Reading data from Hopsworks, using ArrowFlight (1.36s) 




In [6]:
import pandas as pd
# Show floats with four decimals whenever a DataFrame is rendered.
pd.set_option('display.float_format', '{:.4f}'.format)

# Order the rows by `id` and parse the `date` column into datetimes.
# Both steps return new frames, so `X` itself is left untouched.
sorted_X = (
    X
    .sort_values(by='id')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [7]:
# Drop the row identifier and the timestamp; the remaining columns are what the
# feature-engineering step works on.
final_X = sorted_X.drop(['id', 'date'], axis='columns')

In [8]:
import numpy as np
# Mid price of each row: the average of its high and low.
high_prices = final_X.loc[:, 'high'].values
low_prices = final_X.loc[:, 'low'].values
mid_prices = (high_prices + low_prices) / 2.0

# Row-over-row percentage change of the mid price. np.diff yields one value fewer
# than there are rows, so a leading 0 is inserted to keep the array row-aligned.
mid_price_changes = np.diff(mid_prices) / mid_prices[:-1] * 100
mid_price_changes = np.insert(mid_price_changes, 0, 0)

# The same percentage-change transform, applied column-wise to the remaining inputs.
feature_columns = [
    'volume', 'ma7', 'ma21', 'bollinger_upper', 'bollinger_lower',
    'volatility', 'close_usd_index', 'close_oil', 'close_gold', 'hash_rate',
]
features = final_X[feature_columns].values
feature_changes = np.diff(features, axis=0) / features[:-1] * 100
feature_changes = np.insert(feature_changes, 0, 0, axis=0)

# Column 0 is the mid-price change, followed by one column per entry of feature_columns.
combined_features = np.column_stack((mid_price_changes.reshape(-1, 1), feature_changes))

# A zero or missing previous value makes the division above produce inf/NaN.
# Such values would silently turn every loss into NaN during training, so fail here instead.
finite_rows = np.isfinite(combined_features).all(axis=1)
if not finite_rows.all():
    bad_rows = np.flatnonzero(~finite_rows)
    raise ValueError(
        f'combined_features contains non-finite values in {bad_rows.size} row(s) '
        f'(first at row {bad_rows[0]}); check the source columns for zeros or '
        'missing values before computing percentage changes.'
    )

In [9]:
# Number of consecutive rows that make up one model input.
sequence_length = 100

# One window per start index. The range stops early enough that every window
# still has a following row to take its label from.
window_starts = range(len(combined_features) - sequence_length)

# Each sample is the slice of `sequence_length` consecutive feature rows starting at `start`.
sequence_data = np.array(
    [combined_features[start:start + sequence_length] for start in window_starts]
)

# Label is 1 when the mid-price change of the row right after the window is
# strictly positive, and 0 otherwise.
sequence_labels = np.array(
    [int(mid_price_changes[start + sequence_length] > 0) for start in window_starts]
)

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report

In [11]:
# Ordered 80/20 split: the first 80% of the windows are used for training and
# the remaining 20% are held out. Nothing is shuffled before the split.
split_index = int(len(sequence_data) * 0.8)
train_features, test_features = sequence_data[:split_index], sequence_data[split_index:]
train_labels, test_labels = sequence_labels[:split_index], sequence_labels[split_index:]

# Wrap the NumPy arrays as PyTorch tensors so that DataLoader can batch them.
train_data = TensorDataset(torch.from_numpy(train_features), torch.from_numpy(train_labels))
test_data = TensorDataset(torch.from_numpy(test_features), torch.from_numpy(test_labels))

# Only the training batches are shuffled; the held-out batches keep their original order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [12]:
class GRUModel(nn.Module):
    """GRU encoder followed by dropout and a linear head.

    Only the GRU output at the last time step is passed on to the head, so a
    batch of sequences is mapped to one output vector per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: number of features per time step.
            hidden_size: width of the GRU hidden state.
            output_size: number of values produced per sequence.
        """
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Map `x` of shape (batch, seq_len, input_size) to (batch, output_size).

        No activation is applied to the result of the linear layer.
        """
        hidden_states, _ = self.gru(x)
        # batch_first=True puts time on axis 1; keep the final step only.
        last_step = hidden_states[:, -1, :]
        return self.fc(self.dropout(last_step))

# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and later consumers of the global RNG such as batch shuffling
# and dropout) are repeatable when the notebook is run top to bottom.
SEED = 42
torch.manual_seed(SEED)

# One input channel per column of combined_features; a single output per sequence.
input_size = combined_features.shape[1]
hidden_size = 30
output_size = 1

model = GRUModel(input_size, hidden_size, output_size)

# Upper bound on training epochs and the Adam step size.
epochs = 50
learning_rate = 0.001

# Per-epoch loss history, appended to by the training loop.
train_losses = []
test_losses = []

# Early-stopping state: stop once `patience` consecutive epochs fail to improve on best_loss.
patience = 10
best_loss = float('inf')
epochs_no_improve = 0

# BCEWithLogitsLoss applies the sigmoid itself, so the model output is used as-is.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [13]:
# Train for at most `epochs` epochs. After every epoch the loss on `test_loader` is
# computed; the weights are checkpointed whenever that loss improves, and training
# stops early after `patience` consecutive epochs without improvement.
# NOTE(review): the "validation" loss here is computed on test_loader, i.e. the same
# held-out split that is called "test" elsewhere, so checkpoint selection and early
# stopping are driven by the test split. A separate validation split would avoid that.
for epoch in range(epochs):
    # --- training pass ---
    model.train()
    train_loss = 0

    for data, label in train_loader:
        # Cast the batch to float32 (the tensors come from NumPy arrays, presumably float64).
        data = data.float()
        # Add a trailing axis so the labels line up with the model output
        # (one value per sequence, given output_size = 1).
        label = label.unsqueeze(-1).float()

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    train_loss /= len(train_loader)
    train_losses.append(train_loss)

    # --- evaluation pass (no gradient tracking, no weight updates) ---
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for data, label in test_loader:
            data = data.float()
            label = label.unsqueeze(-1).float()
            output = model(data)
            loss = criterion(output, label)
            val_loss += loss.item()

    # Mean of the per-batch losses on the held-out batches.
    val_loss /= len(test_loader)
    test_losses.append(val_loss)

    if val_loss < best_loss:
        # Strict improvement: remember it, reset the patience counter and checkpoint the weights.
        best_loss = val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), 'best_gru_model.pth')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f'Early stopping at epoch {epoch + 1}')
            # The break happens before the progress print below, so the stopping epoch is not logged there.
            break

    # Progress line every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

# Restore the checkpoint with the lowest recorded loss, so `model` holds those weights
# rather than the weights of the last epoch that ran.
model.load_state_dict(torch.load('best_gru_model.pth'))

Epoch [10/50], Train Loss: 0.6433, Validation Loss: 0.6729
Epoch [20/50], Train Loss: 0.5617, Validation Loss: 0.6753
Early stopping at epoch 26




<All keys matched successfully>

In [15]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

In [16]:
# Handle to the project's model registry.
mr = project.get_model_registry()

# Input: one tensor of shape (batch, sequence_length, n_features).
# The leading -1 presumably leaves the batch dimension open — confirm against the hsml docs.
input_schema = Schema([{
    "name": "sequence_data",
    "type": "tensor",
    "shape": [-1, sequence_length, combined_features.shape[1]],
}])

# Output: one tensor with `output_size` values per input sequence.
output_schema = Schema([{
    "name": "price_movement_prediction",
    "type": "tensor",
    "shape": [-1, output_size],
}])

# Bundle both descriptions into the schema that is attached to the registered model.
model_schema = ModelSchema(input_schema, output_schema)

Connected. Call `.close()` to terminate connection gracefully.


In [17]:
import os
# Local directory whose contents are uploaded to the model registry.
model_dir = "model"

# exist_ok=True makes this a no-op when the directory is already there, without the
# separate isdir() check (which leaves a window between the check and the mkdir).
os.makedirs(model_dir, exist_ok=True)

# Persist only the weights (state_dict); loading them later requires re-creating GRUModel.
torch.save(model.state_dict(), os.path.join(model_dir, "bitcoin_price_movement_prediction_gru.pth"))

In [19]:
# The weights being registered come from the checkpoint with the lowest validation
# loss, which is not the last epoch when early stopping ended the run. Report the
# losses of that epoch so the metrics describe the model that is actually uploaded.
# min() returns the first minimum, matching the strict `<` used when checkpointing.
best_epoch = min(range(len(test_losses)), key=test_losses.__getitem__)

gru_model_bitcoin = mr.python.create_model(
    name="bitcoin_price_movement_prediction_model_gru",
    metrics={
        "train_loss": train_losses[best_epoch],
        "val_loss": test_losses[best_epoch]
    },
    model_schema=model_schema,
    # A random tensor with the expected input shape serves as the input example.
    input_example={"sequence_data": torch.randn(1, sequence_length, combined_features.shape[1]).tolist()}
)

# Upload the contents of model_dir as a new version of the registered model.
gru_model_bitcoin.save(model_dir)

Uploading: 100.000%|██████████| 18676/18676 elapsed<00:01 remaining<00:001,  3.27it/s]
Uploading: 100.000%|██████████| 23678/23678 elapsed<00:01 remaining<00:001,  3.27it/s]
Uploading: 100.000%|██████████| 27275825/27275825 elapsed<00:06 remaining<00:0027it/s]
Uploading: 100.000%|██████████| 235427/235427 elapsed<00:02 remaining<00:00  3.27it/s]
Uploading: 100.000%|██████████| 22874/22874 elapsed<00:01 remaining<00:0029,  7.36s/it]
Uploading: 100.000%|██████████| 352/352 elapsed<00:01 remaining<00:00
Model export complete: 100%|██████████| 6/6 [00:21<00:00,  3.56s/it]                   

Model created, explore it at https://c.app.hopsworks.ai:443/p/556181/models/bitcoin_price_movement_prediction_model_gru/1





Model(name: 'bitcoin_price_movement_prediction_model_gru', version: 1)