In [34]:
import numpy as np

# Generate some random training data.
# All draws come from one explicitly seeded generator so that
# "Restart Kernel -> Run All" reproduces the exact same dataset.
SEED = 42
rng = np.random.default_rng(SEED)

num_users = 1000
num_items = 500
num_features = 10
num_samples = 100000
train_fraction = 0.8  # share of samples placed in the training split

# Integer ids are drawn from [0, num_users) / [0, num_items);
# features and labels are uniform floats in [0, 1).
user_ids = rng.integers(num_users, size=num_samples)
item_ids = rng.integers(num_items, size=num_samples)
features = rng.random((num_samples, num_features))
labels = rng.random(num_samples)

# Split the data into training and validation sets.
# The rows are i.i.d. random draws, so a positional split (first
# `train_size` rows vs. the remainder) needs no extra shuffling.
train_size = int(num_samples * train_fraction)
train_user_ids = user_ids[:train_size]
train_item_ids = item_ids[:train_size]
train_features = features[:train_size]
# Labels are reshaped to a single column: (n,) -> (n, 1).
train_labels = labels[:train_size].reshape(-1, 1)

val_user_ids = user_ids[train_size:]
val_item_ids = item_ids[train_size:]
val_features = features[train_size:]
val_labels = labels[train_size:].reshape(-1, 1)

# Cheap invariants: the two splits partition the data exactly.
assert len(train_features) + len(val_features) == num_samples
assert train_labels.shape == (train_size, 1)


In [37]:
import torch
# Use the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [40]:
from pytorch_tabnet.tab_model import TabNetRegressor

# Define the model.
# Hyperparameter meanings are defined by pytorch-tabnet; the values are
# grouped here by concern (one keyword per line) for easier tuning.
model = TabNetRegressor(
    # --- runtime ---
    device_name=device,
    verbose=1,
    # --- network hyperparameters ---
    n_d=32,
    n_a=32,
    n_steps=4,
    gamma=1.3,
    mask_type='entmax',
    lambda_sparse=0,
    # --- optimizer: Adam ---
    optimizer_fn=torch.optim.Adam,
    optimizer_params={"lr": 2e-2, "weight_decay": 1e-5},
    # --- learning-rate schedule: ReduceLROnPlateau ---
    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
    scheduler_params={
        "mode": "min",
        "patience": 5,
        "min_lr": 1e-5,
        "factor": 0.9,
    },
)

In [41]:
# Train the model on the training split, evaluating on the validation
# split (reported under the name 'val' in the training log).
# NOTE(review): patience (10) is larger than max_epochs (5), so early
# stopping presumably cannot trigger in this run -- confirm that is intended.
model.fit(
    # data
    X_train=train_features,
    y_train=train_labels,
    eval_set=[(val_features, val_labels)],
    eval_name=['val'],
    # training length / stopping
    max_epochs=5,
    patience=10,
    # batching and data loading
    batch_size=1024,
    virtual_batch_size=128,
    drop_last=False,
    num_workers=0,
)


epoch 0  | loss: 0.30363 | val_mse: 0.08418 |  0:00:12s
epoch 1  | loss: 0.0876  | val_mse: 0.08372 |  0:00:25s
epoch 2  | loss: 0.08632 | val_mse: 0.08333 |  0:00:39s
epoch 3  | loss: 0.08498 | val_mse: 0.0841  |  0:00:53s
epoch 4  | loss: 0.0846  | val_mse: 0.08345 |  0:01:09s
Stop training because you reached max_epochs = 5 with best_epoch = 2 and best_val_mse = 0.08333




In [42]:
# Generate some test data.
# A dedicated, seeded generator keeps the test batch reproducible without
# touching NumPy's global random state; the batch size is named once
# instead of repeating the literal 100.
num_test_samples = 100
TEST_SEED = 123
test_rng = np.random.default_rng(TEST_SEED)

# Ids are drawn from [0, num_users) / [0, num_items); features are uniform
# floats in [0, 1) with one column per feature.
# NOTE(review): test_user_ids / test_item_ids are not passed to the model
# in this cell -- only test_features is used for prediction.
test_user_ids = test_rng.integers(num_users, size=num_test_samples)
test_item_ids = test_rng.integers(num_items, size=num_test_samples)
test_features = test_rng.random((num_test_samples, num_features))

# Make predictions
predictions = model.predict(test_features)


In [44]:
# Display the dimensions of the prediction array as a tuple.
np.shape(predictions)

(100, 1)