In [53]:
import numpy as np

num_users = 1000
num_items = 500
num_features = 10
num_samples = 100000
target_corr = 0.9

# Generate a random correlation matrix
corr_matrix = np.random.uniform(low=-1, high=1, size=(num_features, num_features))
corr_matrix = np.tril(corr_matrix) + np.tril(corr_matrix, -1).T
corr_matrix[np.diag_indices(num_features)] = 1.0

# Generate random data with the specified correlation
data = np.random.multivariate_normal(
    mean=np.zeros(num_features),
    cov=corr_matrix,
    size=num_samples
)

# Calculate labels based on the sum of features, multiplied by the target correlation
labels = np.sum(data, axis=1) * target_corr + np.random.normal(size=num_samples)

# Generate random user and item IDs
user_ids = np.random.randint(num_users, size=num_samples)
item_ids = np.random.randint(num_items, size=num_samples)

# Combine the data and labels into a single array
features = np.column_stack([user_ids, item_ids, data])
all_data = np.column_stack([features, labels])

# Split the data into training and validation sets
train_size = int(num_samples * 0.8)
train_data = all_data[:train_size]
val_data = all_data[train_size:]

train_user_ids = train_data[:, 0]
train_item_ids = train_data[:, 1]
train_features = train_data[:, 2:-1]
train_labels = train_data[:, -1].reshape(-1, 1)

val_user_ids = val_data[:, 0]
val_item_ids = val_data[:, 1]
val_features = val_data[:, 2:-1]
val_labels = val_data[:, -1].reshape(-1, 1)

  data = np.random.multivariate_normal(


In [54]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [55]:
from pytorch_tabnet.tab_model import TabNetRegressor

# Define the model
model = TabNetRegressor(
    n_d=32, n_a=32, n_steps=4, gamma=1.3,
    lambda_sparse=0, optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
    mask_type='entmax', scheduler_params=dict(mode="min", patience=5, min_lr=1e-5, factor=0.9),
    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
    device_name = device,
    verbose=1
)



In [57]:
# Train the model
model.fit(
    X_train=train_features, y_train=train_labels,
    eval_set=[(val_features, val_labels)],
    eval_name=['val'],
    max_epochs=5,
    patience=10,
    batch_size=3024,
    virtual_batch_size=128,
    num_workers=0,
    drop_last=False,
)


epoch 0  | loss: 4.3733  | val_mse: 2.79401 |  0:00:15s
epoch 1  | loss: 1.61583 | val_mse: 1.52658 |  0:00:31s
epoch 2  | loss: 1.34815 | val_mse: 1.37828 |  0:00:49s
epoch 3  | loss: 1.31546 | val_mse: 1.31404 |  0:01:07s
epoch 4  | loss: 1.27199 | val_mse: 1.17648 |  0:01:24s
Stop training because you reached max_epochs = 5 with best_epoch = 4 and best_val_mse = 1.17648




In [42]:
# Generate some test data
test_user_ids = np.random.randint(num_users, size=100)
test_item_ids = np.random.randint(num_items, size=100)
test_features = np.random.rand(100, num_features)

# Make predictions
predictions = model.predict(test_features)
