In [None]:
import sklearn.datasets
import pandas as pd
import numpy as np
import torch.nn.functional as F
import torch

from featurizer import *
import sklearn.model_selection
from rtdl_revisiting_models import MLP, ResNet, FTTransformer
from sklearn.metrics import r2_score, accuracy_score

from tabdl import *

In [None]:
# DISABLED CELL: Iris multiclass experiment, kept for reference only.
# Everything below is commented out, so running this cell does nothing.
# If re-enabled it would featurize Iris with ClassFeaturizer, fit the three
# TabDLM variants (MLP / ResNet / FTTransformer) and report their in-sample
# accuracy (argmax over the predicted class scores).
# dataset = sklearn.datasets.load_iris(as_frame = True)
# X: np.ndarray = dataset["data"]
# Y: np.ndarray = dataset["target"]
# task_type = 'multiclass'
# f = ClassFeaturizer(depth=3, bit=False)
# X = f.fit_transform(X, Y)
# mlp = TabDLM('MLP', task_type, n_classes=3, verbose = False)
# resnet = TabDLM('ResNet', task_type, n_classes=3, verbose = False)
# ftt = TabDLM('FTTransformer', task_type, n_classes=3, verbose = False)
# mlp.fit(X, Y)
# resnet.fit(X, Y)
# ftt.fit(X, Y)
# accuracy_score(np.argmax(mlp.predict(X), axis = 1), Y), accuracy_score(np.argmax(resnet.predict(X), axis = 1), Y), accuracy_score(np.argmax(ftt.predict(X), axis = 1), Y)

In [None]:
# Synthetic regression benchmark.
#
# Builds a 1000-row table with 3 Gaussian features and 2 integer-coded
# categorical features, defines the target as a known linear combination of
# those columns plus small Gaussian noise, featurizes it with RegFeaturizer and
# fits the three TabDLM variants. The cell's output is the tuple of R^2 scores
# (MLP, ResNet, FTTransformer).
#
# NOTE: the scores are computed on the same rows the models were fitted on, so
# they are in-sample fit quality, not an estimate of generalization.

# Fix the RNGs so Restart & Run All regenerates the same data set.
SEED = 42
np.random.seed(SEED)
# Presumably TabDLM initializes/trains with torch's global RNG -- TODO confirm.
torch.manual_seed(SEED)

num_samples = 1000

# Generate continuous features
continuous_features = np.random.randn(num_samples, 3)

# Generate categorical features (integer-coded)
categorical_feature_1 = np.random.randint(0, 3, num_samples)  # 3 categories
categorical_feature_2 = np.random.randint(0, 5, num_samples)  # 5 categories

# Combine the features into a DataFrame
data = pd.DataFrame(continuous_features, columns=['cont_feature_1', 'cont_feature_2', 'cont_feature_3'])
data['cat_feature_1'] = categorical_feature_1
data['cat_feature_2'] = categorical_feature_2

# Generate the target variable Y as a linear combination of features + noise.
# Coefficient order matches the column order of `features` below.
coefficients = np.array([1.5, -2.0, 3.0, 0.5, -1.0])
features = np.hstack([continuous_features, categorical_feature_1.reshape(-1, 1), categorical_feature_2.reshape(-1, 1)])
noise = np.random.randn(num_samples) * 0.1  # Add some noise

data['Y'] = features.dot(coefficients) + noise

# Split the frame back into inputs and target (both pandas objects).
X = data.drop(columns=['Y'])
Y = data['Y']

f = RegFeaturizer(depth=3, bit=False)
task_type = 'regression'
X = f.fit_transform(X, Y)

# `n_classes` is a classification setting; the regression models are built
# without it, the same way the California-housing cell builds them.
mlp = TabDLM('MLP', task_type, verbose=False)
resnet = TabDLM('ResNet', task_type, verbose=False)
ftt = TabDLM('FTTransformer', task_type, verbose=False)
mlp.fit(X, Y)
resnet.fit(X, Y)
ftt.fit(X, Y)

# r2_score expects (y_true, y_pred); R^2 is not symmetric, so the ground truth
# must come first.
r2_score(Y, mlp.predict(X)), r2_score(Y, resnet.predict(X)), r2_score(Y, ftt.predict(X))

In [None]:
# California housing regression benchmark.
#
# Loads the data set as pandas objects, fits the three TabDLM variants on the
# raw (un-featurized) columns and outputs the tuple of R^2 scores
# (MLP, ResNet, FTTransformer).
#
# NOTE: the scores are computed on the same rows the models were fitted on, so
# they are in-sample fit quality, not an estimate of generalization.
dataset = sklearn.datasets.fetch_california_housing(as_frame=True)
# With as_frame=True scikit-learn returns a DataFrame / Series, not ndarrays.
X: pd.DataFrame = dataset["data"]
Y: pd.Series = dataset["target"]
task_type = 'regression'
# The featurizer is constructed but not applied: the transform below is
# disabled, so the models see the original housing columns.
f = RegFeaturizer(depth=3, bit=False)
#X = f.fit_transform(X, Y)
mlp = TabDLM('MLP', task_type, verbose=False)
resnet = TabDLM('ResNet', task_type, verbose=False)
ftt = TabDLM('FTTransformer', task_type, verbose=False)
mlp.fit(X, Y)
resnet.fit(X, Y)
ftt.fit(X, Y)

# r2_score expects (y_true, y_pred); R^2 is not symmetric, so the ground truth
# must come first.
r2_score(Y, mlp.predict(X)), r2_score(Y, resnet.predict(X)), r2_score(Y, ftt.predict(X))