In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.datasets import fetch_california_housing

from rtdl_num_embeddings import PiecewiseLinearEmbeddings

### Model

In [7]:
class TabMModel(nn.Module):
    def __init__(self, num_continuous, num_categorical, cat_cardinalities, embed_dim, num_bins, num_heads=4, dropout=0.1):
        super().__init__()

        self.cat_embeddings = nn.ModuleList([
            nn.Embedding(cardinality, embed_dim) for cardinality in cat_cardinalities
        ])

        self.piecewise_encoding = PiecewiseLinearEmbeddings(num_continuous, embed_dim)

        self.shared_linear = nn.Linear(embed_dim, embed_dim, bias=False)

        self.r_vectors = nn.Parameter(torch.randn(num_heads, embed_dim))
        self.s_vectors = nn.Parameter(torch.randn(num_heads, embed_dim))
        self.b_vectors = nn.Parameter(torch.randn(num_heads, embed_dim))

        self.fc1 = nn.Linear(embed_dim * num_heads, embed_dim)
        self.fc2 = nn.Linear(embed_dim, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x_cont, x_cat):

        cat_embeds = [embed(x_cat[:, i]) for i, embed in enumerate(self.cat_embeddings)]
        cat_embeds = torch.cat(cat_embeds, dim=1)

        cont_embeds = self.piecewise_encoding(x_cont)
        cont_embeds = cont_embeds.view(x_cont.shape[0], -1)

        x = torch.cat([cat_embeds, cont_embeds], dim=1)

        x = self.shared_linear(x)

        ensemble_outputs = []
        for i in range(len(self.r_vectors)):
            x_transformed = (x * self.r_vectors[i]) @ self.shared_linear.weight + self.b_vectors[i]
            ensemble_outputs.append(x_transformed)

        x = torch.cat(ensemble_outputs, dim=-1)

        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

### Data fetching

In [8]:
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target  # Целевая переменная

cat_features = ['HouseAgeCat', 'AveRoomsCat']
df['HouseAgeCat'] = pd.cut(df['HouseAge'], bins=5, labels=False)
df['AveRoomsCat'] = pd.cut(df['AveRooms'], bins=5, labels=False)


num_features = ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup']
cat_features = ['HouseAgeCat', 'AveRoomsCat']

encoder = OrdinalEncoder()
df[cat_features] = encoder.fit_transform(df[cat_features]).astype(int)

X_train, X_test, y_train, y_test = train_test_split(df[num_features + cat_features], df['target'], test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train[num_features] = scaler.fit_transform(X_train[num_features])
X_test[num_features] = scaler.transform(X_test[num_features])

X_train_cont = torch.tensor(X_train[num_features].values, dtype=torch.float32)
X_train_cat = torch.tensor(X_train[cat_features].values, dtype=torch.long)
y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

X_test_cont = torch.tensor(X_test[num_features].values, dtype=torch.float32)
X_test_cat = torch.tensor(X_test[cat_features].values, dtype=torch.long)
y_test = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

### t

In [None]:
data = fetch_california_housing()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

torch_X_train = torch.tensor(X_train, dtype=torch.float32)
torch_y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
torch_X_test = torch.tensor(X_test, dtype=torch.float32)
torch_y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(torch_X_train, torch_y_train)
test_dataset = TensorDataset(torch_X_test, torch_y_test)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

### Training

In [10]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = TabMModel(num_continuous=len(num_features), num_categorical=len(cat_features),
                  cat_cardinalities=[len(df[col].unique()) for col in cat_features], embed_dim=16, num_bins=4).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

X_train_cont, X_train_cat, y_train = X_train_cont.to(device), X_train_cat.to(device), y_train.to(device)
X_test_cont, X_test_cat, y_test = X_test_cont.to(device), X_test_cat.to(device), y_test.to(device)

for epoch in range(50):
    model.train()
    optimizer.zero_grad()
    y_pred = model(X_train_cont, X_train_cat)
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            test_pred = model(X_test_cont, X_test_cat)
            test_loss = criterion(test_pred, y_test)
        print(f'Epoch {epoch}, Train Loss: {loss.item()}, Test Loss: {test_loss.item()}')

AssertionError: was expecting embedding dimension of 16, but got 128

### Evaluation

In [None]:
model.eval()
with torch.no_grad():
    example_cont = torch.tensor([[0.5, -1.2, 0.8, 0.0, -0.5, 0.3]], dtype=torch.float32).to(device)
    example_cat = torch.tensor([[2, 1]], dtype=torch.long).to(device)
    prediction = model(example_cont, example_cat)
    print("Predicted price:", prediction.item())