In [19]:
!pip install pytorch-tabnet



In [20]:
import pandas as pd
import torch

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import accuracy_score

In [21]:
dataset = pd.read_csv("train.csv")
dataset.head()

Unnamed: 0,Year,Month,Consumer_type,Consumption,Consumer_number,Installation_zone
0,2013,1,domestic,0,MOGV36480546611521,Installation_zone 1
1,2013,1,industrial,5,BECS02817768252637,Installation_zone 2
2,2013,1,domestic,6,VRFW65577141436242,Installation_zone 2
3,2013,1,domestic,1,QLLI18662653137621,Installation_zone 2
4,2013,1,domestic,13,HYUO61823402850645,Installation_zone 2


In [22]:
le_iz = LabelEncoder()
le_iz.fit(dataset['Installation_zone'].unique())

le_ct = LabelEncoder()
le_ct.fit(dataset['Consumer_type'].unique())

le_cn = LabelEncoder()
le_cn.fit(dataset['Consumer_number'].unique())

dataset['Installation_zone'] = le_iz.transform(dataset['Installation_zone'])
dataset['Consumer_type'] = le_ct.transform(dataset['Consumer_type'])
dataset['Consumer_number'] = le_cn.transform(dataset['Consumer_number'])

In [23]:
X = dataset.drop('Consumer_type', axis='columns')
y = dataset['Consumer_type']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
# Define TabNet model
clf = TabNetClassifier(
    n_d=8,  # Dimensionality of the model (default is 8)
    n_a=8,  # Attention dimensionality (default is 8)
    n_steps=5,  # Number of steps in the architecture (default is 5)
    gamma=1.5,  # This parameter controls the importance of sparse regularization (default is 1.5)
    n_independent=2,  # Number of independently masked feature columns for each decision step (default is 2)
    n_shared=2,  # Number of shared decision steps (default is 2)
    cat_dims=[X_train[col].nunique() for col in X_train.select_dtypes(include=['object'])],  # Number of categories for each categorical column
    cat_emb_dim=1,  # Embedding dimension for categorical features (default is 1)
    lambda_sparse=0.001,  # This parameter controls the importance of sparse regularization (default is 0.001)
    optimizer_fn=torch.optim.Adam,  # Optimizer used for training (default is Adam)
    optimizer_params=dict(lr=2e-2),  # Learning rate for the optimizer (default is 2e-2)
    scheduler_params={"step_size":50, "gamma":0.9},  # Learning rate scheduler parameters
    scheduler_fn=torch.optim.lr_scheduler.StepLR,  # Learning rate scheduler function
    mask_type='entmax'  # Type of mask to use ('sparsemax' or 'entmax')
)



In [25]:
# Train the model
clf.fit(
    X_train.values, y_train.values,
    eval_set=[(X_train.values, y_train.values), (X_test.values, y_test.values)],
    eval_metric=['accuracy'],
    max_epochs=100,
    patience=20,
    batch_size=64,
    virtual_batch_size=32,
    num_workers=0,
    weights=1,
    drop_last=False,
    from_unsupervised=None
)

epoch 0  | loss: 1.60912 | val_0_accuracy: 0.24021 | val_1_accuracy: 0.24109 |  0:03:26s
epoch 1  | loss: 1.41533 | val_0_accuracy: 0.23696 | val_1_accuracy: 0.2354  |  0:06:44s
epoch 2  | loss: 1.36732 | val_0_accuracy: 0.36593 | val_1_accuracy: 0.36701 |  0:10:02s
epoch 3  | loss: 1.34878 | val_0_accuracy: 0.15774 | val_1_accuracy: 0.1586  |  0:13:24s
epoch 4  | loss: 1.34057 | val_0_accuracy: 0.36575 | val_1_accuracy: 0.36676 |  0:16:38s
epoch 5  | loss: 1.33416 | val_0_accuracy: 0.37599 | val_1_accuracy: 0.37776 |  0:19:52s
epoch 6  | loss: 1.33282 | val_0_accuracy: 0.31703 | val_1_accuracy: 0.31899 |  0:23:09s
epoch 7  | loss: 1.32505 | val_0_accuracy: 0.32898 | val_1_accuracy: 0.33066 |  0:26:33s
epoch 8  | loss: 1.32136 | val_0_accuracy: 0.34451 | val_1_accuracy: 0.34469 |  0:29:45s
epoch 9  | loss: 1.32741 | val_0_accuracy: 0.14463 | val_1_accuracy: 0.14545 |  0:32:54s
epoch 10 | loss: 1.31806 | val_0_accuracy: 0.28945 | val_1_accuracy: 0.29192 |  0:36:01s
epoch 11 | loss: 1.30



In [26]:
# Make predictions on the test set
y_pred = clf.predict(X_test.values)

In [27]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.37775589059777254
