In [2]:
# Install pytorch-tabnet (if not installed)
!pip install pytorch-tabnet scikit-learn

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Collecting torch>=1.3 (from pytorch-tabnet)
  Downloading torch-2.8.0-cp312-cp312-win_amd64.whl.metadata (30 kB)
Collecting sympy>=1.13.3 (from torch>=1.3->pytorch-tabnet)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
Downloading torch-2.8.0-cp312-cp312-win_amd64.whl (241.3 MB)
   ---------------------------------------- 0.0/241.3 MB ? eta -:--:--
   ---------------------------------------- 0.3/241.3 MB ? eta -:--:--
   ---------------------------------------- 0.3/241.3 MB ? eta -:--:--
   ---------------------------------------- 0.5/241.3 MB 799.2 kB/s eta 0:05:02
   ---------------------------------------- 0.5/241.3 MB 799.2 kB/s eta 0:05:02
   ---------------------------------------- 0.5/241.3 MB 799.2 kB/s eta 0:05:02
   ---------------------------------------- 0.5/241.3 MB 799.2 kB/s eta 0:05:02
   -----------------

In [3]:
# Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from pytorch_tabnet.tab_model import TabNetClassifier
import torch

In [4]:
# Load dataset
df = pd.read_csv("E:\CTS Hackathon\CTS_Hackathon\data\hospital_readmissions.csv")

# Preview dataset
print(df.shape)
df.head()

(25000, 17)


  df = pd.read_csv("E:\CTS Hackathon\CTS_Hackathon\data\hospital_readmissions.csv")


Unnamed: 0,age,time_in_hospital,n_lab_procedures,n_procedures,n_medications,n_outpatient,n_inpatient,n_emergency,medical_specialty,diag_1,diag_2,diag_3,glucose_test,A1Ctest,change,diabetes_med,readmitted
0,[70-80),8,72,1,18,2,0,0,Missing,Circulatory,Respiratory,Other,no,no,no,yes,no
1,[70-80),3,34,2,13,0,0,0,Other,Other,Other,Other,no,no,no,yes,no
2,[50-60),5,45,0,18,0,0,0,Missing,Circulatory,Circulatory,Circulatory,no,no,yes,yes,yes
3,[70-80),2,36,0,12,1,0,0,Missing,Circulatory,Other,Diabetes,no,no,yes,yes,yes
4,[60-70),1,42,0,7,0,0,0,InternalMedicine,Other,Circulatory,Respiratory,no,no,no,yes,no


In [5]:
# Define target column (update if name is different in your file)
target = "readmitted"
features = [col for col in df.columns if col != target]

# Encode categorical variables
for col in df.select_dtypes(include=['object']).columns:
    if col != target:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col].astype(str))

# Encode target if categorical
if df[target].dtype == 'object':
    le_target = LabelEncoder()
    df[target] = le_target.fit_transform(df[target])

X = df[features].values
y = df[target].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [6]:
clf = TabNetClassifier(
    device_name='cuda' if torch.cuda.is_available() else 'cpu',
    seed=42,
    verbose=1
)

clf.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    eval_name=['train', 'valid'],
    eval_metric=['accuracy'],
    max_epochs=50,
    patience=10,
    batch_size=1024,
    virtual_batch_size=128
)




epoch 0  | loss: 0.72645 | train_accuracy: 0.55945 | valid_accuracy: 0.5654  |  0:00:08s
epoch 1  | loss: 0.68035 | train_accuracy: 0.5804  | valid_accuracy: 0.5814  |  0:00:16s
epoch 2  | loss: 0.67427 | train_accuracy: 0.58855 | valid_accuracy: 0.5888  |  0:00:24s
epoch 3  | loss: 0.67043 | train_accuracy: 0.59385 | valid_accuracy: 0.6014  |  0:00:33s
epoch 4  | loss: 0.6694  | train_accuracy: 0.59215 | valid_accuracy: 0.5882  |  0:00:42s
epoch 5  | loss: 0.66849 | train_accuracy: 0.59605 | valid_accuracy: 0.5934  |  0:00:49s
epoch 6  | loss: 0.66468 | train_accuracy: 0.6014  | valid_accuracy: 0.6038  |  0:00:57s
epoch 7  | loss: 0.66265 | train_accuracy: 0.6038  | valid_accuracy: 0.6092  |  0:01:07s
epoch 8  | loss: 0.66092 | train_accuracy: 0.6031  | valid_accuracy: 0.6084  |  0:01:14s
epoch 9  | loss: 0.66152 | train_accuracy: 0.6051  | valid_accuracy: 0.6128  |  0:01:21s
epoch 10 | loss: 0.66062 | train_accuracy: 0.60365 | valid_accuracy: 0.6074  |  0:01:27s
epoch 11 | loss: 0.66



In [7]:
# Predictions
y_pred = clf.predict(X_test)

# Metrics
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

✅ Accuracy: 0.6136

Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.77      0.68      2649
           1       0.63      0.44      0.52      2351

    accuracy                           0.61      5000
   macro avg       0.62      0.60      0.60      5000
weighted avg       0.62      0.61      0.60      5000


Confusion Matrix:
 [[2036  613]
 [1319 1032]]
