In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder


In [2]:
# Step 1: fetch OpenML dataset id 180 and extract it as pandas objects.
# get_data returns four values; only the feature frame and the target are kept.
dataset = openml.datasets.get_dataset(180)
X, y, _, _ = dataset.get_data(
    dataset_format='dataframe',
    target=dataset.default_target_attribute,
)

In [3]:
# Step 2: keep the numeric columns, standardize them, integer-encode the target.
X_numeric = X.select_dtypes(include=[np.number])

# Cast to float32 before scaling so the result can be handed to torch as-is.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numeric.values.astype(np.float32))

# LabelEncoder maps the class labels onto consecutive integers starting at 0;
# int64 is the dtype nn.CrossEntropyLoss expects for class-index targets.
le = LabelEncoder()
y_encoded = le.fit_transform(y).astype(np.int64)


In [4]:
# 3. Train/test split, then standardize with training statistics only.
# X_scaled was fit on every row, so splitting it would leak test-set means and
# variances into the training features. Split the raw numeric matrix instead
# (same test_size / random_state, hence the same row assignment) and fit the
# scaler on the training rows alone.
X_train, X_test, y_train, y_test = train_test_split(
    X_numeric.values.astype(np.float32), y_encoded, test_size=0.2, random_state=42
)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# 4. PyTorch Dataset
class CovertypeDataset(Dataset):
    """Map-style dataset pairing a feature array with a label array.

    Args:
        X: NumPy array of features, one row per sample.
        y: NumPy array of labels, one entry per sample.

    Raises:
        ValueError: if ``X`` and ``y`` do not have the same number of rows.
    """

    def __init__(self, X, y):
        # Fail fast on misaligned inputs instead of hitting an index error (or
        # silently ignoring trailing labels) somewhere inside a DataLoader.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        # from_numpy shares memory with the source arrays (no copy).
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap both splits; only the training loader is shuffled.
train_ds, test_ds = CovertypeDataset(X_train, y_train), CovertypeDataset(X_test, y_test)

train_loader = DataLoader(dataset=train_ds, batch_size=256, shuffle=True)
test_loader = DataLoader(dataset=test_ds, batch_size=256)

In [6]:
# 5. MLP Model (swap for your technique if you want)
class MLP(nn.Module):
    """Baseline feed-forward classifier: input_dim -> 128 -> 64 -> num_classes.

    A ReLU follows each hidden layer; the last layer returns its raw output
    (no softmax is applied here).
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        layers = [
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes),
        ]
        # Stored under ``model`` so parameter names remain ``model.<index>.*``.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        logits = self.model(x)
        return logits

In [7]:
# Use the GPU when one is available; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Baseline MLP: one input per feature column, one output per encoded class.
model = MLP(input_dim=X_train.shape[1], num_classes=len(le.classes_)).to(device)


In [8]:
# Step 6: loss and optimizer for the MLP baseline.
# Cross-entropy on the model outputs; Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [9]:
from utils import train

# Train the MLP baseline; the test loader doubles as the validation set.
# NOTE(review): 50 is presumably the epoch count — confirm against utils.train.
train_metrics, eval_metrics = train(
    model,
    train_loader,
    test_loader,
    50,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 0.6929 Average_Time_per_batch: 0.0020 Train_Accuracy: 0.6437 Train_Loss: 0.9923 Validation_Accuracy: 0.6820 Validation_Loss: 0.8522
Epoch: 2 Total_Time: 0.4816 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.6878 Train_Loss: 0.8462 Validation_Accuracy: 0.6950 Validation_Loss: 0.8186
Epoch: 3 Total_Time: 0.4664 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.6990 Train_Loss: 0.8232 Validation_Accuracy: 0.7061 Validation_Loss: 0.7995
Epoch: 4 Total_Time: 0.4823 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.7049 Train_Loss: 0.8090 Validation_Accuracy: 0.7109 Validation_Loss: 0.7886
Epoch: 5 Total_Time: 0.4865 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.7105 Train_Loss: 0.7960 Validation_Accuracy: 0.7126 Validation_Loss: 0.7758
Epoch: 6 Total_Time: 0.4818 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.7168 Train_Loss: 0.7846 Validation_Accuracy: 0.7195 Validation_Loss: 0.7652
Epoch: 7 Total_Time: 0.5246 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.7214

In [10]:
from dpn_3.dpn import DPN

# Rebind `model` to the dpn_3 DPN. Its second argument is 192 + number of
# classes (192 = 128 + 64; presumably chosen to mirror the MLP's hidden widths).
# NOTE(review): a model.compile() call was tried here and left disabled.
model = DPN(
    X_train.shape[1],
    192 + len(le.classes_),
    len(le.classes_),
).to(device)

In [11]:
# Fresh loss and optimizer for whichever `model` is currently bound
# (the dpn_3 DPN when the cells are run in order).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [12]:
# Same training call as for the baseline, now applied to the current `model`.
train_metrics, eval_metrics = train(
    model,
    train_loader,
    test_loader,
    50,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 0.4025 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.6372 Train_Loss: 0.9994 Validation_Accuracy: 0.6747 Validation_Loss: 0.8749
Epoch: 2 Total_Time: 0.3963 Average_Time_per_batch: 0.0011 Train_Accuracy: 0.6807 Train_Loss: 0.8697 Validation_Accuracy: 0.6879 Validation_Loss: 0.8394
Epoch: 3 Total_Time: 0.3977 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.6900 Train_Loss: 0.8480 Validation_Accuracy: 0.6945 Validation_Loss: 0.8253
Epoch: 4 Total_Time: 0.3986 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.6943 Train_Loss: 0.8343 Validation_Accuracy: 0.7028 Validation_Loss: 0.8120
Epoch: 5 Total_Time: 0.3958 Average_Time_per_batch: 0.0011 Train_Accuracy: 0.6997 Train_Loss: 0.8232 Validation_Accuracy: 0.7044 Validation_Loss: 0.8040
Epoch: 6 Total_Time: 0.3970 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.7035 Train_Loss: 0.8145 Validation_Accuracy: 0.7106 Validation_Loss: 0.7942
Epoch: 7 Total_Time: 0.3970 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.7075

In [13]:
from dpn_2.dpn import DPN as DPN_4

# dpn_2 variant: same sizes as the previous DPN, with its fourth positional
# argument set to False, moved to `device`, then compiled via model.compile().
model = DPN_4(X_train.shape[1], 192 + len(le.classes_), len(le.classes_), False)
model = model.to(device)
model.compile()

In [14]:
# Fresh loss and optimizer for whichever `model` is currently bound
# (the compiled dpn_2 DPN when the cells are run in order).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [15]:
# Train the current `model` with the same settings as the earlier runs.
train_metrics, eval_metrics = train(
    model,
    train_loader,
    test_loader,
    50,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 16.7784 Average_Time_per_batch: 0.0486 Train_Accuracy: 0.6642 Train_Loss: 0.9298 Validation_Accuracy: 0.6987 Validation_Loss: 0.8175
Epoch: 2 Total_Time: 16.7006 Average_Time_per_batch: 0.0484 Train_Accuracy: 0.6994 Train_Loss: 0.8209 Validation_Accuracy: 0.7127 Validation_Loss: 0.7857
Epoch: 3 Total_Time: 17.7637 Average_Time_per_batch: 0.0515 Train_Accuracy: 0.7113 Train_Loss: 0.7929 Validation_Accuracy: 0.7147 Validation_Loss: 0.7789
Epoch: 4 Total_Time: 20.0138 Average_Time_per_batch: 0.0580 Train_Accuracy: 0.7200 Train_Loss: 0.7728 Validation_Accuracy: 0.7271 Validation_Loss: 0.7567
Epoch: 5 Total_Time: 18.6036 Average_Time_per_batch: 0.0539 Train_Accuracy: 0.7286 Train_Loss: 0.7550 Validation_Accuracy: 0.7354 Validation_Loss: 0.7433
Epoch: 6 Total_Time: 18.1225 Average_Time_per_batch: 0.0525 Train_Accuracy: 0.7355 Train_Loss: 0.7412 Validation_Accuracy: 0.7391 Validation_Loss: 0.7253
Epoch: 7 Total_Time: 19.4615 Average_Time_per_batch: 0.0564 Train_Accuracy:

In [16]:
from dpn_2.dpn import DPN as DPN2

# Widths of the layers being laid out: two hidden layers plus the output layer.
hidden_dims = [128, 64, len(le.classes_)]
total = sum(hidden_dims)

blocks = len(hidden_dims)
features = X_train.shape[1]

# One weight matrix per layer. A layer's fan-in is the raw input width plus the
# widths of all earlier layers, and its weights are scaled by sqrt(1 / fan_in).
neural_blocks = []
for dim in hidden_dims:
    std_dev = torch.sqrt(torch.tensor(1 / features)).to(device)
    neural_blocks.append(torch.randn(dim, features).to(device) * std_dev)
    features += dim

# Regroup the per-layer matrices by input segment: block i stacks, for layer i
# and every later layer, the columns that read segment i (segment 0 is the raw
# input, segment k > 0 is the output of layer k - 1).
feature_blocks = []
features_start = 0
for i in range(len(neural_blocks)):
    # Layer i's matrix ends exactly where segment i ends.
    features_end = neural_blocks[i].shape[1]
    block = torch.cat(
        [weights[:, features_start:features_end] for weights in neural_blocks[i:]],
        dim=0,
    )
    feature_blocks.append(nn.Parameter(block))
    features_start = features_end

# Move the tensor to the device BEFORE wrapping it: calling .to(device) on an
# nn.Parameter returns a plain non-leaf tensor when a copy is made, so the bias
# would no longer be a Parameter and could not be registered or optimised.
biases = nn.Parameter(torch.empty(total).uniform_(0.0, 1.0).to(device))

In [17]:
# Sanity check: one shape per feature block, then the bias vector's shape.
print(*(param.shape for param in feature_blocks), biases.shape, sep="\n")

torch.Size([199, 14])
torch.Size([71, 128])
torch.Size([7, 64])
torch.Size([199])


In [18]:
# Build the dpn_2 DPN with its fourth positional argument set to True, then
# install the hand-initialised feature blocks and bias vector.
# .to(device) replaces the hard-coded .cuda() so the cell also runs on CPU-only
# machines and matches how every other model in this notebook is placed.
# NOTE(review): presumably `model.weights` is a container that registers the
# appended parameters (e.g. nn.ParameterList) — confirm in dpn_2.dpn.
model = DPN2(X_train.shape[1], total, len(le.classes_), True).to(device)
model.weights.extend(feature_blocks)
model.biases = biases

In [19]:
# Fresh loss and optimizer for whichever `model` is currently bound
# (the hand-initialised DPN2 when the cells are run in order).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [20]:
# Train the current `model` with the same settings as the earlier runs.
train_metrics, eval_metrics = train(
    model,
    train_loader,
    test_loader,
    50,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 0.8724 Average_Time_per_batch: 0.0025 Train_Accuracy: 0.6516 Train_Loss: 0.9403 Validation_Accuracy: 0.6819 Validation_Loss: 0.8508
Epoch: 2 Total_Time: 0.7747 Average_Time_per_batch: 0.0022 Train_Accuracy: 0.6888 Train_Loss: 0.8481 Validation_Accuracy: 0.6987 Validation_Loss: 0.8153
Epoch: 3 Total_Time: 0.8477 Average_Time_per_batch: 0.0025 Train_Accuracy: 0.6984 Train_Loss: 0.8241 Validation_Accuracy: 0.7062 Validation_Loss: 0.8014
Epoch: 4 Total_Time: 0.8469 Average_Time_per_batch: 0.0025 Train_Accuracy: 0.7056 Train_Loss: 0.8098 Validation_Accuracy: 0.7123 Validation_Loss: 0.7864
Epoch: 5 Total_Time: 0.8623 Average_Time_per_batch: 0.0025 Train_Accuracy: 0.7116 Train_Loss: 0.7976 Validation_Accuracy: 0.7185 Validation_Loss: 0.7745
Epoch: 6 Total_Time: 0.8296 Average_Time_per_batch: 0.0024 Train_Accuracy: 0.7168 Train_Loss: 0.7849 Validation_Accuracy: 0.7214 Validation_Loss: 0.7671
Epoch: 7 Total_Time: 1.0916 Average_Time_per_batch: 0.0032 Train_Accuracy: 0.7220