In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder


In [2]:
np.random.seed(42)  # Set the seed for NumPy

# Set the seed for PyTorch (CPU and GPU)
torch.manual_seed(42)

# If you're using CUDA (GPU), set the seed for CUDA as well
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)  # For all GPUs (if you have more than one)

In [3]:
# 1. Download the dataset (OpenML: id 180)
dataset = openml.datasets.get_dataset(180)
X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute, dataset_format='dataframe')

In [4]:
# 2. Preprocessing: Numeric features only, standardize, encode labels (classes are 1-7)
X_numeric = X.select_dtypes(include=[np.number])

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numeric.values.astype(np.float32))

le = LabelEncoder()
y_encoded = le.fit_transform(y)  # Converts to 0...6
y_encoded = y_encoded.astype(np.int64)

In [5]:
# 3. Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

In [6]:
# 4. PyTorch Dataset
class CovertypeDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_ds = CovertypeDataset(X_train, y_train)
test_ds = CovertypeDataset(X_test, y_test)

train_loader = DataLoader(train_ds, batch_size=256, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=256)

In [7]:
from dpn_4.dpn import DPN
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DPN(X_train.shape[1], 192 + len(le.classes_), len(le.classes_), False).to(device)
model.compile()

In [8]:
# 6. Training setup
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [9]:
from utils import train

model = model
train_loader = train_loader
val_loader = test_loader
optimizer = optimizer
criterion = criterion
device = device

percent = 0.99
rounds = 100

In [10]:
original_weights = [param.clone().detach() for param in model.weights]
final_masks = [torch.ones_like(param) for param in original_weights]
current_masks = [torch.ones_like(param) for param in original_weights]
p_per_round = 1 - (1 - percent) ** (1 / rounds)

In [11]:
def prune_by_percent_once(percent, mask, final_weight):
    # Get the absolute values of weights where mask == 1
    masked_weights = final_weight[mask == 1].abs()

    # Sort the unmasked weights
    sorted_weights, _ = torch.sort(masked_weights)
    if sorted_weights.shape[0] != 0:
        # Determine the cutoff index for pruning
        cutoff_index = int(round(percent * sorted_weights.shape[0]))
        cutoff = sorted_weights[cutoff_index]

        # Prune all weights below or equal to the cutoff
        if torch.rand(1).item() < 0.3:
            new_mask = torch.where(final_weight.abs() <= cutoff, torch.zeros_like(mask), mask)
        else:
            for i in range(final_weight.shape[0]):
                tensor = final_weight[i]
                last_nonzero_index = torch.nonzero(tensor)
                if len(last_nonzero_index) > 0:
                    last_nonzero_index = last_nonzero_index[-1].item()
                    if final_weight[i, last_nonzero_index].abs() <= cutoff:
                        mask[i, last_nonzero_index] = 0
                    
            new_mask = mask
    else:
        new_mask = mask

    return new_mask

In [12]:
def prune_by_percent(model, masks, percent):

    blocks = model.weights
    for i in range(len(blocks)):
        masks[i] = prune_by_percent_once(percent, masks[i], blocks[i])

    return masks

In [13]:
_, val_metrics = train(model, train_loader, val_loader, 5, optimizer, criterion, device=device)
val_accuracy = val_metrics[-1][1]


Epoch: 1 Total_Time: 18.7681 Average_Time_per_batch: 0.0544 Train_Accuracy: 0.6576 Train_Loss: 0.9413 Validation_Accuracy: 0.6929 Validation_Loss: 0.8268
Epoch: 2 Total_Time: 18.8654 Average_Time_per_batch: 0.0547 Train_Accuracy: 0.6980 Train_Loss: 0.8217 Validation_Accuracy: 0.7055 Validation_Loss: 0.7944
Epoch: 3 Total_Time: 19.3298 Average_Time_per_batch: 0.0560 Train_Accuracy: 0.7119 Train_Loss: 0.7927 Validation_Accuracy: 0.7196 Validation_Loss: 0.7718
Epoch: 4 Total_Time: 19.1700 Average_Time_per_batch: 0.0556 Train_Accuracy: 0.7235 Train_Loss: 0.7711 Validation_Accuracy: 0.7289 Validation_Loss: 0.7562
Epoch: 5 Total_Time: 19.2239 Average_Time_per_batch: 0.0557 Train_Accuracy: 0.7329 Train_Loss: 0.7529 Validation_Accuracy: 0.7368 Validation_Loss: 0.7401Peak GPU memory: 18.96 MB


In [14]:
for round_idx in range(rounds):
    current_masks = prune_by_percent(model, current_masks, p_per_round)
    pruned_weights = [w * m for w, m in zip(original_weights, current_masks)]
    model.weights = nn.ParameterList([nn.Parameter(w) for w in pruned_weights])
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    _, val_metrics = train(model, train_loader, val_loader, 5, optimizer, criterion, device=device)
    new_val_accuracy = val_metrics[-1][1]
    
    if new_val_accuracy >= val_accuracy:
        val_accuracy = new_val_accuracy
        final_masks = current_masks.copy()  # This creates a shallow copy of the list


Epoch: 1 Total_Time: 19.1009 Average_Time_per_batch: 0.0554 Train_Accuracy: 0.6597 Train_Loss: 0.9353 Validation_Accuracy: 0.6956 Validation_Loss: 0.8237
Epoch: 2 Total_Time: 19.0101 Average_Time_per_batch: 0.0551 Train_Accuracy: 0.6991 Train_Loss: 0.8177 Validation_Accuracy: 0.7098 Validation_Loss: 0.7892
Epoch: 3 Total_Time: 19.2746 Average_Time_per_batch: 0.0559 Train_Accuracy: 0.7143 Train_Loss: 0.7873 Validation_Accuracy: 0.7245 Validation_Loss: 0.7653
Epoch: 4 Total_Time: 19.1273 Average_Time_per_batch: 0.0554 Train_Accuracy: 0.7245 Train_Loss: 0.7656 Validation_Accuracy: 0.7312 Validation_Loss: 0.7478
Epoch: 5 Total_Time: 18.9817 Average_Time_per_batch: 0.0550 Train_Accuracy: 0.7349 Train_Loss: 0.7483 Validation_Accuracy: 0.7383 Validation_Loss: 0.7356Peak GPU memory: 18.96 MB

Epoch: 1 Total_Time: 19.1792 Average_Time_per_batch: 0.0556 Train_Accuracy: 0.6620 Train_Loss: 0.9292 Validation_Accuracy: 0.6963 Validation_Loss: 0.8222
Epoch: 2 Total_Time: 19.2220 Average_Time_per_bat

In [15]:
pruned_weights = [w * m for w, m in zip(original_weights, final_masks)]
model.weights = nn.ParameterList([nn.Parameter(w) for w in pruned_weights])
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [16]:
train_metrics, eval_metrics = train(model, train_loader, test_loader, 50, optimizer, criterion, device=device)


Epoch: 1 Total_Time: 18.7964 Average_Time_per_batch: 0.0545 Train_Accuracy: 0.6863 Train_Loss: 0.8603 Validation_Accuracy: 0.7089 Validation_Loss: 0.7896
Epoch: 2 Total_Time: 19.1372 Average_Time_per_batch: 0.0555 Train_Accuracy: 0.7209 Train_Loss: 0.7783 Validation_Accuracy: 0.7288 Validation_Loss: 0.7583
Epoch: 3 Total_Time: 18.5799 Average_Time_per_batch: 0.0539 Train_Accuracy: 0.7364 Train_Loss: 0.7492 Validation_Accuracy: 0.7441 Validation_Loss: 0.7322
Epoch: 4 Total_Time: 16.8022 Average_Time_per_batch: 0.0487 Train_Accuracy: 0.7461 Train_Loss: 0.7287 Validation_Accuracy: 0.7535 Validation_Loss: 0.7165
Epoch: 5 Total_Time: 18.9646 Average_Time_per_batch: 0.0550 Train_Accuracy: 0.7544 Train_Loss: 0.7126 Validation_Accuracy: 0.7590 Validation_Loss: 0.7068
Epoch: 6 Total_Time: 18.9326 Average_Time_per_batch: 0.0549 Train_Accuracy: 0.7601 Train_Loss: 0.6999 Validation_Accuracy: 0.7619 Validation_Loss: 0.6985
Epoch: 7 Total_Time: 18.8816 Average_Time_per_batch: 0.0547 Train_Accuracy: