In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder


In [2]:
np.random.seed(42)  # Set the seed for NumPy

# Set the seed for PyTorch (CPU and GPU)
torch.manual_seed(42)

# If you're using CUDA (GPU), set the seed for CUDA as well
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)  # For all GPUs (if you have more than one)

In [3]:
# 1. Download the dataset (OpenML: id 180)
dataset = openml.datasets.get_dataset(180)
X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute, dataset_format='dataframe')

In [4]:
# 2. Preprocessing: Numeric features only, standardize, encode labels (classes are 1-7)
X_numeric = X.select_dtypes(include=[np.number])

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numeric.values.astype(np.float32))

le = LabelEncoder()
y_encoded = le.fit_transform(y)  # Converts to 0...6
y_encoded = y_encoded.astype(np.int64)

In [5]:
# 3. Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

In [6]:
# 4. PyTorch Dataset
class CovertypeDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_ds = CovertypeDataset(X_train, y_train)
test_ds = CovertypeDataset(X_test, y_test)

train_loader = DataLoader(train_ds, batch_size=256, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=256)

In [7]:
from dpn_4.dpn import DPN
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DPN(X_train.shape[1], 192 + len(le.classes_), len(le.classes_), False).to(device)
model.compile()

In [8]:
# 6. Training setup
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [9]:
from utils import train

model = model
train_loader = train_loader
val_loader = test_loader
optimizer = optimizer
criterion = criterion
device = device

percent = 0.99
rounds = 100

In [10]:
def prune_by_percent_once(percent, mask, final_weight):
    # Get the absolute values of weights where mask == 1
    masked_weights = final_weight[mask == 1].abs()

    # Sort the unmasked weights
    sorted_weights, _ = torch.sort(masked_weights)
    if sorted_weights.shape[0] != 0:
        # Determine the cutoff index for pruning
        cutoff_index = int(round(percent * sorted_weights.shape[0]))
        cutoff = sorted_weights[cutoff_index]

        # Prune all weights below or equal to the cutoff
        if torch.rand(1).item() < 0.3:
            new_mask = torch.where(final_weight.abs() <= cutoff, torch.zeros_like(mask), mask)
        else:
            for i in range(final_weight.shape[0]):
                tensor = final_weight[i]
                last_nonzero_index = torch.nonzero(tensor)
                if len(last_nonzero_index) > 0:
                    last_nonzero_index = last_nonzero_index[-1].item()
                    if final_weight[i, last_nonzero_index].abs() <= cutoff:
                        mask[i, last_nonzero_index] = 0
                    
            new_mask = mask
    else:
        new_mask = mask

    return new_mask

In [11]:
def prune_by_percent(model, masks, percent):

    blocks = model.weights
    for i in range(len(blocks)):
        masks[i] = prune_by_percent_once(percent, masks[i], blocks[i])

    return masks

In [12]:
def clean_weights(weights, size_limit):
    zero_indices = [i + size_limit for i, t in enumerate(weights) if torch.all(t == 0)]
    print(zero_indices)


    while len(zero_indices) > 0:
        weights = [t for t in weights if not torch.all(t == 0)]

        for i in range(len(weights)):
            tensor = weights[i]
            columns_to_keep = [i for i in range(tensor.shape[1]) if i not in zero_indices]
            weights[i] = tensor[:, columns_to_keep]

        zero_indices = [i + size_limit for i, t in enumerate(weights) if torch.all(t == 0)]
        print(zero_indices)

    return weights

In [13]:
def merge_weights(weights, size_limit):
    merged_weights = []
    start_idx = 0
    for i in range(1, len(weights)):
        nonzero_idx = torch.nonzero(weights[i])[-1][1].item()

        if nonzero_idx >= size_limit:

            merged_weights.append(torch.cat([t[:, :size_limit] for t in weights[start_idx:i]], dim=0))
            
            size_limit = weights[i].shape[1]
            start_idx = i

    merged_weights.append(torch.cat([t[:, :size_limit] for t in weights[start_idx:]], dim=0))
    return merged_weights

In [14]:
original_weights = [param.clone().detach() for param in model.weights]
current_masks = [torch.ones_like(param) for param in original_weights]
p_per_round = 1 - (1 - percent) ** (1 / rounds)
final_weights = [param.clone().detach() for param in model.weights]

In [15]:
_, val_metrics = train(model, train_loader, val_loader, 5, optimizer, criterion, device=device)
val_accuracy = val_metrics[-1][1]


Epoch: 1 Total_Time: 19.1718 Average_Time_per_batch: 0.0556 Train_Accuracy: 0.6576 Train_Loss: 0.9413 Validation_Accuracy: 0.6929 Validation_Loss: 0.8268
Epoch: 2 Total_Time: 18.9288 Average_Time_per_batch: 0.0549 Train_Accuracy: 0.6980 Train_Loss: 0.8217 Validation_Accuracy: 0.7055 Validation_Loss: 0.7944
Epoch: 3 Total_Time: 18.7201 Average_Time_per_batch: 0.0543 Train_Accuracy: 0.7119 Train_Loss: 0.7927 Validation_Accuracy: 0.7196 Validation_Loss: 0.7718
Epoch: 4 Total_Time: 18.2614 Average_Time_per_batch: 0.0529 Train_Accuracy: 0.7235 Train_Loss: 0.7711 Validation_Accuracy: 0.7289 Validation_Loss: 0.7562
Epoch: 5 Total_Time: 18.6905 Average_Time_per_batch: 0.0542 Train_Accuracy: 0.7329 Train_Loss: 0.7529 Validation_Accuracy: 0.7368 Validation_Loss: 0.7401Peak GPU memory: 18.96 MB


In [16]:
for round_idx in range(rounds):

    current_masks = prune_by_percent(model, current_masks, p_per_round)
    
    pruned_weights = [w * m for w, m in zip(original_weights, current_masks)]
    
    size_limit = pruned_weights[0].shape[1]
    cleaned_weights = clean_weights(pruned_weights, size_limit)
    merged_weights = merge_weights(cleaned_weights, size_limit)
    new_total_nodes = sum([w.shape[0] for w in merged_weights])

    model = DPN(X_train.shape[1], new_total_nodes, len(le.classes_), False).to(device)
    model.compile()
    model.weights = nn.ParameterList([nn.Parameter(w) for w in merged_weights])
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    original_weights = [param.clone().detach() for param in model.weights]
    current_masks = [(param != 0).int() for param in merged_weights]

    _, val_metrics = train(model, train_loader, val_loader, 5, optimizer, criterion, device=device)
    new_val_accuracy = val_metrics[-1][1]
    
    if new_val_accuracy >= val_accuracy:
        #val_accuracy = new_val_accuracy
        final_weights = [param.clone().detach() for param in model.weights]  # This creates a shallow copy of the list
    else:
        break

[]



Epoch: 1 Total_Time: 17.4940 Average_Time_per_batch: 0.0507 Train_Accuracy: 0.6586 Train_Loss: 0.9402 Validation_Accuracy: 0.6939 Validation_Loss: 0.8227
Epoch: 2 Total_Time: 17.1883 Average_Time_per_batch: 0.0498 Train_Accuracy: 0.6998 Train_Loss: 0.8202 Validation_Accuracy: 0.7078 Validation_Loss: 0.7869
Epoch: 3 Total_Time: 18.6190 Average_Time_per_batch: 0.0540 Train_Accuracy: 0.7142 Train_Loss: 0.7885 Validation_Accuracy: 0.7182 Validation_Loss: 0.7676
Epoch: 4 Total_Time: 18.8510 Average_Time_per_batch: 0.0546 Train_Accuracy: 0.7262 Train_Loss: 0.7645 Validation_Accuracy: 0.7304 Validation_Loss: 0.7490
Epoch: 5 Total_Time: 18.6797 Average_Time_per_batch: 0.0541 Train_Accuracy: 0.7371 Train_Loss: 0.7448 Validation_Accuracy: 0.7405 Validation_Loss: 0.7343Peak GPU memory: 19.09 MB
[]

Epoch: 1 Total_Time: 17.8787 Average_Time_per_batch: 0.0518 Train_Accuracy: 0.6598 Train_Loss: 0.9300 Validation_Accuracy: 0.6958 Validation_Loss: 0.8199
Epoch: 2 Total_Time: 18.4796 Average_Time_per_

In [None]:
size_limit = pruned_weights[0].shape[1]


In [None]:
sizes = [w.shape[0] for w in merged_weights]
print(sum(sizes))

141


In [None]:
model = DPN(X_train.shape[1], sum(sizes), len(le.classes_), False).to(device)
model.compile()
model.weights = nn.ParameterList([nn.Parameter(w) for w in merged_weights])
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
train_metrics, eval_metrics = train(model, train_loader, test_loader, 50, optimizer, criterion, device=device)


Epoch: 1 Total_Time: 9.2747 Average_Time_per_batch: 0.0269 Train_Accuracy: 0.6638 Train_Loss: 0.9003 Validation_Accuracy: 0.6922 Validation_Loss: 0.8187
Epoch: 2 Total_Time: 9.4406 Average_Time_per_batch: 0.0274 Train_Accuracy: 0.6947 Train_Loss: 0.8267 Validation_Accuracy: 0.7078 Validation_Loss: 0.7882
Epoch: 3 Total_Time: 9.4281 Average_Time_per_batch: 0.0273 Train_Accuracy: 0.7064 Train_Loss: 0.8025 Validation_Accuracy: 0.7158 Validation_Loss: 0.7721
Epoch: 4 Total_Time: 9.3329 Average_Time_per_batch: 0.0271 Train_Accuracy: 0.7144 Train_Loss: 0.7860 Validation_Accuracy: 0.7208 Validation_Loss: 0.7609
Epoch: 5 Total_Time: 9.3955 Average_Time_per_batch: 0.0272 Train_Accuracy: 0.7208 Train_Loss: 0.7729 Validation_Accuracy: 0.7231 Validation_Loss: 0.7525
Epoch: 6 Total_Time: 9.4287 Average_Time_per_batch: 0.0273 Train_Accuracy: 0.7261 Train_Loss: 0.7623 Validation_Accuracy: 0.7282 Validation_Loss: 0.7433
Epoch: 7 Total_Time: 9.3787 Average_Time_per_batch: 0.0272 Train_Accuracy: 0.7310

In [None]:
print(sizes)

[7, 4, 4, 2, 4, 11, 2, 2, 4, 5, 4, 3, 3, 1, 2, 5, 3, 2, 3, 4, 4, 1, 4, 1, 2, 3, 5, 1, 1, 1, 3, 6, 2, 3, 1, 2, 4, 2, 1, 6, 3, 1, 2, 5, 2]
