In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import copy

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
class Maxout(nn.Module):
    def __init__(self, in_features, out_features, pool_size=2, batch_norm=True):
        super(Maxout, self).__init__()
        self.out_features = out_features
        self.pool_size = pool_size
        self.linear = nn.ModuleList([nn.Linear(in_features, out_features) for _ in range(pool_size)])
        if batch_norm:
            self.bn = nn.ModuleList([nn.BatchNorm1d(out_features) for _ in range(pool_size)])
        else:
            self.bn = nn.ModuleList([nn.Identity() for _ in range(pool_size)])

    def forward(self, x):
        maxout_layers = torch.stack([self.bn[i](layer(x)) for i, layer in enumerate(self.linear)], dim=2) 
        maxout_output = torch.max(maxout_layers, dim=2).values
        return maxout_output

class MaxPlus(nn.Module):
    def __init__(self, in_features, out_features, bias=True, init="full"):
        super(MaxPlus, self).__init__()
        self.bias = bias
        if init == "full":
            self.weight = nn.Parameter(torch.randn(out_features, in_features))
        else:
            w = torch.randn(out_features, in_features)
            mask = torch.rand(out_features, in_features) > init/(out_features * in_features)
            w = w - 1e9 * mask
            self.weight = nn.Parameter(w)
        self.b = nn.Parameter(torch.randn(out_features))

    def forward(self, x):
        x = x.unsqueeze(1) 
        x = x + self.weight
        if self.bias:
            x = torch.cat([x, self.b.view(1,-1,1).repeat(x.size(0),1,1)], dim=2)
        x, _ = torch.max(x, dim=2)
        return x
    
class Conv_2d(nn.Module):
    def __init__(self, input_channels, output_channels, shape=3, stride=1, pooling=2):
        super(Conv_2d, self).__init__()
        self.conv = nn.Conv2d(input_channels, output_channels, shape, stride=stride, padding=shape//2)
        self.bn = nn.BatchNorm2d(output_channels)
        self.relu = nn.ReLU()
        self.mp = nn.MaxPool2d(pooling)
    def forward(self, x):
        out = self.mp(self.relu(self.bn(self.conv(x))))
        return out
    
# Define the MLP model
class MLP(nn.Module):
    # def __init__(self, n_in=28*28, n_class=10, n_hid=256, method="relu"):
    def __init__(self, n_channels=128, n_class=10, n_hid=512, method="relu"):
        super(MLP, self).__init__()
        self.layer1 = Conv_2d(3, n_channels, pooling=2)
        self.layer2 = Conv_2d(n_channels, n_channels, pooling=2)
        self.layer3 = Conv_2d(n_channels, n_channels*2, pooling=2)
        self.layer4 = Conv_2d(n_channels*2, n_channels*2, pooling=2)
        self.layer5 = Conv_2d(n_channels*2, n_channels*2, pooling=2)
        # self.layer6 = Conv_2d(n_channels*2, n_channels*2, pooling=2)
        # self.layer7 = Conv_2d(n_channels*2, n_channels*4, pooling=2)
        if method == "relu":
            self.dense1 = nn.Linear(n_channels*2, n_hid)
            self.bn = nn.BatchNorm1d(n_hid)
            self.dense2 = nn.Linear(n_hid, n_class)
            self.dropout = nn.Dropout(0.5)
            self.relu = nn.ReLU()
        elif method == "maxout": 
            self.dense1 = Maxout(n_channels*2, n_hid, pool_size=2, batch_norm=True)
            self.bn = nn.Identity()
            self.dense2 = nn.Linear(n_hid, n_class)
            self.dropout = nn.Dropout(0.5)
            self.relu = nn.Identity()
        elif method == "zhang":
            self.dense1 = nn.Linear(n_channels*2, n_hid, bias=False)
            self.bn = nn.Identity()
            self.dropout = nn.Dropout(0.5)
            self.dense2 = MaxPlus(n_hid, n_class)
            self.relu = nn.ReLU()
        elif method == "lmpl": 
            self.dense1 = nn.Linear(n_channels*2, n_hid, bias=False)
            self.relu = MaxPlus(n_hid, n_hid)
            self.bn = nn.BatchNorm1d(n_hid)
            self.dense2 = nn.Linear(n_hid, n_class)
            self.dropout = nn.Identity()
        elif method == "lmpl2":
            self.dense1 = nn.Linear(n_channels*2, n_hid, bias=False)
            self.relu = MaxPlus(n_hid, n_hid, init=2*n_hid)
            self.bn = nn.BatchNorm1d(n_hid)
            self.dense2 = nn.Linear(n_hid, n_class)
            self.dropout = nn.Identity()
        else:
            print("Invalid method!!!")
            exit(-1)
    def forward(self, x):
        # x = x.view(-1, 28*28)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        # x = self.layer6(x)
        # x = self.layer7(x)
        x = x.squeeze(2)

        # Global Max Pooling
        if x.size(-1) != 1:
            x = nn.MaxPool1d(x.size(-1))(x)
        x = x.squeeze(2)
        
        x = self.dense1(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.dense2(x)

        return x

In [6]:
def prune_weights(model, pruning_ratio=[0.2, 0.2], method = "relu"):
    model = copy.deepcopy(model)  # Clone model to avoid modifying the original

    def prune_layer_linear(layer, r, extra=0):
        """Zero out the smallest magnitude weights in 'layer'."""
        with torch.no_grad():
            weight = layer.weight.data
            num_to_prune = int(r * weight.numel()) + extra

            # Get absolute values and sort
            flat_weights = weight.abs().flatten()
            threshold = torch.topk(flat_weights, num_to_prune, largest=False).values.max()

            # Zero out weights below the threshold
            weight[weight.abs() <= threshold] = 0.0

            remaining = weight.numel()
            if layer.bias is not None:
                remaining += layer.bias.data.numel()
            remaining -= num_to_prune
            return remaining

    def prune_layer_maxplus(layer, r, extra=0):
        """Zero out the smallest magnitude weights in 'layer'."""
        with torch.no_grad():
            weight = layer.weight.data
            num_to_prune = int(r * weight.numel()) + extra

            # Get absolute values and so
            flat_weights = weight.flatten()
            threshold = torch.topk(flat_weights, num_to_prune, largest=False).values.max()

            # Zero out weights below the threshold
            weight[weight <= threshold] = -1e9

            remaining = weight.numel()
            if layer.bias:
                remaining += layer.b.data.numel()
            remaining -= num_to_prune
            return remaining

    # Apply pruning to all linear layers in the model
    total_params = 0
    if method == "relu":
        total_params += prune_layer_linear(model.dense1, pruning_ratio[0])
        total_params += prune_layer_linear(model.dense2, pruning_ratio[1])  
    elif method == "maxout":
        for lin_layer in model.dense1.linear:
            total_params += prune_layer_linear(lin_layer, 1-(1-pruning_ratio[0])/len(model.dense1.linear), extra=lin_layer.bias.size(0)//2)
        total_params += prune_layer_linear(model.dense2, pruning_ratio[1])
    elif method == "lmpl":
        total_params += prune_layer_linear(model.dense1, 1-(1-pruning_ratio[0])/2)
        total_params += prune_layer_maxplus(model.relu, 1-(1-pruning_ratio[0])/2)
        total_params += prune_layer_linear(model.dense2, pruning_ratio[1])
    elif method == "lmpl2":
        total_params += prune_layer_linear(model.dense1, pruning_ratio[0], extra=2*model.dense1.weight.size(0))
        total_params += 3*model.dense1.weight.size(0)
        total_params += prune_layer_linear(model.dense2, pruning_ratio[1])

    return model, total_params  # Return the pruned model

In [None]:
# Load MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))])
dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# Split dataset into training (80%) and validation (20%)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
trainset, valset = random_split(dataset, [train_size, val_size])

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(trainset, batch_size=64, shuffle=True)
val_loader = DataLoader(valset, batch_size=64, shuffle=False)
test_loader = DataLoader(testset, batch_size=64, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def train(model, criterion, optimizer, epochs=10):
    best_model=None
    best_accuracy = -float('inf')
    # Training loop
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_accuracy = 100 * correct / total
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_model = copy.deepcopy(model)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {val_accuracy:.2f}%")

    return best_model

def test(model):
    # Evaluate the model on test set
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Test Accuracy: {100 * correct / total:.2f}%")
    return f"Test Accuracy: {100 * correct / total:.2f}%\n"

In [19]:
log_string=""
print(log_string)




In [16]:
# Initialize model, loss function, and optimizer
models = [MLP().to(device) for _ in range(5)]

for i in range(5):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(models[i].parameters(), lr=0.001)
    models[i] = train(models[i], criterion, optimizer)

Epoch 1/10, Loss: 1.2419, Val Loss: 1.0298, Val Accuracy: 63.34%
Epoch 2/10, Loss: 0.8265, Val Loss: 0.9422, Val Accuracy: 67.18%
Epoch 3/10, Loss: 0.6568, Val Loss: 1.1562, Val Accuracy: 63.80%
Epoch 4/10, Loss: 0.5303, Val Loss: 0.7592, Val Accuracy: 74.46%
Epoch 5/10, Loss: 0.4298, Val Loss: 0.7534, Val Accuracy: 74.77%
Epoch 6/10, Loss: 0.3439, Val Loss: 0.7948, Val Accuracy: 75.09%
Epoch 7/10, Loss: 0.2698, Val Loss: 0.7791, Val Accuracy: 76.79%
Epoch 8/10, Loss: 0.2039, Val Loss: 0.8073, Val Accuracy: 76.80%
Epoch 9/10, Loss: 0.1692, Val Loss: 0.8060, Val Accuracy: 78.09%
Epoch 10/10, Loss: 0.1419, Val Loss: 0.8429, Val Accuracy: 78.10%
Epoch 1/10, Loss: 1.2250, Val Loss: 1.0907, Val Accuracy: 61.76%
Epoch 2/10, Loss: 0.8251, Val Loss: 0.8633, Val Accuracy: 69.90%
Epoch 3/10, Loss: 0.6554, Val Loss: 0.7837, Val Accuracy: 72.93%
Epoch 4/10, Loss: 0.5366, Val Loss: 0.7475, Val Accuracy: 74.56%
Epoch 5/10, Loss: 0.4374, Val Loss: 0.7208, Val Accuracy: 76.19%
Epoch 6/10, Loss: 0.3427

In [17]:
for i in range(5):
    test(models[i])

Test Accuracy: 77.71%
Test Accuracy: 79.00%
Test Accuracy: 79.95%
Test Accuracy: 79.53%
Test Accuracy: 78.73%


In [20]:
for i in range(5):
    for r1 in [0.7, 0.8, 0.9, 0.95]:
        for r2 in [0.7, 0.8, 0.9, 0.95]:
            compressed_model, total_params = prune_weights(models[i], [r1, r2], "relu")

            print(f"Compressed model relu {i}, {r1}, {r2} total params:", total_params)
            log_string += f"Compressed model relu {i}, {r1}, {r2} total params: {total_params}\n"
            log_string += test(compressed_model)

Compressed model relu 0, 0.7, 0.7 total params: 41380
Test Accuracy: 68.14%
Compressed model relu 0, 0.7, 0.8 total params: 40868
Test Accuracy: 57.70%
Compressed model relu 0, 0.7, 0.9 total params: 40356
Test Accuracy: 45.10%
Compressed model relu 0, 0.7, 0.95 total params: 40100
Test Accuracy: 37.60%
Compressed model relu 0, 0.8, 0.7 total params: 28273
Test Accuracy: 55.71%
Compressed model relu 0, 0.8, 0.8 total params: 27761
Test Accuracy: 41.63%
Compressed model relu 0, 0.8, 0.9 total params: 27249
Test Accuracy: 27.15%
Compressed model relu 0, 0.8, 0.95 total params: 26993
Test Accuracy: 28.96%
Compressed model relu 0, 0.9, 0.7 total params: 15166
Test Accuracy: 32.20%
Compressed model relu 0, 0.9, 0.8 total params: 14654
Test Accuracy: 20.54%
Compressed model relu 0, 0.9, 0.9 total params: 14142
Test Accuracy: 15.73%
Compressed model relu 0, 0.9, 0.95 total params: 13886
Test Accuracy: 17.45%
Compressed model relu 0, 0.95, 0.7 total params: 8612
Test Accuracy: 25.45%
Compresse

In [21]:
# Initialize model, loss function, and optimizer
models = [MLP(method="maxout").to(device) for _ in range(5)]

for i in range(5):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(models[i].parameters(), lr=0.001)
    models[i] = train(models[i], criterion, optimizer)

Epoch 1/10, Loss: 1.2680, Val Loss: 1.0990, Val Accuracy: 61.46%
Epoch 2/10, Loss: 0.8331, Val Loss: 0.7985, Val Accuracy: 72.46%
Epoch 3/10, Loss: 0.6630, Val Loss: 0.8484, Val Accuracy: 72.16%
Epoch 4/10, Loss: 0.5405, Val Loss: 0.7176, Val Accuracy: 76.00%
Epoch 5/10, Loss: 0.4306, Val Loss: 0.9502, Val Accuracy: 70.19%
Epoch 6/10, Loss: 0.3462, Val Loss: 0.8095, Val Accuracy: 74.43%
Epoch 7/10, Loss: 0.2660, Val Loss: 1.1766, Val Accuracy: 70.10%
Epoch 8/10, Loss: 0.2060, Val Loss: 0.8880, Val Accuracy: 76.19%
Epoch 9/10, Loss: 0.1701, Val Loss: 0.9158, Val Accuracy: 76.63%
Epoch 10/10, Loss: 0.1439, Val Loss: 0.9742, Val Accuracy: 76.92%
Epoch 1/10, Loss: 1.2700, Val Loss: 0.9832, Val Accuracy: 64.60%
Epoch 2/10, Loss: 0.8427, Val Loss: 0.8819, Val Accuracy: 69.46%
Epoch 3/10, Loss: 0.6634, Val Loss: 0.7184, Val Accuracy: 75.18%
Epoch 4/10, Loss: 0.5458, Val Loss: 0.7052, Val Accuracy: 75.87%
Epoch 5/10, Loss: 0.4467, Val Loss: 0.7862, Val Accuracy: 74.82%
Epoch 6/10, Loss: 0.3453

In [22]:
for i in range(5):
    test(models[i])

Test Accuracy: 77.26%
Test Accuracy: 77.06%
Test Accuracy: 79.08%
Test Accuracy: 79.00%
Test Accuracy: 78.47%


In [23]:
for i in range(5):
    for r1 in [0.7, 0.8, 0.9, 0.95]:
        for r2 in [0.7, 0.8, 0.9, 0.95]:
            compressed_model, total_params = prune_weights(models[i], [r1, r2], "maxout")

            print(f"Compressed model maxout {i}, {r1}, {r2} total params:", total_params)
            log_string += f"Compressed model maxout {i}, {r1}, {r2} total params: {total_params}\n"
            log_string += test(compressed_model)

Compressed model maxout 0, 0.7, 0.7 total params: 41380
Test Accuracy: 42.72%
Compressed model maxout 0, 0.7, 0.8 total params: 40868
Test Accuracy: 41.16%
Compressed model maxout 0, 0.7, 0.9 total params: 40356
Test Accuracy: 33.52%
Compressed model maxout 0, 0.7, 0.95 total params: 40100
Test Accuracy: 28.43%
Compressed model maxout 0, 0.8, 0.7 total params: 28274
Test Accuracy: 28.78%
Compressed model maxout 0, 0.8, 0.8 total params: 27762
Test Accuracy: 27.36%
Compressed model maxout 0, 0.8, 0.9 total params: 27250
Test Accuracy: 22.38%
Compressed model maxout 0, 0.8, 0.95 total params: 26994
Test Accuracy: 19.50%
Compressed model maxout 0, 0.9, 0.7 total params: 15166
Test Accuracy: 24.46%
Compressed model maxout 0, 0.9, 0.8 total params: 14654
Test Accuracy: 22.56%
Compressed model maxout 0, 0.9, 0.9 total params: 14142
Test Accuracy: 16.26%
Compressed model maxout 0, 0.9, 0.95 total params: 13886
Test Accuracy: 13.90%
Compressed model maxout 0, 0.95, 0.7 total params: 8612
Test 

In [24]:
# Initialize model, loss function, and optimizer
models = [MLP(method="lmpl").to(device) for _ in range(5)]

for i in range(5):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(models[i].parameters(), lr=0.001)
    models[i] = train(models[i], criterion, optimizer)

Epoch 1/10, Loss: 1.3935, Val Loss: 1.0841, Val Accuracy: 64.01%
Epoch 2/10, Loss: 0.9301, Val Loss: 0.9242, Val Accuracy: 68.12%
Epoch 3/10, Loss: 0.7390, Val Loss: 0.9412, Val Accuracy: 69.86%
Epoch 4/10, Loss: 0.6144, Val Loss: 0.8025, Val Accuracy: 72.79%
Epoch 5/10, Loss: 0.5250, Val Loss: 0.8739, Val Accuracy: 73.40%
Epoch 6/10, Loss: 0.4335, Val Loss: 0.7444, Val Accuracy: 76.95%
Epoch 7/10, Loss: 0.3527, Val Loss: 0.8290, Val Accuracy: 76.08%
Epoch 8/10, Loss: 0.2748, Val Loss: 0.8715, Val Accuracy: 76.25%
Epoch 9/10, Loss: 0.2264, Val Loss: 0.7936, Val Accuracy: 78.37%
Epoch 10/10, Loss: 0.1836, Val Loss: 0.9287, Val Accuracy: 76.70%
Epoch 1/10, Loss: 1.4117, Val Loss: 1.2389, Val Accuracy: 58.38%
Epoch 2/10, Loss: 0.9228, Val Loss: 0.8923, Val Accuracy: 69.75%
Epoch 3/10, Loss: 0.7519, Val Loss: 0.9210, Val Accuracy: 69.58%
Epoch 4/10, Loss: 0.6112, Val Loss: 0.8010, Val Accuracy: 73.84%
Epoch 5/10, Loss: 0.5390, Val Loss: 1.1610, Val Accuracy: 66.68%
Epoch 6/10, Loss: 0.4405

In [25]:
for i in range(5):
    test(models[i])

Test Accuracy: 78.16%
Test Accuracy: 77.81%
Test Accuracy: 78.48%
Test Accuracy: 76.40%
Test Accuracy: 78.06%


In [26]:
for i in range(5):
    for r1 in [0.7, 0.8, 0.9, 0.95]:
        for r2 in [0.7, 0.8, 0.9, 0.95]:
            compressed_model, total_params = prune_weights(models[i], [r1, r2], "lmpl")

            print(f"Compressed model lmpl {i}, {r1}, {r2} total params:", total_params)
            log_string += f"Compressed model lmpl {i}, {r1}, {r2} total params: {total_params}\n"
            log_string += test(compressed_model)

Compressed model lmpl 0, 0.7, 0.7 total params: 61041


Test Accuracy: 25.52%
Compressed model lmpl 0, 0.7, 0.8 total params: 60529
Test Accuracy: 13.17%
Compressed model lmpl 0, 0.7, 0.9 total params: 60017
Test Accuracy: 29.91%
Compressed model lmpl 0, 0.7, 0.95 total params: 59761
Test Accuracy: 32.33%
Compressed model lmpl 0, 0.8, 0.7 total params: 41381
Test Accuracy: 17.40%
Compressed model lmpl 0, 0.8, 0.8 total params: 40869
Test Accuracy: 10.07%
Compressed model lmpl 0, 0.8, 0.9 total params: 40357
Test Accuracy: 22.20%
Compressed model lmpl 0, 0.8, 0.95 total params: 40101
Test Accuracy: 25.41%
Compressed model lmpl 0, 0.9, 0.7 total params: 21720
Test Accuracy: 18.99%
Compressed model lmpl 0, 0.9, 0.8 total params: 21208
Test Accuracy: 10.33%
Compressed model lmpl 0, 0.9, 0.9 total params: 20696
Test Accuracy: 19.98%
Compressed model lmpl 0, 0.9, 0.95 total params: 20440
Test Accuracy: 19.96%
Compressed model lmpl 0, 0.95, 0.7 total params: 11889
Test Accuracy: 16.96%
Compressed model lmpl 0, 0.95, 0.8 total params: 11377
Test Ac

In [27]:
# Initialize model, loss function, and optimizer
models = [MLP(method="lmpl2").to(device) for _ in range(5)]

for i in range(5):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(models[i].parameters(), lr=0.001)
    models[i] = train(models[i], criterion, optimizer)

Epoch 1/10, Loss: 1.1837, Val Loss: 1.1262, Val Accuracy: 60.13%
Epoch 2/10, Loss: 0.7891, Val Loss: 0.9199, Val Accuracy: 69.41%
Epoch 3/10, Loss: 0.6274, Val Loss: 0.9112, Val Accuracy: 70.06%
Epoch 4/10, Loss: 0.5025, Val Loss: 0.7259, Val Accuracy: 75.62%
Epoch 5/10, Loss: 0.4006, Val Loss: 0.7853, Val Accuracy: 75.16%
Epoch 6/10, Loss: 0.3091, Val Loss: 0.7633, Val Accuracy: 77.32%
Epoch 7/10, Loss: 0.2390, Val Loss: 0.8621, Val Accuracy: 76.51%
Epoch 8/10, Loss: 0.1833, Val Loss: 0.9884, Val Accuracy: 74.56%
Epoch 9/10, Loss: 0.1442, Val Loss: 0.8629, Val Accuracy: 77.38%
Epoch 10/10, Loss: 0.1209, Val Loss: 1.0803, Val Accuracy: 74.80%
Epoch 1/10, Loss: 1.1765, Val Loss: 1.1361, Val Accuracy: 60.29%
Epoch 2/10, Loss: 0.7821, Val Loss: 0.8322, Val Accuracy: 71.58%
Epoch 3/10, Loss: 0.6241, Val Loss: 0.6958, Val Accuracy: 76.05%
Epoch 4/10, Loss: 0.5069, Val Loss: 0.7666, Val Accuracy: 74.37%
Epoch 5/10, Loss: 0.3945, Val Loss: 0.8209, Val Accuracy: 74.20%
Epoch 6/10, Loss: 0.2987

In [28]:
for i in range(5):
    test(models[i])

Test Accuracy: 77.87%
Test Accuracy: 79.04%
Test Accuracy: 77.83%
Test Accuracy: 78.78%
Test Accuracy: 78.08%


In [29]:
for i in range(5):
    for r1 in [0.7, 0.8, 0.9, 0.95]:
        for r2 in [0.7, 0.8, 0.9, 0.95]:
            compressed_model, total_params = prune_weights(models[i], [r1, r2], "lmpl2")

            print(f"Compressed model lmpl2 {i}, {r1}, {r2} total params:", total_params)
            log_string += f"Compressed model lmpl2 {i}, {r1}, {r2} total params: {total_params}\n"
            log_string += test(compressed_model)

Compressed model lmpl2 0, 0.7, 0.7 total params: 41380
Test Accuracy: 74.56%
Compressed model lmpl2 0, 0.7, 0.8 total params: 40868
Test Accuracy: 73.97%
Compressed model lmpl2 0, 0.7, 0.9 total params: 40356
Test Accuracy: 75.09%
Compressed model lmpl2 0, 0.7, 0.95 total params: 40100
Test Accuracy: 69.76%
Compressed model lmpl2 0, 0.8, 0.7 total params: 28273
Test Accuracy: 62.11%
Compressed model lmpl2 0, 0.8, 0.8 total params: 27761
Test Accuracy: 61.92%
Compressed model lmpl2 0, 0.8, 0.9 total params: 27249
Test Accuracy: 66.02%
Compressed model lmpl2 0, 0.8, 0.95 total params: 26993
Test Accuracy: 58.10%
Compressed model lmpl2 0, 0.9, 0.7 total params: 15166
Test Accuracy: 25.97%
Compressed model lmpl2 0, 0.9, 0.8 total params: 14654
Test Accuracy: 27.20%
Compressed model lmpl2 0, 0.9, 0.9 total params: 14142
Test Accuracy: 33.31%
Compressed model lmpl2 0, 0.9, 0.95 total params: 13886
Test Accuracy: 24.71%
Compressed model lmpl2 0, 0.95, 0.7 total params: 8612
Test Accuracy: 12.

In [30]:
print(log_string)

Compressed model relu 0, 0.7, 0.7 total params: 41380
Test Accuracy: 68.14%
Compressed model relu 0, 0.7, 0.8 total params: 40868
Test Accuracy: 57.70%
Compressed model relu 0, 0.7, 0.9 total params: 40356
Test Accuracy: 45.10%
Compressed model relu 0, 0.7, 0.95 total params: 40100
Test Accuracy: 37.60%
Compressed model relu 0, 0.8, 0.7 total params: 28273
Test Accuracy: 55.71%
Compressed model relu 0, 0.8, 0.8 total params: 27761
Test Accuracy: 41.63%
Compressed model relu 0, 0.8, 0.9 total params: 27249
Test Accuracy: 27.15%
Compressed model relu 0, 0.8, 0.95 total params: 26993
Test Accuracy: 28.96%
Compressed model relu 0, 0.9, 0.7 total params: 15166
Test Accuracy: 32.20%
Compressed model relu 0, 0.9, 0.8 total params: 14654
Test Accuracy: 20.54%
Compressed model relu 0, 0.9, 0.9 total params: 14142
Test Accuracy: 15.73%
Compressed model relu 0, 0.9, 0.95 total params: 13886
Test Accuracy: 17.45%
Compressed model relu 0, 0.95, 0.7 total params: 8612
Test Accuracy: 25.45%
Compresse

In [31]:
import re
import numpy as np

def parse_log(log_string):
    # Split the log string into lines
    lines = log_string.strip().split("\n")
    print(lines)
    
    # Regex pattern to extract data
    pattern = re.compile(r"Compressed model (\S+) (\d), (\d\.\d+), (\d\.\d+) total params: (\d+)")
    accuracy_pattern = re.compile(r"Test Accuracy: (\d+\.\d+)%")
    
    parsed_data = []
    
    for i in range(0, len(lines), 2):  # Every two lines form one entry
        match = pattern.match(lines[i])
        accuracy_match = accuracy_pattern.match(lines[i + 1])
        
        if match and accuracy_match:
            method, _, r1, r2, params = match.groups()
            accuracy = accuracy_match.group(1)
            parsed_data.append((method, float(r1), float(r2), int(params), float(accuracy)))
    return parsed_data

def generate_latex_table(parsed_data):
    # Organize data in a dictionary structure for easy table formatting
    data_dict = {}
    for method, r1, r2, params, accuracy in parsed_data:
        if (r2, r1) not in data_dict:
            data_dict[(r2, r1)] = {}
        if method not in data_dict[(r2, r1)]:
            data_dict[(r2, r1)][method] = []
        data_dict[(r2, r1)][method].append([params, accuracy])

    for p in data_dict.keys():
        r2, r1 = p
        for method in data_dict[(r2, r1)].keys():
            params = data_dict[(r2, r1)][method][0][0]
            mean = np.mean(np.array(data_dict[(r2, r1)][method]), axis=0)[1]
            std = np.std(np.array(data_dict[(r2, r1)][method]), axis=0)[1]
            data_dict[(r2, r1)][method] = (params, mean, std)
    
    # Define methods in order
    methods = ["relu", "maxout", "lmpl", "lmpl2"]
    
    # Start LaTeX table
    latex_table = """
    \\begin{table*}[t]
        \\begin{center}
            \caption{Pruning performance of different methods for a variety of pruning ratios $r_1, r_2$}
            \label{table:2}
            \\vspace{-0.15cm}
            \setlength{\\tabcolsep}{0.5em}
            \\begin{tabular}{llc|cccc}
            \\toprule
            \multicolumn{2}{c}{Pruning ratio} & Params & \multicolumn{4}{c}{Test Accuracy (\%)} \\\\
            \cmidrule(lr){1-2} \cmidrule(lr){3-3} \cmidrule(lr){4-7}
            $r_2$ & $r_1$ & & ReLU & Maxout & Dense-Morph & Sparse-Morph \\\\
            \midrule
    """
    
    # Sort by r2, then r1
    sorted_keys = sorted(data_dict.keys(), reverse=False)
    
    for r2, r1 in sorted_keys:
        params = None
        row_data = [f"{r2} & {r1}"]
        
        for method in methods:
            if method in data_dict[(r2, r1)]:
                params, accuracy, std = data_dict[(r2, r1)][method]
                row_data.append(f"{accuracy:.2f} $\\pm$ {std:.2f}")
            else:
                row_data.append("--")
        
        # Add params only for the first occurrence in each r2 group
        row_text = " & ".join(row_data[:1]) + f" & {params} & " + " & ".join(row_data[1:]) + " \\\\"
        latex_table += row_text + "\n"
    
    # Close LaTeX table
    latex_table += """
            \\bottomrule
            \end{tabular}
        \end{center}
    \end{table*}
    """
    
    return latex_table

parsed_data = parse_log(log_string)
latex_code = generate_latex_table(parsed_data)
print(latex_code)


['Compressed model relu 0, 0.7, 0.7 total params: 41380', 'Test Accuracy: 68.14%', 'Compressed model relu 0, 0.7, 0.8 total params: 40868', 'Test Accuracy: 57.70%', 'Compressed model relu 0, 0.7, 0.9 total params: 40356', 'Test Accuracy: 45.10%', 'Compressed model relu 0, 0.7, 0.95 total params: 40100', 'Test Accuracy: 37.60%', 'Compressed model relu 0, 0.8, 0.7 total params: 28273', 'Test Accuracy: 55.71%', 'Compressed model relu 0, 0.8, 0.8 total params: 27761', 'Test Accuracy: 41.63%', 'Compressed model relu 0, 0.8, 0.9 total params: 27249', 'Test Accuracy: 27.15%', 'Compressed model relu 0, 0.8, 0.95 total params: 26993', 'Test Accuracy: 28.96%', 'Compressed model relu 0, 0.9, 0.7 total params: 15166', 'Test Accuracy: 32.20%', 'Compressed model relu 0, 0.9, 0.8 total params: 14654', 'Test Accuracy: 20.54%', 'Compressed model relu 0, 0.9, 0.9 total params: 14142', 'Test Accuracy: 15.73%', 'Compressed model relu 0, 0.9, 0.95 total params: 13886', 'Test Accuracy: 17.45%', 'Compressed 

In [37]:
tmp = np.array([77.71, 79.00, 79.95, 79.53, 78.73])

print("ReLU", np.mean(tmp), np.std(tmp))

ReLU 78.984 0.7640837650415065


In [35]:
tmp = np.array([77.26, 77.06, 79.08, 79.00, 78.47])

print("Maxout", np.mean(tmp), np.std(tmp))

Maxout 78.174 0.856401774869714


In [34]:
tmp = np.array([78.16, 77.81, 78.48, 76.40, 78.06])

print("Dense-Morph", np.mean(tmp), np.std(tmp))

Dense-Morph 77.78200000000001 0.723613156320418


In [33]:
tmp = np.array([77.87, 79.04, 77.83, 78.78, 78.08])

print("Sparse-Morph", np.mean(tmp), np.std(tmp))

Sparse-Morph 78.32 0.4960241929583694
