In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils.prune as prune
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import pandas as pd
import numpy as np

torch.manual_seed(42)
np.random.seed(42)

In [7]:
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)
print("MNIST dataset loaded with train and test loaders.")

MNIST dataset loaded with train and test loaders.


In [8]:
class SimpleNet(nn.Module):
    def __init__(self, hidden_size):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(784, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 10)  
    def forward(self, x):
        x = x.view(-1, 784)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
def train_model(model, train_loader, epochs=3, device=device):
    criterion = nn.CrossEntropyLoss().to(device) 
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    model.train()
    for epoch in range(epochs):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device) 
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1}/{epochs} completed on {device}.")
    return model

NameError: name 'cuda' is not defined

In [11]:
# Evaluation function (GPU-optimized)
def evaluate_model(model, test_loader, device=device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)  # Move data to GPU
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = correct / total
    print(f"Evaluated accuracy: {accuracy:.4f} on {device}")
    return accuracy
print("Evaluation function updated for GPU.")

NameError: name 'device' is not defined

In [10]:
# Define expanded parameters for at least 200 rows (GPU-optimized)
hidden_sizes = [16, 32, 64, 128, 256, 512, 1024, 2048]  # 8 values
prune_ratios = [0.2, 0.4, 0.6, 0.8, 0.9]  # 5 values
dataset_complexities = [0.1, 0.3, 0.5, 0.7, 1.0]  # 5 values (MNIST data fractions)
expected_rows = len(hidden_sizes) * len(prune_ratios) * len(dataset_complexities)
print(f"Expected rows: {expected_rows} (approx. 200)")
# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Expected rows: 200 (approx. 200)
Using device: cuda


In [14]:
import os
import pandas as pd

# Initialize data list and counter
data = []
counter = 0

# Generate pruning data with GPU optimization
for hidden_size in hidden_sizes:
    for prune_ratio in prune_ratios:
        for complexity in dataset_complexities:
            # Adjust train_loader size based on complexity
            train_subset = torch.utils.data.Subset(train_dataset, range(int(len(train_dataset) * complexity)))
            train_loader_subset = torch.utils.data.DataLoader(train_subset, batch_size=128, shuffle=True)  # GPU-friendly batch
            print(f"Training model #{counter+1}/{expected_rows}: hidden_size={hidden_size}, prune_ratio={prune_ratio}, complexity={complexity}")
            
            # Train a model on GPU
            model = SimpleNet(hidden_size).to(device)
            model = train_model(model, train_loader_subset, epochs=3, device=device)
            baseline_accuracy = evaluate_model(model, test_loader, device=device)  # Ensure test on GPU
            
            # Prune the first layer (on GPU)
            prune.l1_unstructured(model.fc1, name="weight", amount=prune_ratio)
            pruned_accuracy = evaluate_model(model, test_loader, device=device)
            
            # Store data
            accuracy_drop = max(0, baseline_accuracy - pruned_accuracy)  # Clamp non-negative
            data.append({
                'hidden_size': hidden_size,
                'prune_ratio': prune_ratio,
                'dataset_complexity': complexity,
                'accuracy_drop': accuracy_drop
            })
            counter += 1
            
            # Partial save every 50 rows (to resume if interrupted)
            if counter % 50 == 0 or counter == expected_rows:
                partial_df = pd.DataFrame(data)
                partial_df.to_csv('partial_pruning_data.csv', index=False)
                print(f"Partial save at {counter} rows: 'partial_pruning_data.csv'")

# Save full dataset
df = pd.DataFrame(data)
df.to_csv('pruning_data.csv', index=False)
print(f"Dataset generated and saved to 'pruning_data.csv' with {len(df)} rows:")
print(df.head())
print(df.describe())  # Check stats for accuracy_drop range

Training model #1/200: hidden_size=16, prune_ratio=0.2, complexity=0.1
Epoch 1/3 completed on cuda.
Epoch 2/3 completed on cuda.
Epoch 3/3 completed on cuda.
Evaluated accuracy: 0.8502 on cuda
Evaluated accuracy: 0.8501 on cuda
Training model #2/200: hidden_size=16, prune_ratio=0.2, complexity=0.3
Epoch 1/3 completed on cuda.
Epoch 2/3 completed on cuda.
Epoch 3/3 completed on cuda.
Evaluated accuracy: 0.8998 on cuda
Evaluated accuracy: 0.9002 on cuda
Training model #3/200: hidden_size=16, prune_ratio=0.2, complexity=0.5
Epoch 1/3 completed on cuda.
Epoch 2/3 completed on cuda.
Epoch 3/3 completed on cuda.
Evaluated accuracy: 0.9119 on cuda
Evaluated accuracy: 0.9120 on cuda
Training model #4/200: hidden_size=16, prune_ratio=0.2, complexity=0.7
Epoch 1/3 completed on cuda.
Epoch 2/3 completed on cuda.
Epoch 3/3 completed on cuda.
Evaluated accuracy: 0.9180 on cuda
Evaluated accuracy: 0.9172 on cuda
Training model #5/200: hidden_size=16, prune_ratio=0.2, complexity=1.0
Epoch 1/3 complet