In [1]:
# Our setup
import torch
from torch.functional import F

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu" if not torch.cuda.is_available() else "cuda:1"

import copy
import numpy as np
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

from sad_nns.uncertainty import *
from neurops import *

In [2]:
batch_size = 128

In [3]:
dataset = datasets.MNIST('../data', train=True, download=True,
                     transform=transforms.Compose([ 
                            transforms.ToTensor(),
                            transforms.Normalize((0.1307,), (0.3081,)),
                            transforms.Resize((14,14))
                            # transforms.Resize((28,28)),
                            # transforms.Lambda(lambda x: torch.flatten(x))
                        ]))
train_set, val_set = torch.utils.data.random_split(dataset, lengths=[int(0.9*len(dataset)), int(0.1*len(dataset))])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                            transforms.ToTensor(),
                            transforms.Normalize((0.1307,), (0.3081,)),
                            transforms.Resize((14,14))
                            # transforms.Resize((28,28)),
                            # transforms.Lambda(lambda x: torch.flatten(x))
                        ])),
    batch_size=batch_size, shuffle=True)

def train(model, train_loader, optimizer, criterion, epochs=10, val_loader=None, verbose=True):
    model.train()

    train_accs = []
    test_accs = []

    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            if batch_idx % 100 == 0 and verbose:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
        
        correct = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)   
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            # print(pred)
            correct += pred.eq(target.view_as(pred)).sum().item()
        train_acc = 100. * correct / len(train_loader.dataset)
        train_accs.append(train_acc)

        if val_loader is not None:
            if verbose:
                print("Validation: ", end = "")
            ta = test(model, val_loader, criterion, verbose)
            test_accs.append(ta)

    return train_accs, test_accs

def test(model, test_loader, criterion, verbose=True):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    
    if verbose:
        print('Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    
    return 100. * correct / len(test_loader.dataset)

In [4]:
# Assume model, optimizer and criterion already initialized, example below
model = ModSequential(
        ModConv2d(in_channels=1, out_channels=8, kernel_size=7, padding=1),
        ModConv2d(in_channels=8, out_channels=16, kernel_size=7, padding=1),
        ModConv2d(in_channels=16, out_channels=16, kernel_size=5),
        ModLinear(64, 32),
        ModLinear(32, 10, nonlinearity=""),
        track_activations=False,
        track_auxiliary_gradients=True,
       input_shape = (1, 14, 14)
    ).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# grow x neurons to each layer every y epoch, grow total_growth_epoch times
x = 1
y = 2
total_growh_epoch = 12
for iter in range(12):
    for i in range(len(model)-1):
        to_add = x # can change to be adding different number of neurons per layer
        # print("Layer {} neurons to add: {}".format(i, to_add))
        model.grow(i, to_add, fanin_weights="kaiming_uniform", optimizer=optimizer)
    # print("The grown model now has {} effective parameters.".format(model.parameter_count(masked = False)))
    # print("Validation after growing: ", end = "")
    # test(model, val_loader, criterion)
    train(model, train_loader, optimizer, criterion, epochs=y, val_loader=val_loader, verbose=False)
    # print("Validation after training for "+ str(y) +" epoch: ", end = "")
    # test(model, val_loader, criterion)

# for j in range(len(model)):
#     print("Layer {} weight matrix after growth {}".format(j, model[j].weight.size()))
# print("The grown model now has {} effective parameters.".format(model.parameter_count(masked = False)))
# test(model, val_loader, criterion)



Validation: Average loss: 0.0167, Accuracy: 1923/6000 (32.05%)


KeyboardInterrupt: 