In [1]:
import torch
import numpy as np
from torchvision import datasets
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn

from models import *

In [2]:
# Run on CUDA when PyTorch reports an available GPU; otherwise fall back to the CPU.
use_gpu = torch.cuda.is_available()
if use_gpu:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
batch_size = 128

# Per-channel normalization applied after ToTensor().
# NOTE(review): these look like the usual CIFAR-10 channel mean/std - confirm.
normalize = transforms.Normalize(
    mean=[0.491, 0.482, 0.447],
    std=[0.247, 0.243, 0.262],
)

# Training pipeline: padded random crop and random horizontal flip, then
# tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=train_transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,   # reshuffle the training set every epoch
    num_workers=2,
)

# Test pipeline: no augmentation, only tensor conversion and normalization.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,  # keep the test set in a fixed order
    num_workers=2,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Number of full passes over the training set used by every training cell below.
n_epochs = 125

In [5]:
###############
# LOSS 1 ONLY #
###############

In [6]:
# Run configuration for the experiment that optimizes loss 1 only.
model_name = "vggnet16_cost0"

# Move the network to the target device *before* the optimizer is constructed,
# as the torch.optim documentation recommends; the later model.to(device) in
# the training cell then has nothing left to move.
model = VGG16().to(device)

# Checkpoint file written at the end of training and read back for evaluation.
PATH = '/home/pnataraj/private/ece284_saved_models/' + model_name

criterion = nn.CrossEntropyLoss().to(device)
# SGD over every model parameter; only the learning rate is specified.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [7]:
# Put the network on the selected device and switch it to training mode once,
# before the epoch loop starts.
model.to(device)
model.train()

# Every per-epoch average below is taken over the whole training set.
n_train = len(train_loader.dataset)

for epoch in range(n_epochs):
    # Batch-size-weighted running sums, reset at the start of each epoch.
    train_loss = train_loss1 = train_loss2 = 0.0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        batch_n = inputs.size(0)

        # Discard gradients from the previous step, then run the forward pass.
        optimizer.zero_grad()
        logits = model(inputs)

        # loss1: criterion on the predictions.
        # loss2: absolute sum of the weights of model.features[0].
        loss1 = criterion(logits, labels)
        loss2 = model.features[0].weight.abs().sum()

        # loss2 is tracked for reporting but enters the objective with weight 0.
        loss = loss1 + 0*loss2

        loss.backward()
        optimizer.step()

        # .item() converts each scalar tensor to a Python float.
        train_loss1 += loss1.item()*batch_n
        train_loss2 += loss2.item()*batch_n
        train_loss += loss.item()*batch_n

    # Turn the running sums into per-sample averages for this epoch.
    train_loss1 /= n_train
    train_loss2 /= n_train
    train_loss /= n_train

    # Report every tenth epoch (epochs 1, 11, 21, ...).
    if epoch % 10 == 0:
        print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch+1, train_loss))

# Save a general checkpoint (weights, optimizer state, last epoch index and its
# average loss); see
# https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
checkpoint_payload = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': train_loss,
}
torch.save(checkpoint_payload, PATH)

Epoch: 1 	Training Loss: 1.474318
Epoch: 11 	Training Loss: 0.403917
Epoch: 21 	Training Loss: 0.229810
Epoch: 31 	Training Loss: 0.139805
Epoch: 41 	Training Loss: 0.086677
Epoch: 51 	Training Loss: 0.062855
Epoch: 61 	Training Loss: 0.041610
Epoch: 71 	Training Loss: 0.033780
Epoch: 81 	Training Loss: 0.026193
Epoch: 91 	Training Loss: 0.022190
Epoch: 101 	Training Loss: 0.016597
Epoch: 111 	Training Loss: 0.014777
Epoch: 121 	Training Loss: 0.013584


In [8]:
# Evaluate the checkpoint of the "vggnet16_cost0" run on the test set.
PATH = '/home/pnataraj/private/ece284_saved_models/vggnet16_cost0'

# Fresh network on the target device; its weights come from the checkpoint.
model = VGG16().to(device)
# map_location lets a checkpoint written on a GPU machine be restored on a
# CPU-only one (and the other way round).
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
# The optimizer state is deliberately not restored: evaluation never steps an
# optimizer, and restoring it would make this cell depend on an `optimizer`
# object left behind by a training cell.
epoch = checkpoint['epoch']
train_loss = checkpoint['loss']

model.eval()  # evaluation mode for inference

test_loss = 0.0
correct = 0

with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # Sum (not mean) over the batch so that dividing by the dataset size
        # below gives the per-sample average cross-entropy.
        test_loss += nn.functional.cross_entropy(output, target, reduction='sum').item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

n_test = len(test_loader.dataset)
test_loss /= n_test  # average test loss per sample (kept for inspection, not printed)

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, n_test,
        100. * correct / n_test))

print('first conv layer weight absolute sum:', model.features[0].weight.abs().sum().item())


Test set: Accuracy: 8961/10000 (90%)

first conv layer weight absolute sum: 183.23004150390625


In [9]:
###################
# LOSS 1 + LOSS 2 #
###################

In [12]:
# Run configuration for the experiment that optimizes loss1 + loss2.
model_name = "vggnet16_cost1"

# Move the network to the target device *before* the optimizer is constructed,
# as the torch.optim documentation recommends; the later model.to(device) in
# the training cell then has nothing left to move.
model = VGG16().to(device)

# Checkpoint file written at the end of training and read back for evaluation.
PATH = '/home/pnataraj/private/ece284_saved_models/' + model_name

criterion = nn.CrossEntropyLoss().to(device)
# SGD over every model parameter; only the learning rate is specified.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [13]:
# Put the network on the selected device and switch it to training mode once,
# before the epoch loop starts.
model.to(device)
model.train()

# Every per-epoch average below is taken over the whole training set.
n_train = len(train_loader.dataset)

for epoch in range(n_epochs):
    # Batch-size-weighted running sums, reset at the start of each epoch.
    train_loss = train_loss1 = train_loss2 = 0.0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        batch_n = inputs.size(0)

        # Discard gradients from the previous step, then run the forward pass.
        optimizer.zero_grad()
        logits = model(inputs)

        # loss1: criterion on the predictions.
        # loss2: absolute sum of the weights of model.features[0].
        loss1 = criterion(logits, labels)
        loss2 = model.features[0].weight.abs().sum()

        # Both terms enter the objective with equal (unit) weight.
        loss = loss1 + loss2

        loss.backward()
        optimizer.step()

        # .item() converts each scalar tensor to a Python float.
        train_loss1 += loss1.item()*batch_n
        train_loss2 += loss2.item()*batch_n
        train_loss += loss.item()*batch_n

    # Turn the running sums into per-sample averages for this epoch.
    train_loss1 /= n_train
    train_loss2 /= n_train
    train_loss /= n_train

    # Report every tenth epoch (epochs 1, 11, 21, ...).
    if epoch % 10 == 0:
        print('Epoch: {} \tTraining Loss1: {:.6f}'.format(epoch+1, train_loss1))
        print('Epoch: {} \tTraining Loss2: {:.6f}'.format(epoch+1, train_loss2))
        print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch+1, train_loss))

# Save a general checkpoint (weights, optimizer state, last epoch index and its
# average loss); see
# https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
checkpoint_payload = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': train_loss,
}
torch.save(checkpoint_payload, PATH)

Epoch: 1 	Training Loss1: 1.922165
Epoch: 1 	Training Loss2: 13.396772
Epoch: 1 	Training Loss: 15.318937
Epoch: 11 	Training Loss1: 0.968684
Epoch: 11 	Training Loss2: 10.753299
Epoch: 11 	Training Loss: 11.721982
Epoch: 21 	Training Loss1: 0.701288
Epoch: 21 	Training Loss2: 10.422272
Epoch: 21 	Training Loss: 11.123561
Epoch: 31 	Training Loss1: 0.542433
Epoch: 31 	Training Loss2: 10.079826
Epoch: 31 	Training Loss: 10.622258
Epoch: 41 	Training Loss1: 0.426482
Epoch: 41 	Training Loss2: 10.187684
Epoch: 41 	Training Loss: 10.614166
Epoch: 51 	Training Loss1: 0.343424
Epoch: 51 	Training Loss2: 10.011103
Epoch: 51 	Training Loss: 10.354527
Epoch: 61 	Training Loss1: 0.277458
Epoch: 61 	Training Loss2: 10.261899
Epoch: 61 	Training Loss: 10.539358
Epoch: 71 	Training Loss1: 0.207878
Epoch: 71 	Training Loss2: 10.294593
Epoch: 71 	Training Loss: 10.502471
Epoch: 81 	Training Loss1: 0.184922
Epoch: 81 	Training Loss2: 9.908035
Epoch: 81 	Training Loss: 10.092956
Epoch: 91 	Training Los

In [14]:
# Evaluate the checkpoint of the "vggnet16_cost1" run on the test set.
PATH = '/home/pnataraj/private/ece284_saved_models/vggnet16_cost1'

# Fresh network on the target device; its weights come from the checkpoint.
model = VGG16().to(device)
# map_location lets a checkpoint written on a GPU machine be restored on a
# CPU-only one (and the other way round).
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
# The optimizer state is deliberately not restored: evaluation never steps an
# optimizer, and restoring it would make this cell depend on an `optimizer`
# object left behind by a training cell.
epoch = checkpoint['epoch']
train_loss = checkpoint['loss']

model.eval()  # evaluation mode for inference

test_loss = 0.0
correct = 0

with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # Sum (not mean) over the batch so that dividing by the dataset size
        # below gives the per-sample average cross-entropy.
        test_loss += nn.functional.cross_entropy(output, target, reduction='sum').item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

n_test = len(test_loader.dataset)
test_loss /= n_test  # average test loss per sample (kept for inspection, not printed)

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, n_test,
        100. * correct / n_test))
print('first conv layer weight absolute sum:', model.features[0].weight.abs().sum().item())


Test set: Accuracy: 6731/10000 (67%)

first conv layer weight absolute sum: 9.882144927978516


In [15]:
##########################
# LOSS 1 + 0.01 * LOSS 2 #
##########################

In [5]:
# Run configuration for the experiment that optimizes loss1 + 0.01*loss2.
model_name = "vggnet16_cost2"

# Move the network to the target device *before* the optimizer is constructed,
# as the torch.optim documentation recommends; the later model.to(device) in
# the training cell then has nothing left to move.
model = VGG16().to(device)

# Checkpoint file written at the end of training and read back for evaluation.
PATH = '/home/pnataraj/private/ece284_saved_models/' + model_name

criterion = nn.CrossEntropyLoss().to(device)
# SGD over every model parameter; only the learning rate is specified.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [6]:
# Put the network on the selected device and switch it to training mode once,
# before the epoch loop starts.
model.to(device)
model.train()

# Every per-epoch average below is taken over the whole training set.
n_train = len(train_loader.dataset)

for epoch in range(n_epochs):
    # Batch-size-weighted running sums, reset at the start of each epoch.
    train_loss = train_loss1 = train_loss2 = 0.0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        batch_n = inputs.size(0)

        # Discard gradients from the previous step, then run the forward pass.
        optimizer.zero_grad()
        logits = model(inputs)

        # loss1: criterion on the predictions.
        # loss2: absolute sum of the weights of model.features[0].
        loss1 = criterion(logits, labels)
        loss2 = model.features[0].weight.abs().sum()

        # loss2 is scaled down by 0.01 before being added to loss1.
        loss = loss1 + 0.01*loss2

        loss.backward()
        optimizer.step()

        # .item() converts each scalar tensor to a Python float.
        train_loss1 += loss1.item()*batch_n
        train_loss2 += loss2.item()*batch_n
        train_loss += loss.item()*batch_n

    # Turn the running sums into per-sample averages for this epoch.
    train_loss1 /= n_train
    train_loss2 /= n_train
    train_loss /= n_train

    # Report every tenth epoch (epochs 1, 11, 21, ...).
    if epoch % 10 == 0:
        print('Epoch: {} \tTraining Loss1: {:.6f}'.format(epoch+1, train_loss1))
        print('Epoch: {} \tTraining Loss2: {:.6f}'.format(epoch+1, train_loss2))
        print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch+1, train_loss))

# Save a general checkpoint (weights, optimizer state, last epoch index and its
# average loss); see
# https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
checkpoint_payload = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': train_loss,
}
torch.save(checkpoint_payload, PATH)

Epoch: 1 	Training Loss1: 1.476671
Epoch: 1 	Training Loss2: 136.600283
Epoch: 1 	Training Loss: 2.842674
Epoch: 11 	Training Loss1: 0.478637
Epoch: 11 	Training Loss2: 9.025821
Epoch: 11 	Training Loss: 0.568895
Epoch: 21 	Training Loss1: 0.299440
Epoch: 21 	Training Loss2: 7.888343
Epoch: 21 	Training Loss: 0.378324
Epoch: 31 	Training Loss1: 0.194817
Epoch: 31 	Training Loss2: 7.226530
Epoch: 31 	Training Loss: 0.267083
Epoch: 41 	Training Loss1: 0.129657
Epoch: 41 	Training Loss2: 6.657391
Epoch: 41 	Training Loss: 0.196231
Epoch: 51 	Training Loss1: 0.094112
Epoch: 51 	Training Loss2: 6.453971
Epoch: 51 	Training Loss: 0.158652
Epoch: 61 	Training Loss1: 0.072539
Epoch: 61 	Training Loss2: 6.144388
Epoch: 61 	Training Loss: 0.133983
Epoch: 71 	Training Loss1: 0.054878
Epoch: 71 	Training Loss2: 5.635196
Epoch: 71 	Training Loss: 0.111229
Epoch: 81 	Training Loss1: 0.048332
Epoch: 81 	Training Loss2: 5.582943
Epoch: 81 	Training Loss: 0.104161
Epoch: 91 	Training Loss1: 0.038770
Ep

In [7]:
# Evaluate the checkpoint of the "vggnet16_cost2" run on the test set.
PATH = '/home/pnataraj/private/ece284_saved_models/vggnet16_cost2'

# Fresh network on the target device; its weights come from the checkpoint.
model = VGG16().to(device)
# map_location lets a checkpoint written on a GPU machine be restored on a
# CPU-only one (and the other way round).
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
# The optimizer state is deliberately not restored: evaluation never steps an
# optimizer, and restoring it would make this cell depend on an `optimizer`
# object left behind by a training cell.
epoch = checkpoint['epoch']
train_loss = checkpoint['loss']

model.eval()  # evaluation mode for inference

test_loss = 0.0
correct = 0

with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # Sum (not mean) over the batch so that dividing by the dataset size
        # below gives the per-sample average cross-entropy.
        test_loss += nn.functional.cross_entropy(output, target, reduction='sum').item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

n_test = len(test_loader.dataset)
test_loss /= n_test  # average test loss per sample (kept for inspection, not printed)

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, n_test,
        100. * correct / n_test))

print('first conv layer weight absolute sum:', model.features[0].weight.abs().sum().item())


Test set: Accuracy: 8627/10000 (86%)

first conv layer weight absolute sum: 4.874133110046387
