In [7]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

# Preprocessing applied to every image: convert to a float tensor, then
# normalise each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Number of images per mini-batch; shared by the train and test loaders below.
batch_size = 1000

# CIFAR-10 training split read from ./data. download=False means no download is
# attempted here, so the dataset files must already be present on disk.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=False, transform=transform)
# shuffle=False keeps the batch order fixed, so the first batch is the same on
# every pass over the loader. Two worker processes load the data.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

# CIFAR-10 test split, loaded with the same preprocessing and batch size.
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# Human-readable names for the ten class indices.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Fix the global torch RNG seed so that later random draws (e.g. weight
# initialisation of models built after this cell) are repeatable.
torch.manual_seed(0)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [14]:
        class Net(nn.Module):
            """Small CNN classifier: two convolutional stages, then three linear layers.

            Each convolutional stage is Conv2d -> BatchNorm2d -> ELU -> 2x2 max-pool.
            The first two linear layers are each followed by BatchNorm1d and ELU; the
            last linear layer produces 10 raw scores. ``fc1`` expects 8 * 3 * 3 = 72
            features, which is what a 3x32x32 input yields after both stages.
            """

            def __init__(self):
                super().__init__()
                # Stage 1: 3 -> 6 channels, 5x5 kernel.
                self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
                self.conv1_bn = nn.BatchNorm2d(num_features=6)
                # The same pooling layer is reused after both convolutions.
                self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
                # Stage 2: 6 -> 8 channels, 9x9 kernel.
                self.conv2 = nn.Conv2d(in_channels=6, out_channels=8, kernel_size=9)
                self.conv2_bn = nn.BatchNorm2d(num_features=8)
                # Classifier head: 72 -> 30 -> 20 -> 10.
                self.fc1 = nn.Linear(in_features=8 * 3 * 3, out_features=30)
                self.fc1_bn = nn.BatchNorm1d(num_features=30)
                self.fc2 = nn.Linear(in_features=30, out_features=20)
                self.fc2_bn = nn.BatchNorm1d(num_features=20)
                self.fc3 = nn.Linear(in_features=20, out_features=10)

            def forward(self, x):
                """Return the raw class scores, one row of 10 values per input sample."""
                stage1 = self.conv1_bn(self.conv1(x))
                stage1 = self.pool(F.elu(stage1))

                stage2 = self.conv2_bn(self.conv2(stage1))
                stage2 = self.pool(F.elu(stage2))

                # Merge every dimension except the batch dimension.
                features = stage2.flatten(start_dim=1)

                hidden = F.elu(self.fc1_bn(self.fc1(features)))
                hidden = F.elu(self.fc2_bn(self.fc2(hidden)))
                return self.fc3(hidden)

In [17]:
import torch.optim as optim
# Build the network and place it on the selected device, then pair it with a
# cross-entropy objective and SGD with momentum.
model = Net()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
# Alternative objective, left disabled: nn.MSELoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [18]:
import time
# Single-batch experiment: take the first mini-batch of the training loader
# and run 100 optimisation steps on that same batch, reporting loss, accuracy
# and the global gradient norm every 10 steps.
#
# next(iter(...)) pulls exactly one batch; `data` is kept bound to that same
# batch so it agrees with `inputs` / `labels`.
data = next(iter(trainloader))
inputs, labels = data

inputs = inputs.to(device=device)
labels = labels.to(device=device)

# perf_counter is a monotonic clock intended for measuring intervals.
start = time.perf_counter()
for epoch in range(100):  # 100 update steps, all on the same batch

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # statistics for this step; `outputs` was computed before the update
    running_loss = loss.item()
    acc = (outputs.argmax(dim=1) == labels).sum().item() / labels.size(0)

    if epoch % 10 == 0:
        print('[%d, %5d] loss: %.3f acc: %.3f' % (epoch + 1, 0 + 1, running_loss, acc ), end = "")

        # L2 norm of all gradients taken together: sqrt of the sum of the
        # squared per-parameter norms. Parameters without a gradient are
        # skipped instead of raising on `None`.
        squared_sum = 0.0
        for p in model.parameters():
            if p.grad is None:
                continue
            squared_sum += p.grad.detach().norm(2).item() ** 2
        total_norm = squared_sum ** 0.5
        print( "  norm of grandient = ", total_norm )

end = time.perf_counter()
print('Finished Training')
print("total time = {} s".format(end-start))

[1,     1] loss: 2.366 acc: 0.089  norm of grandient =  1.4859647110109397
[11,     1] loss: 2.286 acc: 0.135  norm of grandient =  1.2788155500293428
[21,     1] loss: 2.181 acc: 0.198  norm of grandient =  1.1155429978194322
[31,     1] loss: 2.099 acc: 0.240  norm of grandient =  0.8319969686678818
[41,     1] loss: 2.043 acc: 0.272  norm of grandient =  0.7525410738803914
[51,     1] loss: 1.996 acc: 0.313  norm of grandient =  0.6584445130151481
[61,     1] loss: 1.955 acc: 0.338  norm of grandient =  0.6179793983114326
[71,     1] loss: 1.919 acc: 0.371  norm of grandient =  0.5890151475850511
[81,     1] loss: 1.885 acc: 0.385  norm of grandient =  0.5715705582525021
[91,     1] loss: 1.854 acc: 0.407  norm of grandient =  0.557795034623814
Finished Training
total time = 1.0070459842681885 s


In [None]:
# Display the first row of `outputs`, i.e. the model scores for the first
# sample of whichever forward pass last assigned `outputs`.
outputs[0]
# labels[0]

In [None]:
# nvar = 10160
# x = .1 * np.ones((nvar,1))
# x_torch = torch.from_numpy(x).cuda()
# torch.nn.utils.vector_to_parameters(x_torch, model.parameters())

# Dump the gradient of every model parameter: first its shape, then its
# first slice along dimension 0.
lst = list(model.parameters())

for param in lst:
    grad = param.grad
    print(grad.shape)
    print(grad[0])

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Pooling-only baseline: three 2x2 max-pools followed by two linear layers.

    No convolution is used. ``fc0`` takes 3 * 4 * 4 = 48 features, which is the
    size of a 3-channel 32x32 image after its height and width are halved
    three times.
    """

    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc0 = nn.Linear(in_features=3 * 4 * 4, out_features=20)
        self.fc01 = nn.Linear(in_features=20, out_features=10)

    def forward(self, x):
        """Return the raw class scores, one row of 10 values per input sample."""
        # The same pooling layer is applied three times in a row.
        for _ in range(3):
            x = self.pool(x)
        # Merge every dimension except the batch dimension.
        flat = x.flatten(start_dim=1)
        hidden = F.relu(self.fc0(flat))
        return self.fc01(hidden)

# class Net(nn.Module):
#                 def __init__(self):
#                         super().__init__()
#                         self.pool = nn.MaxPool2d(2, 2)

#                         self.fc1 = nn.Linear(3 * 8 * 8, 120)
#                         self.fc2 = nn.Linear(120, 84)
#                         self.fc3 = nn.Linear(84, 10)

#                 def forward(self, x):
#                         x = self.pool(x)
#                         x = self.pool(x)
#                         x = torch.flatten(x, 1) # flatten all dimensions except batch
#                         x = self.fc1(x)
#                         x = self.fc2(x)
#                         x = self.fc3(x)
#                         return x

# Instantiate the network defined in this cell and build an optimizer for it.
#
# An optimizer holds references to the parameter tensors it was constructed
# with. Without rebuilding it here, `optimizer` would still point at the
# parameters of the previously created model, and the training loops that
# follow would never update this new one. The settings match the other
# optimizer cells in this notebook (SGD, lr=0.001, momentum=0.9).
model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Overwrite every parameter of `model` with the constant 0.1, then show the
# first parameter tensor as a check.
#
# The vector length is taken from the model itself; the previously hard-coded
# 10160 did not match the two-layer Net (48*20 + 20 + 20*10 + 10 = 1190
# parameters), and vector_to_parameters does not verify the length.
nvar = sum(p.numel() for p in model.parameters())
x = .1 * np.ones((nvar, 1))

# torch.from_numpy keeps NumPy's float64 dtype. The vector is converted to the
# dtype and device the parameters already have, because vector_to_parameters
# replaces each parameter's data with a slice of this vector. An unconditional
# .cuda() would also fail on a machine without a GPU.
reference = next(model.parameters())
x_torch = torch.from_numpy(x).reshape(-1).to(device=reference.device,
                                             dtype=reference.dtype)
torch.nn.utils.vector_to_parameters(x_torch, model.parameters())

lst = list(model.parameters())
print(lst[0])

In [None]:
# Full training run: 20 passes over the training set, with one report per
# epoch printed on the epoch's last batch.
#
# The report used to be tied to a fixed interval of 196 batches. With
# batch_size = 1000 the CIFAR-10 training loader yields 50 batches per epoch,
# so that condition was never met and nothing was printed. The interval is
# now the actual number of batches in the loader.
batches_per_epoch = len(trainloader)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate the loss so the report shows the epoch average
        running_loss += loss.item()
        if i + 1 == batches_per_epoch:
            # accuracy is measured on the last batch of the epoch only
            acc = (outputs.argmax(dim=1) == labels).sum().item() / labels.size(0)
            # print before resetting the accumulator, so the reported loss
            # is the average and not the freshly zeroed value
            print('[%d, %5d] loss: %.3f acc: %.3f' %
                  (epoch + 1, i + 1, running_loss / batches_per_epoch, acc))
            running_loss = 0.0

print('Finished Training')

In [None]:
# Ten passes over the whole training set, logging the loss and accuracy of
# every single batch.
for epoch in range(10):
    running_loss = 0.0
    for step, data in enumerate(trainloader, start=1):
        # each item from the loader is an (inputs, labels) pair
        inputs, labels = data

        # clear old gradients, then forward / backward / parameter update
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # statistics for this batch only
        running_loss = loss.item()
        predicted = outputs.max(1)[1]
        n_correct = (predicted == labels).sum().item()
        acc = n_correct / labels.size(0)

        print('[%d, %5d] loss: %.3f acc: %.3f' % (epoch + 1, step, running_loss, acc ))

print('Finished Training')

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Pooling-only baseline: three 2x2 max-pools followed by two linear layers.

    No convolution is used. ``fc0`` takes 3 * 4 * 4 = 48 features, which is the
    size of a 3-channel 32x32 image after its height and width are halved
    three times.
    """

    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc0 = nn.Linear(in_features=3 * 4 * 4, out_features=20)
        self.fc01 = nn.Linear(in_features=20, out_features=10)

    def forward(self, x):
        """Return the raw class scores, one row of 10 values per input sample."""
        # The same pooling layer is applied three times in a row.
        for _ in range(3):
            x = self.pool(x)
        # Merge every dimension except the batch dimension.
        flat = x.flatten(start_dim=1)
        hidden = F.relu(self.fc0(flat))
        return self.fc01(hidden)

# class Net(nn.Module):
#                 def __init__(self):
#                         super().__init__()
#                         self.pool = nn.MaxPool2d(2, 2)

#                         self.fc1 = nn.Linear(3 * 8 * 8, 120)
#                         self.fc2 = nn.Linear(120, 84)
#                         self.fc3 = nn.Linear(84, 10)

#                 def forward(self, x):
#                         x = self.pool(x)
#                         x = self.pool(x)
#                         x = torch.flatten(x, 1) # flatten all dimensions except batch
#                         x = self.fc1(x)
#                         x = self.fc2(x)
#                         x = self.fc3(x)
#                         return x

# Create a new instance of the Net class defined in this cell and rebind
# `model` to it. It is not moved to `device` here, and any optimizer built
# for an earlier model does not track this instance's parameters.
model = Net()

In [None]:
import torch.optim as optim

# Objective and optimizer for the current `model`: cross-entropy loss and
# SGD with momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [None]:
# Ten optimisation steps, all on the first batch of the training loader.
#
# The batch is fetched once, before the loop. The loader is built with
# shuffle=False, so its first batch is the same on every pass. Re-creating
# the loader iterator every epoch only to break after one batch repeated the
# same loading work ten times. `data` stays bound because a later cell
# unpacks it.
data = next(iter(trainloader))
inputs, labels = data

for epoch in range(10):
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # statistics for this step; `outputs` was computed before the update
    running_loss = loss.item()
    acc = (outputs.argmax(dim=1) == labels).sum().item() / labels.size(0)

    # the batch index is always 1 because only the first batch is used
    print('[%d, %5d] loss: %.3f acc: %.3f' % (epoch + 1, 1, running_loss, acc ))

print('Finished Training')

In [8]:
# List the shape of every parameter tensor of the current model, in the
# order model.parameters() yields them.
parameter_lst = list(model.parameters())

for param in parameter_lst:
    print(param.shape)



torch.Size([6, 3, 5, 5])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([8, 6, 9, 9])
torch.Size([8])
torch.Size([8])
torch.Size([8])
torch.Size([30, 72])
torch.Size([30])
torch.Size([30])
torch.Size([30])
torch.Size([20, 30])
torch.Size([20])
torch.Size([20])
torch.Size([20])
torch.Size([10, 20])
torch.Size([10])


In [None]:
# Debug variant of the single-batch loop: two optimisation steps on the first
# training batch, printing the batch shape and the first channel of the first
# image before each step.
#
# The batch is fetched once, before the loop. The loader is built with
# shuffle=False, so its first batch is the same on every pass, and
# re-creating the loader iterator each epoch only repeated the loading work.
# `data` stays bound because the gradient-inspection cell unpacks it.
data = next(iter(trainloader))
inputs, labels = data

for epoch in range(2):
    print(inputs.shape)
    print(inputs[0][0])

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # statistics for this step; `outputs` was computed before the update
    running_loss = loss.item()
    acc = (outputs.argmax(dim=1) == labels).sum().item() / labels.size(0)

    # the batch index is always 1 because only the first batch is used
    print('[%d, %5d] loss: %.3f acc: %.3f' % (epoch + 1, 1, running_loss, acc ))

print('Finished Training')

In [None]:
# Gradient inspection, done twice: first for the gradients currently stored on
# the parameters, then again after one more forward/backward pass on `data`.
def _print_grads(params):
    """Print each parameter's gradient shape, plus row 0 of the first gradient."""
    for position, param in enumerate(params):
        print(param.grad.shape)
        if position == 0:
            print(param.grad[0])


lst = list(model.parameters())
_print_grads(lst)

# NOTE: no zero_grad() call precedes this backward pass, so the new gradients
# are added onto the ones printed above rather than replacing them.
inputs, labels = data
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()

lst = list(model.parameters())
_print_grads(lst)
        


In [None]:
# Flatten the gradient of the first parameter into an (n, 1) NumPy column
# vector and print its length n.
first_grad = lst[0].grad
print(first_grad.shape)
# Tensor.numpy() only works on CPU tensors; .detach().cpu() makes the
# conversion succeed when the model (and so its gradients) lives on the GPU.
vec = first_grad.detach().cpu().reshape(-1, 1).numpy()
print(vec.shape[0])