### Loading PyTorch CNN Model

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

### Model 1 : Fully Connected model

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # Layer 1
        self.conv1 = nn.Conv2d(1, 64, 3) # 28x28 -> 26x26
        self.b1    = nn.BatchNorm2d(64)
        self.pool  = nn.MaxPool2d(2, 2)  # 26x26 -> 13x13
        
        # Layer 2
        self.conv2 = nn.Conv2d(64, 128, 3)  # 13x13 -> 11x11
        self.b2    = nn.BatchNorm2d(128)
        #self.pool                          # 11x11 -> 5x5
        
        # Layer 3
        self.conv3 = nn.Conv2d(128, 128, 3) # 5x5 -> 3x3
        self.b3    = nn.BatchNorm2d(128)
        #self.pool                          # 3x3 -> 1x1 
        
        # FC Layers
        self.fc1 = nn.Linear(128 * 1 * 1, 512)
        self.bf1 = nn.BatchNorm1d(512)
        
        self.fc2 = nn.Linear(512, 256)
        self.bf2 = nn.BatchNorm1d(256)
        
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        # Layer 1
        x = self.pool(F.relu(self.b1(self.conv1(x))))
        #print(self.num_flat_features(x))
        
        # Layer 2
        x = self.pool(F.relu(self.b2(self.conv2(x))))
        
        # Layer 3
        x = self.pool(F.relu(self.b3(self.conv3(x))))
        
        # Flatten tensors
        x = x.view(-1, self.num_flat_features(x))
        
        # FC Layer 1
        x = F.relu(self.bf1(self.fc1(x)))
        x = F.dropout(x, training=self.training)
        
        # FC Layer 2
        x = F.relu(self.bf2(self.fc2(x)))
        x = F.dropout(x, training=self.training)
        
        # FC Layer 3
        x = self.fc3(x)
        return x
    
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

### Load dataset

In [2]:
import torchvision
import torchvision.transforms as transforms
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=16,
                                         shuffle=False, num_workers=2)

In [3]:
# Case 1 : Load on GPU
device = torch.device("cuda")
model = Net()
model.load_state_dict(torch.load('./results/model.pth'))
model.to(device)


# Case 2 : Load on CPU
#device = torch.device('cpu')
#model.load_state_dict(torch.load(PATH, map_location=device))


Net(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1))
  (b1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (b2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (b3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=128, out_features=512, bias=True)
  (bf1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (bf2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=256, out_features=10, bias=True)
)

### Test dat

In [4]:
model.eval()

# Calculate Validation Accuracy
total_eval = 0
correct_eval = 0.0
with torch.no_grad():
    for images_eval, labels_eval in testloader:
        images_eval, labels_eval = images_eval.to(device), labels_eval.to(device)
        outputs_eval = model(images_eval)
        _, predicted_eval = torch.max(outputs_eval.data, 1)
        total_eval += labels_eval.size(0)
        correct_eval += (predicted_eval == labels_eval).sum().item() 
val_acc = correct_eval / total_eval
print('Test | Size : %6d  | ValAcc: %.3f\n' %
      (len(testloader.dataset), val_acc))

Test | Size :  10000  | ValAcc: 0.987



### Transfer Learning?

In [5]:
another_dict = {}
for i, (k, v) in enumerate(model.state_dict().items()):
    print(i, k)
    if i <= 20:
        another_dict[k] = v
        #print(k)
print(len(another_dict))

# update & load
cur_dict = model.state_dict()
cur_dict.update(another_dict)
model.load_state_dict(cur_dict)
model.to(device)

print(len(model.state_dict()))


0 conv1.weight
1 conv1.bias
2 b1.weight
3 b1.bias
4 b1.running_mean
5 b1.running_var
6 b1.num_batches_tracked
7 conv2.weight
8 conv2.bias
9 b2.weight
10 b2.bias
11 b2.running_mean
12 b2.running_var
13 b2.num_batches_tracked
14 conv3.weight
15 conv3.bias
16 b3.weight
17 b3.bias
18 b3.running_mean
19 b3.running_var
20 b3.num_batches_tracked
21 fc1.weight
22 fc1.bias
23 bf1.weight
24 bf1.bias
25 bf1.running_mean
26 bf1.running_var
27 bf1.num_batches_tracked
28 fc2.weight
29 fc2.bias
30 bf2.weight
31 bf2.bias
32 bf2.running_mean
33 bf2.running_var
34 bf2.num_batches_tracked
35 fc3.weight
36 fc3.bias
21
37


In [6]:
print(model.state_dict())

OrderedDict([('conv1.weight', tensor([[[[-2.5679e-02,  1.2768e-01, -1.7729e-01],
          [-2.0581e-01, -2.5305e-01, -1.9026e-01],
          [-1.3796e-01, -1.5476e-02, -1.6153e-01]]],


        [[[-3.0971e-01,  1.5258e-01, -3.2308e-01],
          [ 2.6326e-01,  1.5560e-01,  1.9040e-01],
          [-7.5650e-02, -3.3277e-01, -9.2336e-02]]],


        [[[ 2.0863e-01, -5.6459e-02, -1.0615e-01],
          [ 8.5192e-02,  1.4713e-02, -2.2124e-01],
          [ 2.0818e-01, -1.5482e-01, -2.1861e-01]]],


        [[[-5.0022e-02, -2.3761e-01, -1.8780e-01],
          [-1.9244e-01, -9.7412e-02, -1.0576e-01],
          [-4.5556e-02, -1.4751e-02, -2.1402e-02]]],


        [[[ 4.9394e-02, -1.5709e-01, -1.3093e-01],
          [ 3.1017e-01,  1.1941e-02,  4.4425e-02],
          [-8.2639e-02,  1.3259e-02,  2.9113e-01]]],


        [[[-1.2636e-01,  1.3066e-04,  2.9539e-01],
          [ 3.1197e-02,  5.2154e-02,  1.0690e-01],
          [ 1.9844e-01, -2.4458e-01, -3.1848e-01]]],


        [[[ 1.9453e-01,  6.5

In [7]:
model.eval()

with torch.no_grad():
    for images_eval, labels_eval in testloader:
        images_eval, labels_eval = images_eval.to(device), labels_eval.to(device)
        outputs_eval = model(images_eval)
        print(outputs_eval.shape)

torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([1

torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([16, 10])
torch.Size([1

In [8]:
# 1. Directly Load a Pre-trained Model
# https://github.com/pytorch/vision/tree/master/torchvision/models
import torchvision.models as models
resnet50 = models.resnet50(pretrained=True)
# or
model = models.resnet50(pretrained=False)

# Maybe you want to modify the last fc layer?
resnet.fc = nn.Linear(2048, 2) 

# 2. Load part of parameters of a pretrained model as init for self-defined similar-architecture model.
# resnet50 is a pretrain model 
# self_defined indicates model you just define.
resnet50 = models.resnet50(pretrained=True) 
self_defined = Net(...) 

pretrained_dict = resnet50.state_dict() 
model_dict = self_defined.state_dict() 
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 

# update & load
model_dict.update(pretrained_dict) 
model.load_state_dict(model_dict)

# 3. Save & Load routines.
# routine 1
# torch.save(model.state_dict(), PATH)

# model = ModelClass(*args, **kwargs)
# model.load_state_dict(torch.load(PATH))

# routine 2
# torch.save(model, PATH)
# model = torch.load(PATH)

NameError: name 'resnet' is not defined