This notebook runs a dummy experiment with three modules (each a set of linear layers): A, F and B. We would like to freeze the weights of module F.

Aim : Check during training that these weights are not altered

In [1]:
import torch
from torch.autograd import Variable

## custom dataloaders
import sys
sys.path.append('../')
from Datasets.loadDataset import loadDataset, getChannels, checkAndCreateFolder


import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


import torchvision
from networks import EmbeddingNet, TripletNet, SiameseNet, LeNet

import matplotlib
import matplotlib.pyplot as plt

import time

import numpy as np

In [2]:
def normal_init(m, mean, std):
    """Re-draw the weights of a linear layer from a normal distribution.

    Only ``nn.Linear`` instances are touched; any other object is left
    unchanged. The weight tensor is overwritten in place and the bias is
    not modified.

    Args:
        m: candidate module to initialise.
        mean: mean of the normal distribution.
        std: standard deviation of the normal distribution.
    """
    if not isinstance(m, nn.Linear):
        return
    m.weight.data.normal_(mean, std)

In [3]:
class ModuleA(nn.Module):
    """First stage of the chain: two linear layers mapping 512 -> 256 -> 128."""

    def __init__(self):
        super(ModuleA, self).__init__()
        self.layer1 = nn.Linear(512, 256)
        self.layer2 = nn.Linear(256, 128)

    def weight_init(self, mean, std):
        """Apply ``normal_init`` with the given mean/std to every direct sub-module."""
        for child in self._modules.values():
            normal_init(child, mean, std)

    def forward(self, x):
        """Return ReLU(layer2(ReLU(layer1(x))))."""
        hidden = F.relu(self.layer1(x), inplace=True)
        return F.relu(self.layer2(hidden), inplace=True)

In [4]:
class ModuleF(nn.Module):
    """Middle stage of the chain: two linear layers mapping 128 -> 64 -> 32."""

    def __init__(self):
        super(ModuleF, self).__init__()
        self.layer1 = nn.Linear(128, 64)
        self.layer2 = nn.Linear(64, 32)

    def weight_init(self, mean, std):
        """Apply ``normal_init`` with the given mean/std to every direct sub-module."""
        for child in self._modules.values():
            normal_init(child, mean, std)

    def forward(self, x):
        """Return ReLU(layer2(ReLU(layer1(x))))."""
        hidden = F.relu(self.layer1(x), inplace=True)
        return F.relu(self.layer2(hidden), inplace=True)

In [5]:
class ModuleB(nn.Module):
    """Last stage of the chain: two linear layers mapping 32 -> 16 -> 1.

    A ReLU is applied after both layers, so the final output is non-negative.
    """

    def __init__(self):
        super(ModuleB, self).__init__()
        self.layer1 = nn.Linear(32, 16)
        self.layer2 = nn.Linear(16, 1)

    def weight_init(self, mean, std):
        """Apply ``normal_init`` with the given mean/std to every direct sub-module."""
        for child in self._modules.values():
            normal_init(child, mean, std)

    def forward(self, x):
        """Return ReLU(layer2(ReLU(layer1(x))))."""
        hidden = F.relu(self.layer1(x), inplace=True)
        return F.relu(self.layer2(hidden), inplace=True)

Create instances pertaining to the three modules

In [6]:
# Experiment hyper-parameters shared by the cells below.
inputSize = 512         # width of each input vector (matches ModuleA's first layer)
batchSize = 128         # number of samples drawn per training step
epochs = 100            # number of training iterations (one batch per epoch)
learningRate = 0.0002   # learning rate handed to the Adam optimizers

Now, when we backpropagate the error, we only want to update modules A and B, leaving the weights of module F (instantiated as `FR` in the code) frozen.

Defining the optimizer and loss function. Let us assume that we would like to have an output value of 1 for all the inputs.

In [23]:
# Folder that holds every checkpoint written by this experiment.
modelFolder = 'dummy'

# Checkpoint file names per module: "<module>_init.pt" is written before
# training and "<module>_fin.pt" after training.
aInit, aFinal = 'A_init.pt', 'A_fin.pt'
fInit, fFinal = 'F_init.pt', 'F_fin.pt'
bInit, bFinal = 'B_init.pt', 'B_fin.pt'

# Project helper; presumably creates the folder when it is missing
# (TODO confirm against Datasets.loadDataset).
checkAndCreateFolder(modelFolder)


def train(activate_a=True, activate_f=True, activate_b=True):
    """Train the chain A -> F -> B while keeping module F frozen.

    Fresh instances of the three modules are created and initialised, their
    state dicts are saved under ``modelFolder`` before and after training
    (``*_init.pt`` / ``*_fin.pt``), and the combined network is optimised
    with an MSE loss towards an output of 1 for every input.

    Args:
        activate_a: when True, step module A's optimizer on every iteration.
        activate_f: accepted for interface symmetry but has no effect; F's
            parameters always have ``requires_grad=False`` and no optimizer
            is created for them.
        activate_b: when True, step module B's optimizer on every iteration.
    """

    def _save(module, filename):
        # Persist a module's state dict inside the experiment folder.
        torch.save(module.state_dict(), modelFolder + '/' + filename)

    # instantiate the three modules of the neural network
    A = ModuleA()
    FR = ModuleF()
    B = ModuleB()

    # freeze F: autograd will not compute gradients for its parameters
    for param in FR.parameters():
        param.requires_grad = False

    # initialize with random weights
    A.weight_init(0.0, 0.02)
    FR.weight_init(0.0, 0.02)
    B.weight_init(0.0, 0.02)

    # snapshot the starting point so it can be compared after training
    _save(A, aInit)
    _save(FR, fInit)
    _save(B, bInit)

    # let's say we wish the combined network to output 1 for every input
    idealOutputV = Variable(torch.ones(batchSize, 1))

    # take the mean square error loss
    lossFunction = nn.MSELoss()

    # define optimizers for the trainable components only (no optimizer for F)
    AOptimizer = optim.Adam(A.parameters(),
                            lr=learningRate,
                            betas=(0.5, 0.999))
    BOptimizer = optim.Adam(B.parameters(),
                            lr=learningRate,
                            betas=(0.5, 0.999))

    for epoch in range(epochs):

        # at the beginning of each epoch reset the accumulated gradients
        A.zero_grad()
        FR.zero_grad()
        B.zero_grad()

        # Input is sampled from a unit Gaussian. The previous
        # FloatTensor(...).random_(0, 1) call drew integers from [0, 0],
        # i.e. an all-zero batch, so the network never saw a real input.
        inputNNV = Variable(torch.randn(batchSize, inputSize),
                            requires_grad=True)

        # the input requires grad, so gradients still flow back through the
        # frozen module F into module A
        outputA = A(inputNNV)
        outputF = FR(outputA)
        outputB = B(outputF)

        # calculate the loss function
        loss = lossFunction(outputB, idealOutputV)

        # calculate gradient for each leaf node
        loss.backward()

        # update according to the optimization method used
        if activate_b:
            BOptimizer.step()

        if activate_a:
            AOptimizer.step()

    # snapshot the end point
    _save(A, aFinal)
    _save(FR, fFinal)
    _save(B, bFinal)

In [24]:
# Run the experiment with the optimizers of A and B stepping; activate_f is
# passed for completeness (train() creates no optimizer for module F).
train(activate_a=True, activate_b=True, activate_f=True)

In [25]:
# Rebuild fresh copies of the three modules and restore the state dicts that
# were saved before training (B, then F, then A).
PATH = '/'.join([modelFolder, bInit])
BInit = ModuleB()
BInit.load_state_dict(torch.load(PATH))

PATH = '/'.join([modelFolder, fInit])
FInit = ModuleF()
FInit.load_state_dict(torch.load(PATH))

PATH = '/'.join([modelFolder, aInit])
AInit = ModuleA()
AInit.load_state_dict(torch.load(PATH))

In [26]:
# Rebuild fresh copies of the three modules and restore the state dicts that
# were saved after training (B, then F, then A).
PATH = '/'.join([modelFolder, bFinal])
BFin = ModuleB()
BFin.load_state_dict(torch.load(PATH))

PATH = '/'.join([modelFolder, fFinal])
FFin = ModuleF()
FFin.load_state_dict(torch.load(PATH))

PATH = '/'.join([modelFolder, aFinal])
AFin = ModuleA()
AFin.load_state_dict(torch.load(PATH))

In [27]:
def change():
    """Report, per module, whether training altered any of its parameters.

    Every entry of the pre-training state dict (weights and biases) of
    ``BInit``/``AInit``/``FInit`` is compared with the matching entry of
    ``BFin``/``AFin``/``FFin``. One line is printed per module, in the order
    B, A, F. A module is reported as "same" only if all of its tensors are
    exactly equal before and after training.
    """

    def _unchanged(before, after):
        # True when every tensor of `before` equals its counterpart in `after`.
        after_state = after.state_dict()
        return all(torch.equal(tensor, after_state[key])
                   for key, tensor in before.state_dict().items())

    pairs = (('B', BInit, BFin), ('A', AInit, AFin), ('F', FInit, FFin))
    for name, before, after in pairs:
        # print(...) with a single string works on both Python 2 and 3.
        if _unchanged(before, after):
            print('Module %s : Weights are same' % name)
        else:
            # NOTE: the 'Modele' spelling is kept so the text matches the
            # output already recorded in the notebook.
            print('Modele %s : Weights changed' % name)


In [28]:
# Print, for each of B, A and F, whether training changed the module when the
# initial and final checkpoints are compared.
change()

Modele B : Weights changed
Modele A : Weights changed
Module F : Weights are same


In [15]:
# Display the first linear layer of the restored pre-training module B.
BInit._modules['layer1']

Linear (32 -> 16)

AttributeError: 'ModuleB' object has no attribute 'layer'