## Testing BP in modules

In [4]:
import torch
from torch import autograd
import torch.nn as nn
from modules import BiHebb_modules as customized_modules

In [8]:
Linear = customized_modules.Linear
net = Linear(64, 16, False, 'FA')
inputs = torch.randn(256, 64)
targets = torch.randn(256, 16)
outputs = net(inputs)

In [9]:
net.weight

Parameter containing:
tensor([[-0.0951, -0.0698, -0.1098,  ...,  0.0070, -0.0207, -0.0809],
        [-0.0144,  0.1023, -0.1116,  ..., -0.0620, -0.0037, -0.1050],
        [ 0.1186, -0.0183, -0.0654,  ..., -0.0138,  0.1098,  0.0675],
        ...,
        [-0.0250,  0.0289, -0.0018,  ...,  0.0654,  0.0137, -0.0285],
        [ 0.0230,  0.0379, -0.0085,  ..., -0.0783,  0.0235, -0.0861],
        [-0.0977, -0.0028, -0.0749,  ...,  0.0710,  0.0609, -0.0683]],
       requires_grad=True)

In [10]:
net.weight_feedback

Parameter containing:
tensor([[ 0.0877,  0.1059,  0.1154,  ..., -0.1102,  0.1023, -0.0110],
        [-0.0324,  0.0121, -0.0165,  ...,  0.0114,  0.1079,  0.1062],
        [-0.0299, -0.0278, -0.0313,  ..., -0.1217, -0.0675, -0.1198],
        ...,
        [ 0.1204, -0.0493,  0.0408,  ..., -0.0653, -0.0489,  0.0011],
        [ 0.1121, -0.0224, -0.0891,  ..., -0.1090,  0.0258,  0.0174],
        [-0.0976,  0.0234, -0.0547,  ...,  0.0042, -0.0961, -0.0218]])

In [17]:
Conv2d = customized_modules.Conv2d
net = Conv2d(64, 16, 3, algorithm='FA')
inputs = torch.randn(256, 64, 32, 32)
outputs = net(inputs)
net.weight[0,0]

tensor([[ 0.0058, -0.0218, -0.0088],
        [-0.0004,  0.0107, -0.0296],
        [-0.0209, -0.0208,  0.0285]], grad_fn=<SelectBackward>)

In [18]:
net.weight_feedback[0,0]

tensor([[-0.0319,  0.0017, -0.0089],
        [-0.0184,  0.0032, -0.0298],
        [-0.0242,  0.0079, -0.0283]])

## Developing Target Prop

In [1]:
import torch
from torch import autograd
import torch.nn as nn
from modules import TP_modules as customized_modules

In [2]:
Linear = customized_modules.Linear
net = Linear(64, 16, False, 'TP')
inputs = torch.randn(256, 64)
targets = torch.randn(256, 16)
outputs = net(inputs)
print(outputs.shape)
loss = nn.MSELoss()(outputs , targets)
loss.backward()

# autograd.grad(loss, net.weight)[0].shape

torch.Size([256, 16])


In [3]:

import torch.nn as nn
import torch.optim as optim
import torchvision

import copy
import numpy as np
import scipy.stats as ss
import scipy
import h5py
import random
import argparse
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F

import matplotlib.pylab as plt

In [4]:
import socket
if socket.gethostname()[0:4] in  ['node','holm','wats']:
    path_prefix = '/rigel/issa/users/Tahereh/Research'
elif socket.gethostname() == 'SYNPAI':
    path_prefix = '/hdd6gig/Documents/Research'
elif socket.gethostname()[0:2] == 'ax':
    path_prefix = '/scratch/issa/users/tt2684/Research'
    plt.switch_backend('agg')
elif socket.gethostname() == 'turing':
    path_prefix = '/home/tahereh/Documents/Research'


imagesetdir = path_prefix+'/Data/'
resultsdir = path_prefix+'/Results/Toy_models/'

class Args:
    dum=None

args = Args()
args.batch_size=256
args.algorithm = 'TP' #'TP' #'BP'
args.no_cuda = False
args.seed = 0

use_cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)

use_cuda = True
# data loader
kwargs = {'num_workers': 0, 'pin_memory': True, 'drop_last':True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(imagesetdir, train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomAffine(degrees=20, translate=(0.1, 0.1),scale=(0.25,2) ),
                        transforms.Resize(32),
                        transforms.ToTensor(),
                       
                    ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(imagesetdir, train=False, transform=transforms.Compose([
                        transforms.RandomAffine(degrees=20, translate=(0.1, 0.1),scale=(0.25,2) ),
                        transforms.Resize(32),
                        transforms.ToTensor(),
                        
                    ])),
    batch_size=args.batch_size, shuffle=False, **kwargs)

# transforms.Normalize((0.1307,), (0.3081,))
n_layers = 3
algorithm = args.algorithm #'inverse'#'forward'#'inverse'#'BP'
n_dataloader = 100000
batch_size = args.batch_size


class ReLUGrad(nn.Module):
    def __init__(self):
        super(ReLUGrad, self).__init__()
    def forward(self, grad_output, input):
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

class Forward(nn.Module):
    def __init__(self):
        super(Forward, self).__init__()
        self.fc_0 = Linear(1024, 256, bias=False, algorithm=algorithm)
        self.fc_1 = Linear(256, 10, bias=False, algorithm=algorithm)
        self.relu = nn.ReLU()

    def forward(self, x):
        xrelu = self.fc_0(x)
        x0 = self.relu(xrelu)
        x1 = self.fc_1(x0)

        return x1 #, [x, x0, x1], xrelu

class Backward(nn.Module):
    def __init__(self):
        super(Backward, self).__init__()
        self.fc_1 = Linear(10, 256, bias=False, algorithm=algorithm)
        self.fc_0 = Linear(256, 1024, bias=False, algorithm=algorithm)
        self.grelu = ReLUGrad()

    def forward(self, x, x0):
        x1 = self.fc_1(x)
        x1 = self.grelu(x1, x0)
        x0 = self.fc_0(x1)

        return x0#, [x1,  x]

def transpose_weights(state_dict):

    state_dict_new = {}
    for k, item in state_dict.items():
        state_dict_new.update({k: item.t()})
    return state_dict_new



modelF = Forward().cuda() # main model
modelB = Backward().cuda() # backward network to compute gradients for modelF

modelC = Forward().cuda() # Forward Control model to compare to BP
modelC.load_state_dict(modelF.state_dict())
# start symmetric
# modelB.load_state_dict(transpose_weights(modelF.state_dict()) )
modelE = Forward().cuda()

optimizerC = optim.RMSprop(modelC.parameters(), lr=1e-3, weight_decay=1e-6)
optimizerF = optim.RMSprop([p for n,p in modelF.named_parameters() if 'feedback' not in n],  lr=1e-3, weight_decay=1e-6)
optimizerB = optim.RMSprop([p for n,p in modelB.named_parameters() if 'feedback' not in n],  lr=1e-3, weight_decay=1e-6)
optimizerB_TP = optim.RMSprop([p for n,p in modelF.named_parameters() if 'feedback' in n],  lr=1e-3, weight_decay=1e-6)


criterionF = nn.CrossEntropyLoss() #
# criterionB = nn.MSELoss() #

n_classes = 10
onehot = torch.zeros(train_loader.batch_size, n_classes).cuda()


In [5]:
for epoch in range(10):
    running_loss = 0.0
    for images, targets in train_loader:
        images = images.cuda()
        targets = targets.cuda()

        onehot = torch.zeros(train_loader.batch_size, n_classes).cuda()
        onehot.zero_()
        onehot.scatter_(1, targets.view(train_loader.batch_size,-1), 1)
        onehot.requires_grad = True

        images = images.view(images.shape[0], -1)
        out = modelF(images)
        loss = criterionF(out, targets)
        optimizerB_TP.zero_grad()
        loss.backward()
        optimizerB_TP.step()

        
        optimizerF.zero_grad()
        onehot.backward(torch.ones_like(onehot))
        optimizerF.step()

        running_loss += loss.item()
    print(epoch, running_loss/len(train_loader))


0 2.3023521543568015


KeyboardInterrupt: 

In [None]:
for epoch in range(10):
    running_loss = 0.0
    for images, targets in train_loader:
        images = images.cuda()
        targets = targets.cuda()

    

        images = images.view(images.shape[0], -1)
        out = modelF(images)
        loss = criterionF(out, targets)

        
        optimizerF.zero_grad()
        loss.backward()
        optimizerF.step()

        running_loss += loss.item()
    print(epoch, running_loss/len(train_loader))
