In [2]:
import torch
from torch import autograd
import torch.nn as nn
from modules import TP_modules as customized_modules

In [3]:
# Smoke test: push one random batch through the project's custom Linear layer
# and backpropagate an MSE loss through it.
Linear = customized_modules.Linear
# Arguments are passed positionally; presumably (in_features, out_features,
# bias, algorithm), matching the keyword calls used later in this notebook
# -- TODO confirm against TP_modules.Linear.
net = Linear(64, 16, False, 'TP')
# Random inputs/targets: 256 samples, 64 input features, 16 output features.
inputs = torch.randn(256, 64)
targets = torch.randn(256, 16)
outputs = net(inputs)
print(outputs.shape)
loss = nn.MSELoss()(outputs , targets)
# Only checks that backward runs without error; no optimizer step is taken here.
loss.backward()

# autograd.grad(loss, net.weight)[0].shape

torch.Size([256, 16])


In [4]:

import torch.nn as nn
import torch.optim as optim
import torchvision

import copy
import numpy as np
import scipy.stats as ss
import scipy
import h5py
import random
import argparse
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F

import matplotlib.pylab as plt

In [25]:
import socket

# Resolve the machine-specific root directory from the hostname. The hostname
# is read once so that every branch tests the same value.
hostname = socket.gethostname()
if hostname.startswith(('node', 'holm', 'wats')):
    path_prefix = '/rigel/issa/users/Tahereh/Research'
elif hostname == 'SYNPAI':
    path_prefix = '/hdd6gig/Documents/Research'
elif hostname.startswith('ax'):
    path_prefix = '/scratch/issa/users/tt2684/Research'
    # On this host matplotlib is switched to the 'agg' backend.
    plt.switch_backend('agg')
elif hostname == 'turing':
    path_prefix = '/home/tahereh/Documents/Research'
else:
    # Previously an unknown host fell through every branch and left
    # `path_prefix` undefined: a NameError two lines below on a fresh kernel,
    # or a silently reused stale value on a warm one. Fail with a clear message.
    raise RuntimeError(
        "Unrecognized host %r: add a branch that sets path_prefix for this machine"
        % hostname)


# Dataset root and output directory, both derived from the per-host prefix.
imagesetdir = path_prefix+'/Data/'
resultsdir = path_prefix+'/Results/Toy_models/'

class Args:
    """Plain attribute container used in place of an argparse namespace."""
    dum=None

args = Args()
args.batch_size=256
args.algorithm = 'BP' #'TP' #'BP'
args.no_cuda = False
args.seed = 0

# Respect both the user flag and the actual hardware. An unconditional
# `use_cuda = True` used to follow this line and discarded the result.
use_cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)

# data loader
# `drop_last` is always enabled: later cells allocate buffers sized with
# `train_loader.batch_size`, so every batch must be full regardless of device.
# Only `pin_memory` depends on whether CUDA is in use.
kwargs = {'num_workers': 0, 'pin_memory': use_cuda, 'drop_last': True}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(imagesetdir, train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomAffine(degrees=20, translate=(0.1, 0.1),scale=(0.25,2) ),
                        transforms.Resize(32),
                        transforms.ToTensor(),
                    ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
# NOTE(review): the test loader applies the same RandomAffine augmentation as
# training, and `drop_last` also discards the final partial test batch --
# confirm both are intended for evaluation.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(imagesetdir, train=False, transform=transforms.Compose([
                        transforms.RandomAffine(degrees=20, translate=(0.1, 0.1),scale=(0.25,2) ),
                        transforms.Resize(32),
                        transforms.ToTensor(),
                    ])),
    batch_size=args.batch_size, shuffle=False, **kwargs)

# transforms.Normalize((0.1307,), (0.3081,))
n_layers = 3
# Learning algorithm handed to the custom Linear layers defined below.
algorithm = args.algorithm #'inverse'#'forward'#'inverse'#'BP'
n_dataloader = 100000
batch_size = args.batch_size


class ReLUGrad(nn.Module):
    """Gate an incoming gradient by the sign of a ReLU's input.

    Entries of ``grad_output`` are set to zero wherever ``input`` is strictly
    negative; every other entry (including positions where ``input == 0``)
    passes through unchanged.
    """

    def __init__(self):
        super().__init__()

    def forward(self, grad_output, input):
        # masked_fill returns a new tensor, so `grad_output` itself is not modified.
        blocked = input < 0
        return grad_output.masked_fill(blocked, 0)

class Forward(nn.Module):
    """Two-layer fully connected network: 1024 -> 256 -> ReLU -> 10.

    Both layers are the notebook's custom ``Linear`` without bias, configured
    with the module-level ``algorithm`` setting.
    """

    def __init__(self):
        super().__init__()
        self.fc_0 = Linear(1024, 256, bias=False, algorithm=algorithm)
        self.fc_1 = Linear(256, 10, bias=False, algorithm=algorithm)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the output of ``fc_1`` for the input batch ``x``."""
        hidden = self.relu(self.fc_0(x))
        return self.fc_1(hidden)

class Backward(nn.Module):
    """Mirror of ``Forward`` running 10 -> 256 -> 1024.

    The 256-wide intermediate is gated by ``ReLUGrad`` using the extra ``x0``
    argument before it is passed to the last layer.
    """

    def __init__(self):
        super().__init__()
        self.fc_1 = Linear(10, 256, bias=False, algorithm=algorithm)
        self.fc_0 = Linear(256, 1024, bias=False, algorithm=algorithm)
        self.grelu = ReLUGrad()

    def forward(self, x, x0):
        """Map ``x`` through ``fc_1``, zero entries where ``x0 < 0``, then apply ``fc_0``."""
        gated = self.grelu(self.fc_1(x), x0)
        return self.fc_0(gated)

def transpose_weights(state_dict):
    """Return a new dict mapping every key of ``state_dict`` to its entry's ``.t()``.

    The input mapping itself is left unmodified.
    """
    return {name: tensor.t() for name, tensor in state_dict.items()}

# A simple hook class that records the input and output of a layer during the forward/backward pass
class Hook():
    """Store the arguments of a module's most recent hook call.

    After the hooked pass has run, ``self.input`` and ``self.output`` hold the
    values the hook was invoked with.
    """

    def __init__(self, module, backward=False):
        # `== False` (rather than `not backward`) is kept on purpose: only
        # values that compare equal to False select the forward hook.
        if backward == False:
            register = module.register_forward_hook
        else:
            register = module.register_backward_hook
        self.hook = register(self.hook_fn)

    def hook_fn(self, module, input, output):
        self.input, self.output = input, output

    def close(self):
        """Remove the hook from the module."""
        self.hook.remove()


class HookTensor():
    """Capture the gradient that reaches a tensor during a backward pass.

    The ``backward`` argument is accepted but not used.
    """

    def __init__(self, tensor, backward=False):
        handle = tensor.register_hook(self.hook_fn)
        self.hook = handle

    def hook_fn(self, grad):
        # Returns None, so the gradient itself is passed on unchanged.
        self.grad = grad

    def close(self):
        """Remove the hook from the tensor."""
        self.hook.remove()

# Instantiate the networks. Construction order matters for reproducibility:
# each constructor draws its initial weights from the seeded RNG.
modelF = Forward().cuda() # main model
modelB = Backward().cuda() # backward network to compute gradients for modelF

modelC = Forward().cuda() # Forward Control model to compare to BP
# The control model starts from exactly the same weights as modelF.
modelC.load_state_dict(modelF.state_dict())
# start symmetric
# modelB.load_state_dict(transpose_weights(modelF.state_dict()) )
# NOTE(review): modelE is created but not referenced in the visible cells.
modelE = Forward().cuda()

# Optimizers are split by parameter name:
#   optimizerF    -> parameters of modelF whose name does not contain 'feedback'
#   optimizerB    -> parameters of modelB whose name does not contain 'feedback'
#   optimizerB_TP -> parameters of modelF whose name contains 'feedback'
# NOTE(review): whether any 'feedback' parameters exist depends on the custom
# Linear implementation and the chosen `algorithm` -- confirm that the list
# passed to optimizerB_TP is non-empty for every algorithm used.
optimizerC = optim.Adam(modelC.parameters(), lr=1e-3, weight_decay=1e-6)
optimizerF = optim.Adam([p for n,p in modelF.named_parameters() if 'feedback' not in n],  lr=1e-4, weight_decay=1e-6)
optimizerB = optim.RMSprop([p for n,p in modelB.named_parameters() if 'feedback' not in n],  lr=1e-4, weight_decay=1e-6)
optimizerB_TP = optim.RMSprop([p for n,p in modelF.named_parameters() if 'feedback' in n],  lr=1e-4, weight_decay=1e-6)


# Classification loss applied to modelF's outputs in the training cells.
criterionF = nn.CrossEntropyLoss() #
# criterionB = nn.MSELoss() #

n_classes = 10
# One-hot buffer with one row per sample of a full batch.
onehot = torch.zeros(train_loader.batch_size, n_classes).cuda()


In [24]:
# Training loop that updates modelF's 'feedback' parameters (optimizerB_TP)
# and then its remaining parameters (optimizerF) on every batch.
for epoch in range(10):
    running_loss = 0.0
    for images, targets in train_loader:
        images = images.cuda()
        targets = targets.cuda()

        # One-hot encoding of the labels. The buffer is sized from the batch
        # actually received (not from `train_loader.batch_size`) and created
        # directly on the targets' device, so a partial final batch cannot
        # break the reshape; a freshly created zeros tensor needs no zero_().
        onehot = torch.zeros(targets.shape[0], n_classes, device=targets.device)
        onehot.scatter_(1, targets.view(-1, 1), 1)
        onehot.requires_grad = True

        images = images.view(images.shape[0], -1)
        out = modelF(images)
        loss = criterionF(out, targets)
        optimizerB_TP.zero_grad()
        loss.backward()
        optimizerB_TP.step()

        # NOTE(review): `optimizerF.zero_grad()` clears the gradients that
        # `loss.backward()` just wrote on the parameters optimizerF owns, and
        # `onehot` is a leaf tensor that is not computed from modelF, so
        # `onehot.backward(...)` only fills `onehot.grad`. As written,
        # `optimizerF.step()` therefore never sees a loss gradient, which is
        # consistent with the recorded loss staying near 2.3026 (= ln 10).
        # Confirm the intended update rule for the forward weights.
        optimizerF.zero_grad()
        onehot.backward(torch.ones_like(onehot))
        optimizerF.step()

        running_loss += loss.item()
    # Mean training loss over the epoch.
    print(epoch, running_loss/len(train_loader))


0 2.301842774081434
1 2.302446872760088
2 2.302561936215458
3 2.302581986810407
4 2.3025847296429496
5 2.302584926287333


KeyboardInterrupt: 

In [26]:
# Plain backpropagation training of modelF with optimizerF.
for epoch in range(10):
    running_loss = 0.0
    for images, targets in train_loader:
        # Move the batch to the GPU and flatten every image to a vector.
        targets = targets.cuda()
        images = images.cuda().view(images.shape[0], -1)

        # Forward pass and loss.
        out = modelF(images)
        loss = criterionF(out, targets)

        # Gradient step.
        optimizerF.zero_grad()
        loss.backward()
        optimizerF.step()

        running_loss += loss.item()
    # Mean training loss over the epoch.
    print(epoch, running_loss/len(train_loader))


0 1.8587086756005247
1 1.577051299250024
2 1.4693135746523864
3 1.3571297867685301
4 1.2599245248696742


KeyboardInterrupt: 

In [1]:
import torch
from torch import autograd
import torch.nn as nn
from modules import customized_modules_layerwise as customized_modules
# Smoke test for the project's asymmetric-feedback transposed convolution.
ConvTranspose2d = customized_modules.AsymmetricFeedbackConvTranspose2d

# net = nn.ConvTranspose2d(64,16, 3, bias=None,)
# 64 input channels, 16 output channels, 3x3 kernel, no bias, algorithm 'IA'.
net = ConvTranspose2d(64,16, 3, bias=None, algorithm='IA')
inputs = torch.randn(256, 64, 7, 7)
# Targets are 9x9 to match the layer output for 7x7 inputs (see the printed shape).
targets = torch.randn(256, 16, 9, 9)
outputs = net(inputs)
print(outputs.shape)
loss = nn.MSELoss()(outputs , targets)
#loss.backward()

# Ask autograd for the weight gradient directly and display its shape
# (this is the cell's output value).
autograd.grad(loss, net.weight)[0].shape

torch.Size([256, 16, 9, 9])


torch.Size([64, 16, 3, 3])

In [2]:
# Identifier of the saved run whose configuration file is read by a later cell.
runname = 'Apr21-14-25_MNIST_9578f81f20_322'


class Args:
    """Stand-in for command-line arguments when running inside the notebook."""
    # Path of the run's YAML configuration, built from `runname`.
    config_file = '/home/tt2684/Research/Results/Symbio/Symbio/%s/configs.yml' % (runname,)
    method = 'SLVanilla'

In [3]:

import argparse
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from tensorboardX import SummaryWriter

import torch.optim as optim
import torchvision
import yaml 
import os
import copy
import json
import numpy as np
import scipy.stats as ss
import scipy
import h5py
import random
import argparse
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
import matplotlib
import matplotlib.pylab as plt
matplotlib.use('agg')
import pprint 
pp = pprint.PrettyPrinter(indent=4)

from utils import state_dict_utils

import pytorch_ssim


# # toggle_state_dict = state_dict_utils.toggle_state_dict # for ResNetLraveled
# toggle_state_dict = state_dict_utils.toggle_state_dict_resnets # for custom_resnets
# toggle_state_dict_YYtoBP = state_dict_utils.toggle_state_dict_YYtoBP

# # from models import custom_models_ResNetLraveled as custom_models
# from models import custom_resnets as custom_models

# Alphabetical list of the lowercase, non-dunder, callable names exposed by
# torchvision.models.
model_names = sorted(
    name
    for name, obj in vars(models).items()
    if name.islower() and not name.startswith("__") and callable(obj)
)

import socket

# Choose the machine-specific root directory from the hostname.
if socket.gethostname().startswith(('node', 'holm', 'wats')):
    path_prefix = '/rigel/issa/users/Tahereh/Research'
elif socket.gethostname() == 'SYNPAI':
    path_prefix = '/hdd6gig/Documents/Research'
elif socket.gethostname().startswith('ax'):
    path_prefix = '/home/tt2684/Research'

# parser = argparse.ArgumentParser(description='PyTorch Training')
# parser.add_argument(
#         '--config-file',
#         dest='config_file',
#         type=argparse.FileType(mode='r'))


# parser.add_argument('--method', type=str, default='BP', metavar='M',
#                     help='method:BP|SLVanilla|SLBP|FA|SLTemplateGenerator')


# Build the run configuration: start from the Args defaults, then overlay every
# key found in the run's YAML config file as an attribute on `args`.
args = Args()
assert args.config_file, 'Please specify a config file path'
if args.config_file:
    with open(args.config_file, 'r') as stream:
        data = yaml.safe_load(stream)        
#     delattr(args, 'config_file')
    # Instance dict of `args`; it fills up as the loop below sets attributes.
    arg_dict = args.__dict__
    for key, value in data.items():
        setattr(args, key, value)

pp.pprint(arg_dict)
print(args.method)
# Save the effective arguments next to the results. The path is built by plain
# string concatenation, so `args.resultsdir` must already end with a separator.
with open(args.resultsdir+'args.yml', 'w') as outfile:
    # vars(args) holds only the attributes set on the instance above.
    yaml.dump(vars(args), outfile, default_flow_style=False)

# TensorBoard writer pointed at the directory named in the loaded config.
writer = SummaryWriter(log_dir=args.tensorboarddir)

{   'archd': 'AsymResLNet10B',
    'arche': 'AsymResLNet10F',
    'base_channels': 64,
    'batch_size': 256,
    'customdatasetdir_train': '/home/tt2684/Research/Data/Custom_datasets/MNIST/',
    'databasedir': '/home/tt2684/Research/Results/database/Symbio/EAsymResLNet10FDAsymResLNet10B/MNIST/',
    'dataset': 'MNIST',
    'dist_backend': 'nccl',
    'dist_url': 'tcp://224.66.41.62:23456',
    'epochs': 150,
    'evaluate': False,
    'factord': 0.1,
    'factore': 0.1,
    'gamma': 0.001,
    'gpu': None,
    'hash': None,
    'imagesetdir': '/home/tt2684/Research/Data/MNIST/',
    'input_size': 32,
    'loadinitialization': '',
    'lossfuncB': 'MSELoss',
    'lrB': 0.001,
    'lrF': 0.001,
    'momentum': 0.9,
    'multiprocessing_distributed': False,
    'n_classes': 10,
    'note': '**modules_layerwise_denom_for_both=1**',
    'offset': 10,
    'optimizerB': 'RMSprop',
    'optimizerF': 'RMSprop',
    'path_prefix': '/home/tt2684/Research',
    'path_save_model': '/home/tt2684/R

In [4]:
# Pick the model package and the matching state-dict toggling helper from the
# encoder architecture name.
if 'AsymResLNet' in args.arche:
    toggle_state_dict = state_dict_utils.toggle_state_dict_normalize
    from models import custom_models_ResNetLraveled as custom_models

elif 'asymresnet' in args.arche:
    toggle_state_dict = state_dict_utils.toggle_state_dict_resnets
    from models import custom_resnets as custom_models

elif args.arche.startswith('resnet'):
    from models import resnets as custom_models
    # just for compatibility
    toggle_state_dict = state_dict_utils.toggle_state_dict_resnets

else:
    # Without this branch an unknown architecture left `custom_models` and
    # `toggle_state_dict` undefined (or silently reused values from an earlier
    # kernel run), and the failure only surfaced when the models were built.
    raise ValueError(
        "Unsupported architecture %r: expected a name containing 'AsymResLNet' "
        "or 'asymresnet', or starting with 'resnet'" % (args.arche,))

toggle_state_dict_YYtoBP = state_dict_utils.toggle_state_dict_YYtoBP

In [5]:
#MNIST
# Dataset constants: ten classes, single-channel images; the image side
# defaults to 32 when the config does not provide one.
args.n_classes = 10
input_size = 32 if args.input_size is None else args.input_size
image_channels = 1
    

In [6]:
# Constructor arguments: the forward model uses the configured method, the
# backward model is always built with the 'FA' algorithm.
args_modelF = dict(
    algorithm=args.method,
    base_channels=args.base_channels,
    image_channels=image_channels,
    n_classes=args.n_classes,
)
args_modelB = dict(
    algorithm='FA',
    base_channels=args.base_channels,
    image_channels=image_channels,
    n_classes=args.n_classes,
)

# Look the architecture classes up by name in the selected model package.
modelF = getattr(custom_models, args.arche)(**args_modelF)
modelB = getattr(custom_models, args.archd)(**args_modelB)

In [7]:
modelB

AsymResLNet10B(
  (upsample2): AsymmetricFeedbackConvTranspose2d(10, 128, kernel_size=(1, 1), stride=(2, 2), output_padding=(1, 1), bias=False)
  (bn42): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
  (conv42): AsymmetricFeedbackConvTranspose2d(10, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (relu): ReLU(inplace=True)
  (bn41): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
  (conv41): AsymmetricFeedbackConvTranspose2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn32): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
  (conv32): AsymmetricFeedbackConvTranspose2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn31): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
  (conv31): AsymmetricFeedbackConvTranspose2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1

In [8]:
# Random stand-in batch: 256 single-channel 32x32 inputs. The targets are
# 34x34, matching the reconstruction shape displayed a few cells below.
inputs = torch.randn(256, 1, 32, 32)
targets = torch.randn(256, 1, 34, 34)

In [9]:
# Run the forward model, then feed its latents to the backward model.
latents, outputs = modelF(inputs)
# The latents are detached, so nothing computed from `recons` backpropagates into modelF.
preconv1, recons = modelB(latents.detach())

In [10]:
recons.shape

torch.Size([256, 1, 34, 34])

In [11]:
preconv1.shape

torch.Size([256, 64, 16, 16])

In [12]:
# Mean-squared error between modelB's reconstruction and the random targets.
loss = nn.MSELoss()(recons, targets)

In [13]:
# Backpropagate the reconstruction loss.
# NOTE(review): the RuntimeError text pasted in a later cell may originate from
# this call -- confirm.
loss.backward()

In [None]:
# 

In [None]:
RuntimeError: Given transposed=1, weight of size 2097152 1 16 16, expected input[1, 32768, 16, 16] to have 2097152 channels, but got 32768 c

In [13]:
16384/256


64.0

torch.Size([256, 16, 9, 9])
before ravel: torch.Size([256, 64, 7, 7]) torch.Size([256, 16, 9, 9]) 16384
after ravel: torch.Size([1, 16384, 7, 7]) torch.Size([262144, 1, 9, 9]) 16384


RuntimeError: Given groups=16384, weight of size 16384 1 7 7, expected input[262144, 1, 9, 9] to have 16384 channels, but got 1 channels instead

In [None]:
outputs.shape

In [1]:
import torch
import torch.nn as nn
from modules import customized_modules_layerwise as customized_modules
# Smoke test for the project's asymmetric-feedback convolution with algorithm 'FA'.
Conv2d = customized_modules.AsymmetricFeedbackConv2d
# 3 input channels, 16 output channels, 5x5 kernel, no bias.
net = Conv2d(3,16,5, bias=None, algorithm='FA')
inputs = torch.randn(256, 3, 32, 32)
# 28x28 targets: presumably the output size of an unpadded 5x5 convolution on
# 32x32 inputs -- TODO confirm the layer's default padding/stride.
targets = torch.randn(256, 16, 28, 28)
outputs = net(inputs)
loss = nn.MSELoss()(outputs , targets)
# NOTE(review): the "before ravel / after ravel" lines in the recorded output
# are not printed by this cell; presumably they come from the custom module.
loss.backward()

before ravel: torch.Size([256, 3, 32, 32]) torch.Size([256, 16, 28, 28]) 768
after ravel: torch.Size([1, 768, 32, 32]) torch.Size([12288, 1, 28, 28]) 768
