In [1]:
from brevitas.core.scaling import ConstScaling
from brevitas.core.quant.int_base import IntQuant
import torch
import torch.nn as nn
from torch.nn.functional import linear
import torch.optim as optim
import torch.nn.functional as F
import argparse
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

Función para backward hook


Definición de argumentos y datos a usar

In [2]:
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        #print(model.linear_relu_stack[0].weight.grad)
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [3]:
# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=1, metavar='N',
            help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
            help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=6, metavar='N',
            help='number of epochs to train (default: 14)')
parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
            help='learning rate (default: 1.0)')
parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
            help='Learning rate step gamma (default: 0.7)')
parser.add_argument('--no-cuda', action='store_true', default=False,
            help='disables CUDA training')
parser.add_argument('--dry-run', action='store_true', default=False,
            help='quickly check a single pass')
parser.add_argument('--seed', type=int, default=1, metavar='S',
            help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
            help='how many batches to wait before logging training status')
parser.add_argument('--save-model', action='store_true', default=False,
            help='For Saving the current Model')
args = parser.parse_args("")
use_cuda = not args.no_cuda and torch.cuda.is_available()

print(args)

torch.manual_seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")

train_kwargs = {'batch_size': args.batch_size}
test_kwargs = {'batch_size': args.test_batch_size}
if use_cuda:
    cuda_kwargs = {'num_workers': 1,
                'pin_memory': True,
                'shuffle': True}
train_kwargs.update(cuda_kwargs)
test_kwargs.update(cuda_kwargs)

transform=transforms.Compose([
    transforms.ToTensor(),
    #media y desviación típica de la base de datos MNIST
    transforms.Normalize((0.1307,), (0.3081,))
    ])

dataset1 = datasets.MNIST('./data', train=True, download=True,
            transform=transform)
dataset2 = datasets.MNIST('./data', train=False,
            transform=transform)
train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

Namespace(batch_size=1, dry_run=False, epochs=6, gamma=0.7, log_interval=10, lr=1.0, no_cuda=False, save_model=False, seed=1, test_batch_size=1000)


Creación de la linear custom layer

In [4]:
"""class LinearC(nn.Module):
    def __init__(self, in_features: int, out_features: int, bias: bool = True,device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(LinearC,self).__init__()
        self.in_features = in_features
        self.size_out = out_features
        self.weight = nn.Parameter(torch.empty((out_features, in_features), **factory_kwargs))
        if bias:
            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
        nn.init.xavier_normal_(self.weight,1)

    def forward(self,x):
        output = linear(x,self.weight,bias=None)
        return torch.round(input=output,decimals=3)"""

class LinearC(nn.Linear):
    def __init__(self, in_features: int, out_features: int, bias: bool = True,device=None, dtype=None) -> None:
        super().__init__(in_features,out_features,bias=bias,device=device,dtype=dtype)
        

    def forward(self,x):
        output = super().forward(x)
        #print(output)
        return output#torch.round(input=output,decimals=5)    


In [13]:
class CustomNet(nn.Module):
    def __init__(self):
        super(CustomNet, self).__init__()
        self.flatten = nn.Flatten()
        self.l1 = nn.Linear(28*28,4)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(4,10)
        self.softmax = nn.LogSoftmax()
        

    def forward(self,x):
        x = self.flatten(x)
        x = self.l1(x)
        x = self.l2(self.relu(x))
        return self.softmax(x)
        
    

In [17]:
def create_backward_hooks( model :nn.Module, decimals: int) -> nn.Module:
    for parameter in model.parameters():
            parameter.register_hook(lambda grad: torch.round(input=grad,decimals=decimals))
    return model

def forward_hook(module, inputs, outputs):
    return torch.round(input=outputs,decimals=2)

def create_forward_hooks(model :nn.Module, decimals: int) -> nn.Module:
    for layer in model.children():
        layer.register_forward_hook(forward_hook)
        print(layer)
    return model

In [18]:
model = CustomNet()
model = create_backward_hooks(model,3)
model = create_forward_hooks(model,4)
model = model.to(device)

optimizer = optim.Adadelta(model.parameters(), lr=0.001)

Flatten(start_dim=1, end_dim=-1)
Linear(in_features=784, out_features=4, bias=True)
ReLU()
Linear(in_features=4, out_features=10, bias=True)
LogSoftmax(dim=None)


In [19]:
train(args, model, device, train_loader, optimizer, 1)

  return self.softmax(x)


tensor([[-2.1000, -2.3400, -2.1300, -2.6700, -2.2700, -2.0100, -2.6600, -1.9200,
         -2.4000, -2.9500]], device='cuda:0', grad_fn=<RoundBackward1>)


KeyboardInterrupt: Interrupted by user

In [None]:
def modify(tensor):
    tensor[1,1] = 0

prueba = torch.ones(2,2)
print(prueba)
modify(prueba)
prueba

tensor([[1., 1.],
        [1., 1.]])


tensor([[1., 1.],
        [1., 0.]])