In [2]:
# Reload modules automatically
# https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.optim import SGD, Adam
from torch.nn.utils.clip_grad import clip_grad_norm_
import torch.nn.functional as F
from strn_and_rbstness.data import GraphDataset, split
from strn_and_rbstness.helper.utils import accuracy
from strn_and_rbstness.models import create_model
from strn_and_rbstness.train import _train
from torch_geometric.data import Data, Dataset, DataLoader
from torch_geometric.datasets import Planetoid, Reddit
from torch_geometric.nn import MessagePassing, DataParallel
from torch_geometric.nn.inits import reset, uniform
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.nn import GCNConv



In [4]:
class GCNNet(torch.nn.Module):
    def __init__(self, in_channels, out_channels, hidden_dim, device,
                 activation, dropout):
        super(GCNNet, self).__init__()
        conv = GCNConv

        self.conv1 = conv(in_channels, hidden_dim)
        self.conv2 = conv(hidden_dim, out_channels)

        self.activation = F.relu if activation == 'relu' else F.elu
        self.dropout = dropout
        self.device = device

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = x.double()

        x = self.conv1(x, edge_index)
        x = self.activation(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

class DPSGD(SGD):

    def __init__(self, params, noise_scale, gradient_norm_bound, lot_size,
                 sample_size, lr=0.01):
        super(DPSGD, self).__init__(params, lr=lr)

        self.noise_scale = noise_scale
        self.gradient_norm_bound = gradient_norm_bound
        self.lot_size = lot_size
        self.sample_size = sample_size
        for group in self.param_groups:
            for p in group['params']:
                if p.requires_grad:
                    p.accumulated_grads = []

    def per_sample_step(self):
        for group in self.param_groups:
            for p in group['params']:
                if p.requires_grad:
                    per_sample_grad = p.grad.detach().clone()
                    print(per_sample_grad)
                    print(per_sample_grad.shape)
                    ## Clipping gradient
                    clip_grad_norm_(per_sample_grad,
                                    max_norm=self.gradient_norm_bound)
                    p.accumulated_grads.append(per_sample_grad)

    def zero_accum_grad(self):
        for group in self.param_groups:
            for p in group['params']:
                p.accumulated_grads = []

    def zero_sample_grad(self):
        super(DPSGD, self).zero_grad()

    def step(self, device, *args, **kwargs):
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                # DP:
                p.grad.data = torch.stack(p.accumulated_grads, dim=0).clone()

                ## Adding noise and aggregating each element of the lot:
                p.grad.data += torch.empty(p.grad.data.shape).normal_(mean=0.0, std=(self.noise_scale*self.gradient_norm_bound)).to(device)
                p.grad.data = torch.sum(p.grad.data, dim=0) * self.sample_size / self.lot_size
        super(DPSGD, self).step(*args, **kwargs)

In [5]:
data = Planetoid("data", "Cora")[0]
print(data)
num_features = 1433
num_classes = 7
hidden_dim = 32
activation = "relu"
# default values
noise_scale = 4 
gradient_norm_bound = 1
lot_size = 1
sample_size = 1

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [22]:
for param in model.parameters():
    print(param.shape)

total_params = 0
for param in list(model.parameters()):
    nn = 1
    for sp in list(param.size()):
        nn = nn * sp
    total_params += nn
print("Total parameters", total_params)

model_params = filter(lambda param: param.requires_grad,
                        model.parameters())
trainable_params = sum([np.prod(param.size())
                        for param in model_params])
print("Trainable parameters", trainable_params)

torch.Size([32])
torch.Size([32, 1433])
torch.Size([7])
torch.Size([7, 32])
Total parameters 46119
Trainable parameters 46119


In [6]:
# init
model = GCNNet(num_features, num_classes, hidden_dim, "cpu", activation, True).double()
optimizer = DPSGD(model.parameters(), noise_scale, gradient_norm_bound, lot_size,
                  sample_size, lr=0.01)
# Predict
optimizer.zero_accum_grad()
optimizer.zero_sample_grad()
pred = model(data)
loss_f = torch.nn.NLLLoss()
loss = loss_f(pred, data.y)
print(loss)
loss.backward()


tensor(1.9459, dtype=torch.float64, grad_fn=<NllLossBackward0>)


In [7]:
optimizer.per_sample_step()
#optimizer.step("cpu")

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.], dtype=torch.float64)
torch.Size([32])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float64)
torch.Size([32, 1433])
tensor([ 0.0132,  0.0627, -0.0115, -0.1592, -0.0145,  0.0328,  0.0764],
       dtype=torch.float64)
torch.Size([7])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
    