In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable

import numpy as np
import pandas as pd

from torchdp import PrivacyEngine, utils, autograd_grad_sample

class MLP(torch.nn.Module):
    def __init__(self, hidden_layer_size, in_size, classes):
        super(MLP, self).__init__()
        self.in_size=in_size
        self.hidden_layer_size=hidden_layer_size
        self.classes = classes
        self.forward_1 = torch.nn.Linear(self.in_size,self.hidden_layer_size)
        self.relu = torch.nn.ReLU()
        self.forward_2 = torch.nn.Linear(self.hidden_layer_size, self.classes)
        self.soft_max = torch.nn.Softmax()
        
    def forward(self, x):
        # print(x)
        hidden_layer = self.forward_1(x)
        # print(hidden_layer)
        relu_step = self.relu(hidden_layer)
        out = self.forward_2(relu_step)
        soft_max = self.soft_max(out)
        return soft_max
    
### Load dataset and split
from load_data import load_data
from sklearn.model_selection import train_test_split
loaded_datasets = load_data()
data = loaded_datasets['car']["data"]
X = data.loc[:, data.columns != loaded_datasets['car']["target"]]
y = data.loc[:, data.columns == loaded_datasets['car']["target"]]
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = MLP(100, X.shape[1], len(np.unique(y)))
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()

# for name, param in model.named_parameters():
#     if param.requires_grad:
#         print(name, param.data)
        
x_train = Variable(torch.from_numpy(x_train.to_numpy()).float(), requires_grad=True)
y_train = Variable(torch.from_numpy(y_train.to_numpy().ravel()).float(), requires_grad=True)
# Check test loss
model.eval()
x_test = Variable(torch.from_numpy(x_test.to_numpy())).float()
y_test = Variable(torch.from_numpy(y_test.to_numpy().ravel())).float()
y_pred = model(x_test)
print(torch.argmax(y_pred.squeeze(),1).size())
print(y_test.size())
before_train = criterion(torch.argmax(y_pred.squeeze(), 1).float(), y_test)
print('Test loss before training' , before_train.item())

# Train
model.train()
epoch = 20
for epoch in range(epoch):
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(x_train)
    # Compute Loss
    converted = torch.argmax(y_pred.squeeze(),1).float()
    loss = criterion(converted, y_train)
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward(retain_graph=True)
    optimizer.step()

model.eval()
y_pred = model(x_test)
print(torch.argmax(y_pred.squeeze(),1).size())
print(y_test.size())
before_train = criterion(torch.argmax(y_pred.squeeze(), 1).float(), y_test)
print('Test loss after training' , before_train.item())

from sklearn.metrics import accuracy_score
# print(torch.argmax(y_pred.squeeze(), 1).numpy())
# print(y_test.numpy())
accuracy_score(torch.argmax(y_pred.squeeze(), 1).numpy(), y_test.numpy())

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable

import numpy as np
import pandas as pd

from torchdp import PrivacyEngine, utils, autograd_grad_sample

class MLP(torch.nn.Module):
    def __init__(self, hidden_layer_size, in_size, classes):
        super(MLP, self).__init__()
        self.in_size=in_size
        self.hidden_layer_size=hidden_layer_size
        self.classes = classes
        self.forward_1 = torch.nn.Linear(self.in_size,self.hidden_layer_size)
        self.relu = torch.nn.ReLU()
        self.forward_2 = torch.nn.Linear(self.hidden_layer_size, self.classes)
        self.soft_max = torch.nn.Softmax()
        
    def forward(self, x):
        # print(x)
        hidden_layer = self.forward_1(x)
        # print(hidden_layer)
        relu_step = self.relu(hidden_layer)
        out = self.forward_2(relu_step)
        soft_max = self.soft_max(out)
        return torch.argmax(soft_max, 1).float()
    
### Load dataset and split
from load_data import load_data
from sklearn.model_selection import train_test_split
loaded_datasets = load_data()
data = loaded_datasets['car']["data"]
X = data.loc[:, data.columns != loaded_datasets['car']["target"]]
y = data.loc[:, data.columns == loaded_datasets['car']["target"]]
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = MLP(100, X.shape[1], len(np.unique(y)))
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()

# for name, param in model.named_parameters():
#     if param.requires_grad:
#         print(name, param.data)
        
x_train = Variable(torch.from_numpy(x_train.to_numpy()).float(), requires_grad=True)
y_train = Variable(torch.from_numpy(y_train.to_numpy().ravel()).float(), requires_grad=True)
# Check test loss
model.eval()
x_test = Variable(torch.from_numpy(x_test.to_numpy())).float()
y_test = Variable(torch.from_numpy(y_test.to_numpy().ravel())).float()
y_pred = model(x_test)
print(y_pred.size())
print(y_test.size())
before_train = criterion(y_pred, y_test)
print('Test loss before training' , before_train.item())

# Train
model.train()
epoch = 20
for epoch in range(epoch):
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(x_train)
    # Compute Loss
    loss = criterion(y_pred, y_train)
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward(retain_graph=True)
    optimizer.step()

model.eval()
y_pred = model(x_test)
print(y_pred.size())
print(y_test.size())
before_train = criterion(y_pred, y_test)
print('Test loss after training' , before_train.item())

from sklearn.metrics import accuracy_score
# print(torch.argmax(y_pred.squeeze(), 1).numpy())
# print(y_test.numpy())
accuracy_score(y_pred.numpy(), y_test.numpy())

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

import numpy as np
import pandas as pd

from torchdp import PrivacyEngine, utils, autograd_grad_sample


class MLP(nn.Module):
    def __init__(self, input_size, classes, hidden_layer_sizes):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size,hidden_layer_sizes[0])
        self.fc2 = nn.Linear(hidden_layer_sizes[0],hidden_layer_sizes[1])
        self.fc3 = nn.Linear(hidden_layer_sizes[1],classes)
        
    def forward(self, x):
        x = self.fc3(F.leaky_relu(self.fc2(F.leaky_relu(self.fc1(x), 0.2)), 0.2))
        return x

### Load dataset and split
from load_data import load_data
from sklearn.model_selection import train_test_split
loaded_datasets = load_data()
data = loaded_datasets['mushroom']["data"]
X = data.loc[:, data.columns != loaded_datasets['mushroom']["target"]]
y = data.loc[:, data.columns == loaded_datasets['mushroom']["target"]]
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from sklearn.preprocessing import MinMaxScaler 
scaler = MinMaxScaler()
x_train_numpy = scaler.fit_transform(x_train)
x_test_numpy = scaler.transform(x_test)
x_train = pd.DataFrame(x_train_numpy, columns = x_train.columns)
x_test = pd.DataFrame(x_test_numpy, columns = x_test.columns)

net = MLP(X.shape[1], len(np.unique(y)), (50,20))

sample_size=len(x_train)
batch_size=min(250, len(x_train))

import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=.02, betas=(0.5, 0.9))
criterion = nn.CrossEntropyLoss()

privacy_engine = PrivacyEngine(
    net,
    batch_size,
    sample_size,
    alphas= [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
    noise_multiplier=3.0,
    max_grad_norm=1.0,
    clip_per_layer=True
)
privacy_engine.attach(optimizer)

target_delta = 1/x_train.shape[0]
print(target_delta)
for epoch in range(500):
    for i in range(int(len(x_train)/batch_size) + 1):
        data2 = x_train.iloc[i*batch_size:i*batch_size+batch_size, :]
        labels = y_train.iloc[i*batch_size:i*batch_size+batch_size, :]
        if len(labels) < batch_size:
            break
        X, Y = Variable(torch.FloatTensor([data2.to_numpy()]), requires_grad=True), Variable(torch.FloatTensor([labels.to_numpy()]), requires_grad=False)
        optimizer.zero_grad()
        y_pred = net(X)
        output = criterion(y_pred.squeeze(), Y.squeeze().long())
        output.backward()
        optimizer.step()
        
    if (epoch % 3 == 0.0):
        print("Epoch {} - loss: {}".format(epoch, output))
        epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(target_delta)
        print ('epsilon is {e}, alpha is {a}'.format(e=epsilon, a = best_alpha))
        if 3.0 < epsilon:
            break
        
predictions = torch.argmax(net(Variable(torch.FloatTensor([x_test.to_numpy()]), requires_grad=True))[0],1)
print(predictions)
from sklearn.metrics import accuracy_score
print('MLP Acc:' + str(accuracy_score(predictions.numpy(), y_test.to_numpy())))

Memory consumed by mushroom:1494944
Memory use too high with mushroom, subsampling to:1000000
Memory consumed by mushroom:1043328
0.00023004370830457787




Epoch 0 - loss: 0.6696429252624512
epsilon is 0.39726294588489375, alpha is 37.0
Epoch 3 - loss: 0.6951021552085876
epsilon is 0.7363773297646037, alpha is 22.0
Epoch 6 - loss: 0.6997715830802917
epsilon is 0.9601826185669726, alpha is 18.0
Epoch 9 - loss: 0.5692386627197266
epsilon is 1.1418089324100684, alpha is 15.0
Epoch 12 - loss: 0.5020430088043213
epsilon is 1.2987387155396775, alpha is 14.0
Epoch 15 - loss: 0.4601525664329529
epsilon is 1.4402855435558197, alpha is 13.0
Epoch 18 - loss: 0.5488324761390686
epsilon is 1.5690691188167256, alpha is 12.0
Epoch 21 - loss: 0.931053876876831
epsilon is 1.6886356656211716, alpha is 10.9
Epoch 24 - loss: 0.5452947616577148
epsilon is 1.801292100034472, alpha is 10.4
Epoch 27 - loss: 0.48831701278686523
epsilon is 1.9080624606533234, alpha is 9.9
Epoch 30 - loss: 0.4481630325317383
epsilon is 2.0097439373377393, alpha is 9.5
Epoch 33 - loss: 0.3437044322490692
epsilon is 2.107108403717419, alpha is 9.2
Epoch 36 - loss: 0.31206318736076355

In [2]:
from diffprivlib.models import LogisticRegression
clf = LogisticRegression(epsilon=3.0).fit(x_train, y_train)
predictions = clf.predict(x_test)
print('DPLR Acc:' + str(accuracy_score(predictions, y_test.to_numpy())))

DPLR Acc:0.9006439742410304


  return f(**kwargs)
