In [1]:
import sys
import numpy as np

import torch
from torch.nn import Parameter
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torch.autograd import Variable
import Sklearn_PyTorch

import syft as sy

  from ._conv import register_converters as _register_converters





## ACM KDD'99

In [2]:
data_path = "../../../Dataset/KDD99/kddcup99.csv"

dataset = pd.read_csv(data_path, sep=',', usecols=range(0, 42))

print("Dataset Shape:", dataset.shape)

Dataset Shape: (494020, 42)


In [3]:
data_server = dataset.sample(frac=0.5, random_state=1)
dataset = dataset.drop(data_server.index)
data_alice = dataset.sample(frac=0.5, random_state=1)
data_bob = dataset.drop(data_alice.index)

## Data Preprocessing

In [4]:
from sklearn import preprocessing

In [5]:
print(data_server)

        duration protocol_type  service flag  src_bytes  dst_bytes  land  \
175542         0          icmp    ecr_i   SF       1032          0     0   
399593         0          icmp    ecr_i   SF        520          0     0   
378282         0           tcp  private   S0          0          0     0   
338019         0          icmp    ecr_i   SF       1032          0     0   
174680         0          icmp    ecr_i   SF       1032          0     0   
...          ...           ...      ...  ...        ...        ...   ...   
322889         0          icmp    ecr_i   SF       1032          0     0   
213783         0          icmp    ecr_i   SF       1032          0     0   
259270         0          icmp    ecr_i   SF       1032          0     0   
458893         0           tcp     http   SF        312        380     0   
37308          0           tcp     http   SF        318        438     0   

        wrong_fragment  urgent  hot  ...  dst_host_srv_count  \
175542               0 

### Transforming categorical feature to numerical feature

In [6]:
def encoding(data):
    for col in data.columns:
        if data[col].dtype == type(object):
            le_x = preprocessing.LabelEncoder()
            le_x.fit(data[col])
            data[col] = le_x.transform(data[col])
    return data

In [7]:
data_server_x = pd.DataFrame(data_server.iloc[:, 0:41])
data_server_y = pd.DataFrame(data_server.iloc[:, 41])
data_alice_x = pd.DataFrame(data_alice.iloc[:, 0:41])
data_alice_y = pd.DataFrame(data_alice.iloc[:, 41])
data_bob_x = pd.DataFrame(data_bob.iloc[:, 0:41])
data_bob_y = pd.DataFrame(data_bob.iloc[:, 41])

In [8]:
new_class = {'back':'abnormal', 'buffer_overflow':'abnormal', 'ftp_write':'abnormal', 'guess_passwd':'abnormal', 'imap':'abnormal',
            'ipsweep':'abnormal', 'land':'abnormal', 'loadmodule':'abnormal', 'multihop':'abnormal', 'neptune':'abnormal', 'nmap':'abnormal',
            'perl':'abnormal', 'phf':'abnormal', 'pod':'abnormal', 'portsweep':'abnormal', 'rootkit':'abnormal', 'satan':'abnormal',
            'smurf':'abnormal', 'spy':'abnormal', 'teardrop':'abnormal', 'warezclient':'abnormal', 'warezmaster':'abnormal'}
data_server_y = data_server_y.replace(new_class)
data_alice_y = data_alice_y.replace(new_class)
data_bob_y = data_bob_y.replace(new_class)

In [9]:
data_server_x = encoding(data_server_x)
data_server_y = encoding(data_server_y)
data_alice_x = encoding(data_alice_x)
data_alice_y = encoding(data_alice_y)
data_bob_x = encoding(data_bob_x)
data_bob_y = encoding(data_bob_y)

In [10]:
print(data_server_x)

        duration  protocol_type  service  flag  src_bytes  dst_bytes  land  \
175542         0              0       14     9       1032          0     0   
399593         0              0       14     9        520          0     0   
378282         0              1       44     5          0          0     0   
338019         0              0       14     9       1032          0     0   
174680         0              0       14     9       1032          0     0   
...          ...            ...      ...   ...        ...        ...   ...   
322889         0              0       14     9       1032          0     0   
213783         0              0       14     9       1032          0     0   
259270         0              0       14     9       1032          0     0   
458893         0              1       22     9        312        380     0   
37308          0              1       22     9        318        438     0   

        wrong_fragment  urgent  hot  ...  dst_host_count  dst_h

### Normalization

In [11]:
def normalize(df): 
    x = df.values #returns a numpy array
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)
    df = pd.DataFrame(x_scaled)
    return df

In [12]:
data_server_x = normalize(data_server_x)
data_alice_x = normalize(data_alice_x)
data_bob_x = normalize(data_bob_x)

In [13]:
print(data_server_x)

         0    1         2    3             4         5    6    7    8    9   \
0       0.0  0.0  0.222222  0.9  1.488371e-06  0.000000  0.0  0.0  0.0  0.0   
1       0.0  0.0  0.222222  0.9  7.499542e-07  0.000000  0.0  0.0  0.0  0.0   
2       0.0  0.5  0.698413  0.5  0.000000e+00  0.000000  0.0  0.0  0.0  0.0   
3       0.0  0.0  0.222222  0.9  1.488371e-06  0.000000  0.0  0.0  0.0  0.0   
4       0.0  0.0  0.222222  0.9  1.488371e-06  0.000000  0.0  0.0  0.0  0.0   
...     ...  ...       ...  ...           ...       ...  ...  ...  ...  ...   
247005  0.0  0.0  0.222222  0.9  1.488371e-06  0.000000  0.0  0.0  0.0  0.0   
247006  0.0  0.0  0.222222  0.9  1.488371e-06  0.000000  0.0  0.0  0.0  0.0   
247007  0.0  0.0  0.222222  0.9  1.488371e-06  0.000000  0.0  0.0  0.0  0.0   
247008  0.0  0.5  0.349206  0.9  4.499725e-07  0.000074  0.0  0.0  0.0  0.0   
247009  0.0  0.5  0.349206  0.9  4.586259e-07  0.000085  0.0  0.0  0.0  0.0   

        ...   31        32    33    34   35   36   

### One-Hot Encoding

In [14]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(data_server_y)
data_server_y = enc.transform(data_server_y).toarray() #Encode the classes to a binary array 
enc.fit(data_alice_y)
data_alice_y = enc.transform(data_alice_y).toarray()
enc.fit(data_bob_y)
data_bob_y = enc.transform(data_bob_y).toarray()

In [15]:
print(data_server_y.shape)
print(data_alice_y.shape)
print(data_bob_y.shape)

(247010, 2)
(123505, 2)
(123505, 2)


In [16]:
print(data_server_y)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [1. 0.]
 [0. 1.]
 [0. 1.]]


## IoT Botnet Stream Data

In [None]:
# Load all the data from the CSV file 
BM_DATA_PATH = "../../../Dataset/Botnet_Detection/Philips_B120N10_Baby_Monitor"
DB_DATA_PATH = "../../../Dataset/Botnet_Detection/Danmini_Doorbell"
ET_DATA_PATH = "../../../Dataset/Botnet_Detection/Ecobee_Thermostat"
df_bm_b = pd.read_csv(BM_DATA_PATH+"/benign_traffic.csv")
df_bm_m = pd.read_csv(BM_DATA_PATH+"/Mirai/udp.csv")
df_db_b = pd.read_csv(DB_DATA_PATH+"/benign_traffic.csv")
df_db_m = pd.read_csv(DB_DATA_PATH+"/Mirai/udp.csv")
df_et_b = pd.read_csv(ET_DATA_PATH+"/benign_traffic.csv")
df_et_m = pd.read_csv(ET_DATA_PATH+"/Mirai/udp.csv")

In [None]:
print(df_bm_g.shape)
print(df_db_g.shape)
print(df_et_g.shape)

In [None]:
# list(data) or 
list(df_bm_b.columns) 

In [None]:
df_bm_b.iloc[:, 13:16].head()

## Start Transfering data to workers

In [17]:
hook = sy.TorchHook(torch)

In [18]:
Alice = sy.VirtualWorker(hook, id='Alice')
Bob = sy.VirtualWorker(hook, id='Bob')

In [19]:
from sklearn.model_selection import train_test_split
a_train_x, a_test_x, a_train_y, a_test_y = train_test_split(data_alice_x, data_alice_y, test_size=0.20)
b_train_x, b_test_x, b_train_y, b_test_y = train_test_split(data_bob_x, data_bob_y, test_size=0.20)

In [20]:
tensor_server_x = torch.FloatTensor(data_server_x.values.astype(np.float32))
tensor_server_y = torch.FloatTensor(data_server_y.astype(np.float32))
t_a_train_x = torch.tensor(a_train_x.values.astype(np.float32))
t_a_test_x = torch.tensor(a_test_x.values.astype(np.float32))
t_a_train_y = torch.tensor(a_train_y.astype(np.float32))
t_a_test_y = torch.tensor(a_test_y.astype(np.float32))
t_b_train_x = torch.tensor(b_train_x.values.astype(np.float32))
t_b_test_x = torch.tensor(b_test_x.values.astype(np.float32))
t_b_train_y = torch.tensor(b_train_y.astype(np.float32))
t_b_test_y = torch.tensor(b_test_y.astype(np.float32))

In [21]:
print(t_b_test_y.shape)
print(t_a_test_y.shape)

torch.Size([24701, 2])
torch.Size([24701, 2])


In [22]:
a_x_train_ptr = t_a_train_x.send(Alice)
a_x_test_ptr = t_a_test_x.send(Alice)
a_y_train_ptr = t_a_train_y.send(Alice)
a_y_test_ptr = t_a_test_y.send(Alice)
b_x_train_ptr = t_b_train_x.send(Bob)
b_x_test_ptr = t_b_test_x.send(Bob)
b_y_train_ptr = t_b_train_y.send(Bob)
b_y_test_ptr = t_b_test_y.send(Bob)

In [23]:
print(Bob._objects)

{10342095302: tensor([[0.0000, 0.0000, 0.2188,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.3438,  ..., 0.0100, 0.0400, 0.0400],
        [0.0000, 0.0000, 0.2188,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.2188,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.2188,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.7031,  ..., 1.0000, 0.0000, 0.0000]]), 69127704134: tensor([[0.0000, 0.0000, 0.2188,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.2188,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.7031,  ..., 1.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.5000, 0.7031,  ..., 1.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.7031,  ..., 0.0000, 1.0000, 1.0000],
        [0.0000, 0.0000, 0.2188,  ..., 0.0000, 0.0000, 0.0000]]), 8457591110: tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]]), 47100924285: tensor([[1., 0.],
        [

In [24]:
print(Alice._objects)

{64943329861: tensor([[0.0000, 0.5000, 0.3548,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.7097,  ..., 0.0000, 1.0000, 1.0000],
        [0.0000, 0.0000, 0.2258,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 1.0000, 0.1774,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.0000, 0.7097,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.7097,  ..., 0.0000, 1.0000, 1.0000]]), 66078015922: tensor([[0.0000, 0.5000, 0.7097,  ..., 1.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.7097,  ..., 1.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.2258,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.5000, 0.7097,  ..., 1.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.3548,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.2258,  ..., 0.0000, 0.0000, 0.0000]]), 31634114022: tensor([[0., 1.],
        [1., 0.],
        [1., 0.],
        ...,
        [0., 1.],
        [0., 1.],
        [1., 0.]]), 93807833313: tensor([[1., 0.],
        

In [None]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        outputs = self.linear(x)
        return outputs

In [28]:
print(tensor_server_x)
print(tensor_server_y)

tensor([[0.0000, 0.0000, 0.2222,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.2222,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.6984,  ..., 1.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.2222,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.3492,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.3492,  ..., 0.0000, 0.0000, 0.0000]])
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [0., 1.],
        [0., 1.]])


In [25]:
from Sklearn_PyTorch import TorchRandomForestClassifier

# Initialisation of the model
my_model = TorchRandomForestClassifier(nb_trees=100, nb_samples=3, max_depth=5, bootstrap=True)

# Fitting function
my_model.fit(tensor_server_x, tensor_server_y)


ValueError: only one element tensors can be converted to Python scalars

### Initialize the parameters

In [None]:
epochs = 3
input_dim = 41
output_dim = 2 #Number of clasees
lr_rate = 0.001

In [None]:
model = LogisticRegression(input_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=lr_rate)

In [None]:
def training(epochs, model, data, labels):
    print(epochs)
    for epochs in range(int(epochs)):    
        print("In the loop")
        optimizer.zero_grad() ## Zero out the gradient
        outputs = model(data) ## Call forward
        print(outputs)
        print(labels)
        loss = ((outputs - labels)**2).sum() ## softmax
        print(loss)
        loss.backward() ## Accumulated gradient updates into x
        optimizer.step()

In [None]:
tensor_server_y = tensor_server_y.squeeze()
training(epochs, model, tensor_server_x, tensor_server_y) ## Train the initial model on Server

## Transfer model to clients

In [None]:
bobs_model = model.copy().send(Bob)
alices_model = model.copy().send(Alice)

bobs_opt = torch.optim.SGD(params=bobs_model.parameters(),lr=lr_rate)
alices_opt = torch.optim.SGD(params=alices_model.parameters(),lr=lr_rate)

In [None]:
print(Bob._objects)

## Sencond Training with local data

In [None]:
for i in range(2):

    # Train Bob's Model
    bobs_opt.zero_grad()
    bobs_pred = bobs_model(b_x_train_ptr)
    bobs_loss = ((bobs_pred - b_y_train_ptr)**2).sum()
    bobs_loss.backward()

    bobs_opt.step()
    bobs_loss = bobs_loss.get().data

    # Train Alice's Model
    alices_opt.zero_grad()
    alices_pred = alices_model(a_x_train_ptr)
    alices_loss = ((alices_pred - a_y_train_ptr)**2).sum()
    alices_loss.backward()

    alices_opt.step()
    alices_loss = alices_loss.get().data

    total = 24701
    correct = 0
    outputs_a = alices_model(a_x_test_ptr)
    _a, pred_a = torch.max(outputs_a.data, 1)
    va, labels_a = torch.max(a_y_test_ptr.data, 1)
    correct+= (pred_a == labels_a).sum()
    accuracy_a = 100*correct/total
    print("Iteration:", i, "ALice Accuracy: ", accuracy_a.get().data)

    correct = 0
    outputs_b = bobs_model(b_x_test_ptr)
    _b, pred_b = torch.max(outputs_b.data, 1)
    vb, labels_b = torch.max(b_y_test_ptr.data, 1)
    correct+= (pred_b == labels_b).sum()
    accuracy_b = 100*correct/total
    print("Iteration:", i, "Bob Accuracy: ", accuracy_b.get().data)
    

In [None]:
print(Bob._objects)

In [None]:
print(Alice._objects)