In [567]:
import pandas as pd
import os
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

#!pip install gensim
#!pip install torchview
#from torchview import draw_graph

In [568]:
path = "../input/aalto-csvs/"

In [569]:
features= ['pck_size', 'Ether_type', 'LLC_ctrl', 
           'EAPOL_version', 'EAPOL_type', 'IP_ihl', 
           'IP_tos', 'IP_len', 'IP_flags', 'IP_DF', 
           'IP_ttl', 'IP_options', 'ICMP_code', 'TCP_dataofs', 
           'TCP_FIN', 'TCP_ACK', 'TCP_window', 'UDP_len', 
           'DHCP_options', 'BOOTP_hlen', 'BOOTP_flags', 'BOOTP_sname', 
           'BOOTP_file', 'BOOTP_options', 'DNS_qr', 'DNS_rd', 
           'DNS_qdcount', 'dport_class', 'payload_bytes', 'entropy', 'Label']
print("Total features: ")
print(len(features))

pairs = {'Aria': 0, 'D-LinkCam': 1, 'D-LinkDayCam': 2, 'D-LinkDoorSensor': 3, 'D-LinkHomeHub': 4, 'D-LinkSensor': 5, 
         'D-LinkSiren': 6, 'D-LinkSwitch': 7, 'D-LinkWaterSensor': 8, 'EdimaxCam': 9, 'EdimaxPlug1101W': 10, 
         'EdimaxPlug2101W': 11, 'EdnetCam': 12, 'EdnetGateway': 13, 'HomeMaticPlug': 14, 'HueBridge': 15, 'HueSwitch': 16, 
         'IKettle2': 17, 'Lightify': 18, 'MAXGateway': 19, 'SmarterCoffee': 20, 'TP-LinkPlugHS100': 21, 'TP-LinkPlugHS110': 22,
         'WeMoInsightSwitch': 23, 'WeMoLink': 24, 'WeMoSwitch': 25, 'Withings': 26}

Total features: 
31


In [570]:
class IoTData(Dataset):
    """Dataset to read the csv files."""
    
    # csv_file is the path to CSV file
    def __init__(self, csv_file, lookup, transform=None):
        """
        Args:
            csv_file(string): path to CSV file to be read.
            transform(callable, optional): Apply a transform on data
        This is the initialization of IoTData.
        """
        self.csv_file = pd.read_csv(csv_file, usecols=features)
        self.transform = transform
        self.lookup = lookup
        
    # Returns the length of initialized CSV file.
    def __len__(self):
        return len(self.csv_file)
    
    def __getitem__(self, idx):
        """
        Args:
            idx(integer): The index of row to sample from Dataset.
        Returns:dict
            keys(string, string):"device", "data"
            values(string, tensor): Name of device, tensor of features for device
        """
        data = self.csv_file.iloc[idx,:-1].astype("float64")
        data = torch.Tensor(data.values)
        device = self.csv_file.iloc[idx,-1]
        device = self.lookup[device]
        sample={"device": device, "data": data}
        if self.transform:
            sample = self.transform(sample)
        return sample

In [571]:
import torch.nn.functional as F
def get_one_hot_labels(labels, n_classes):
    '''
    Function for creating one-hot vectors for the labels, returns a tensor of shape (?, num_classes).
    Parameters:
        labels: tensor of labels from the dataloader, size (?)
        n_classes: the total number of classes in the dataset, an integer scalar
    '''
    return F.one_hot(labels, num_classes=n_classes)

In [572]:
def get_noise(n_samples, z_dim, device):
    return torch.randn(n_samples, z_dim, device=device)

In [573]:
def combine_tensors(x, y):
    '''
    Function for combining two tensors with shapes (n_samples, ?) and (n_samples, ?).
    Parameters:
      x: (n_samples, ?) the first tensor. 
      y: (n_samples, ?) the second tensor.
    '''
    combined = torch.cat((x.float(), y.float()), dim = 1)
    return combined

In [574]:
class Reshape(nn.Module):
    def __init__(self, *args):
        super(Reshape, self).__init__()
        self.shape = args

    def forward(self, x):
        return x.view(self.shape)

In [575]:
class Generator(nn.Module):
    def __init__(self, input_dim, hidden_dim, out_dim):
        super(Generator, self).__init__()
        self.input_dim = input_dim
        self.gen = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Linear(hidden_dim, hidden_dim*2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(hidden_dim*2, hidden_dim*4),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim*4, out_dim)
        )
    def forward(self, noise):
        return self.gen(noise)

In [576]:
class Discriminator(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(Discriminator, self).__init__()
        self.input_dim = input_dim
        self.disc = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(hidden_dim, hidden_dim*2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(hidden_dim*2, hidden_dim*4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim*4, 1),
            nn.Sigmoid()
        )
    def forward(self, disc_input):
        return self.disc(disc_input)

In [577]:
criterion = nn.BCEWithLogitsLoss()
epoches = 5
batch_size = 128
z_dim = 28
lr=0.00001
display_step=500
n_classes = 27
generator_input_dim = z_dim + n_classes
print(generator_input_dim)
discriminator_input_dim = len(features)-1 + n_classes


train_data = IoTData(os.path.join(path, "Aalto_train_IoTDevID.csv"), pairs)
train_data = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Running device:{}".format(device))

55
Running device:cpu


In [578]:
"""
x = get_noise(1,10, device)
model = Generator(10,20, 5).to(device)
y = model(x)
model_graph = draw_graph(model, input_size=(1,10), device='meta')
model_graph.visual_graph
"""

"\nx = get_noise(1,10, device)\nmodel = Generator(10,20, 5).to(device)\ny = model(x)\nmodel_graph = draw_graph(model, input_size=(1,10), device='meta')\nmodel_graph.visual_graph\n"

In [579]:
gen = Generator(generator_input_dim, 30, len(features)-1).to(device)
gen_opt = torch.optim.Adam(gen.parameters(), lr=lr)
disc = Discriminator(discriminator_input_dim, 64).to(device)
disc_opt = disc_opt = torch.optim.Adam(disc.parameters(), lr=lr)

In [580]:
cur_step = 0
test_generator = True
generator_losses = []
discriminator_losses = []

for epoch in range(epoches):
    for cur_batch in train_data:
        reals = cur_batch["data"]
        labels = cur_batch["device"]
    # print(labels.dtype)
    #    print(reals.shape)
   #     print(labels.shape)
        one_hots = get_one_hot_labels(labels, n_classes)
    #    print(one_hots.dtype)
        
        disc.zero_grad()
        noises = get_noise(batch_size, z_dim, device)
 #       print(noises.shape)
        noise_with_labels = combine_tensors(noises, one_hots)
#        print(noise_with_labels.dtype)
        fakes = gen(noise_with_labels)
   #     print(fakes.shape)
        fakes_and_labels = combine_tensors(fakes.detach(), one_hots)
        reals_and_labels = combine_tensors(reals, one_hots)
        disc_fake_predictions = disc(fakes_and_labels)
        disc_real_predictions = disc(reals_and_labels)

        disc_fake_loss = criterion(disc_fake_predictions, torch.zeros_like(disc_fake_predictions))
        disc_real_loss = criterion(disc_real_predictions, torch.ones_like(disc_real_predictions))
        disc_loss = (disc_fake_loss + disc_real_loss) / 2
        disc_loss.backward(retain_graph=True)
        disc_opt.step() 
        discriminator_losses += [disc_loss.item()]
        
        gen_opt.zero_grad()
        fake_image_and_labels = combine_tensors(fakes, one_hots)
        disc_fake_pred = disc(fake_image_and_labels)
        gen_loss = criterion(disc_fake_pred, torch.ones_like(disc_fake_pred))
        gen_loss.backward()
        gen_opt.step()
        generator_losses += [gen_loss.item()]
        
        """
        disc_fake_predictions = disc(fakes.detach())
        discriminator_loss = criterion(disc_fake_predictions, torch.zeros_like(disc_fake_predictions))
        
        discriminator_real_predictions = disc(reals)
        discriminator_loss += criterion(discriminator_real_predictions, torch.ones_like(discriminator_real_predictions))
        discriminator_loss/=2
    
        
        
        discriminator_loss.backward(retain_graph=True)
        disc_opt.step()
        mean_discriminator_loss += discriminator_loss.item()/display_step
        
        if test_generator:
            old_generator_weights = gen.gen[0].weight.detach().clone()
        
        gen.zero_grad()
        noise = get_noise(batch_size, z_dim, device)
        fakes = gen(noise)
        discriminator_predictions = disc(fakes)
        generator_loss = criterion(discriminator_predictions, torch.ones_like(discriminator_predictions))
        generator_loss.backward()
        gen_opt.step()
        
        mean_generator_loss += generator_loss/display_step
        if test_generator:
            try:
                assert lr > 0.0000002 or (gen.gen[0][0].weight.grad.abs().max() < 0.0005 and epoch == 0)
                assert torch.any(gen.gen[0][0].weight.detach().clone() != old_generator_weights)
            except:
                error = True
                print("Runtime tests have failed")
        """
        if (cur_step % display_step) == 0 and cur_step > 0:
            mean_generator_loss = sum(generator_losses[-display_step:]) / display_step
            mean_discriminator_loss = sum(discriminator_losses[-display_step:]) / display_step
            print("Epoch : {} -- Step :{} -- Generator loss:{} -- Discriminator loss:{}".format(epoch, cur_step, mean_generator_loss, mean_discriminator_loss))
            noise = get_noise(1,z_dim,device)
            lbls = get_one_hot_labels(torch.tensor([0]),n_classes)
            noise_and_lbls = combine_tensors(noise, lbls)
            fks = gen(noise_and_lbls)
            print(fks)
        cur_step+=1
        if(cur_step%100 == 0):
            print(cur_step)

100
200
300
400
500
Epoch : 0 -- Step :500 -- Generator loss:0.47779188495874403 -- Discriminator loss:0.6461662467718124
tensor([[-0.0066,  0.1291,  0.0756,  0.1102, -0.0470, -0.0088,  0.1488,  0.0880,
         -0.0683,  0.0581, -0.0141, -0.1388, -0.0765,  0.0825, -0.0988,  0.0308,
          0.0217, -0.0254,  0.0167,  0.0680, -0.0026,  0.0471, -0.1011,  0.0741,
         -0.0280,  0.0134,  0.1287,  0.2087,  0.0744, -0.1197]],
       grad_fn=<AddmmBackward0>)
600
700
800
900
1000
Epoch : 1 -- Step :1000 -- Generator loss:0.4860586279630661 -- Discriminator loss:0.6341708116531372
tensor([[-0.0456,  0.1943,  0.0748,  0.2024, -0.1091,  0.0583,  0.2203,  0.1653,
         -0.0363,  0.0818,  0.0038, -0.0697, -0.1107,  0.1777, -0.1117,  0.1085,
          0.0146, -0.0332,  0.0118,  0.0805, -0.0527,  0.1194, -0.1259,  0.0864,
         -0.0414,  0.0016,  0.1059,  0.1814, -0.0113, -0.2259]],
       grad_fn=<AddmmBackward0>)
1100
1200
1300
1400
1500
Epoch : 2 -- Step :1500 -- Generator loss:0.4985