In [1]:
import numpy as np
import csv
import pandas as pd
from colorsys import hsv_to_rgb
from tqdm.notebook import tqdm
from PIL import Image
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import torch
import torchvision
from torchvision import transforms, utils, datasets
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import shutil
import seaborn as sns
from scipy.linalg import khatri_rao
from sklearn.metrics import mean_squared_error
import time
import joblib
import ast

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'dark' style for the plots produced in this notebook.
sns.set_style('dark')

In [3]:
# Choose topology: load the network description from the CSV files in ../data.
# Each table is converted to a plain Python list of NumPy values/rows.
# od_list: one row per origin-destination (OD) pair; od[0] is compared against host ids below.
od_list = list(pd.read_csv("../data/ods.csv").to_numpy())
node_list = list(pd.read_csv("../data/nodes.csv").to_numpy().flatten())
host_list = list(pd.read_csv("../data/hosts.csv").to_numpy().flatten())
switch_list = list(pd.read_csv("../data/switches.csv").to_numpy().flatten())
edge_list = list(pd.read_csv("../data/edges.csv").to_numpy())
# routing_matrix: 2-D array; rank_links() below indexes od_list by its column index,
# so columns presumably correspond to OD pairs and rows to links -- confirm against the data.
routing_matrix = pd.read_csv("../data/routing_matrix.csv").to_numpy()
M = routing_matrix.shape[0]  # number of rows of the routing matrix
L = routing_matrix.shape[1]  # number of columns of the routing matrix

# For every host, collect the positions in od_list of the OD pairs whose
# first entry (od[0]) equals that host, then stack the per-host lists into an array.
indices_per_host = np.array([
    [od_idx for od_idx, od_pair in enumerate(od_list) if od_pair[0] == host_id]
    for host_id in host_list
])
#####################

In [4]:
# Choose distribution: `distro` selects the sub-directory of ../data/samples/
# that the sample images are read from further down
# ("../data/samples/" + distro + "/samples_images/"). Keep exactly one line active.
distro = "Poisson"
# distro = "Normal"
# distro = "mixedPoisson"
####################

# Definitions

In [5]:
# Train and test images go through the same preprocessing: resize to 90x90,
# convert to a tensor, then reduce to grayscale. Each split gets its own
# Compose instance, built from the same recipe.
image_transforms = {
    split: transforms.Compose([
        transforms.Resize((90, 90)),
        transforms.ToTensor(),
        transforms.Grayscale()
    ])
    for split in ("train", "test")
}

In [6]:
def get_class_distribution(dataset_obj):
    """Count how many samples of each class a dataset contains.

    Parameters
    ----------
    dataset_obj : iterable dataset exposing ``class_to_idx``
        Iterating it must yield ``(sample, label_id)`` pairs, and
        ``class_to_idx`` must map each class name to its integer label id.

    Returns
    -------
    dict
        ``{class_name: number_of_samples}`` with one entry for every key of
        ``dataset_obj.class_to_idx`` (classes without samples map to 0).
    """
    # Build the id -> name lookup from the dataset itself instead of relying on
    # a notebook-level global (the previous `idx2class` global is never defined
    # in this notebook, so the function failed on a fresh kernel).
    idx_to_label = {idx: label for label, idx in dataset_obj.class_to_idx.items()}
    count_dict = {label: 0 for label in dataset_obj.class_to_idx}
    for _, label_id in dataset_obj:
        count_dict[idx_to_label[label_id]] += 1
    return count_dict

In [7]:
def get_class_distribution_loaders(dataloader_obj, dataset_obj):
    """Count how many samples of each class a data loader yields.

    Parameters
    ----------
    dataloader_obj : iterable of ``(inputs, label_batch)``
        ``label_batch`` must be iterable, and each of its elements must
        provide ``.item()`` returning the integer label id (as the elements of
        a 1-D label tensor do).
    dataset_obj : object exposing ``class_to_idx``
        Mapping from class name to integer label id for the loader's dataset.

    Returns
    -------
    dict
        ``{class_name: number_of_samples}`` with one entry for every key of
        ``dataset_obj.class_to_idx``.
    """
    # Derive the id -> name lookup locally; the `idx2class` global used before
    # is not defined anywhere in this notebook.
    idx_to_label = {idx: label for label, idx in dataset_obj.class_to_idx.items()}
    count_dict = {label: 0 for label in dataset_obj.class_to_idx}
    # A batch of size 1 is still iterable, so a single loop covers every
    # batch size and the former special case for batch_size == 1 is not needed.
    for _, label_batch in dataloader_obj:
        for label_id in label_batch:
            count_dict[idx_to_label[label_id.item()]] += 1
    return count_dict

In [8]:
def plot_from_dict(dict_obj, plot_title, **kwargs):
    """Draw a seaborn bar plot with one bar per key of ``dict_obj``.

    The dict is turned into a one-row DataFrame and melted to long form, so
    the keys end up in the "variable" column (x axis) and the values in the
    "value" column (y axis). Extra keyword arguments are forwarded to
    ``sns.barplot``. Returns whatever ``set_title`` returns for the plot.
    """
    long_form = pd.DataFrame.from_dict([dict_obj]).melt()
    bar_axes = sns.barplot(data=long_form, x="variable", y="value", **kwargs)
    return bar_axes.set_title(plot_title)

In [9]:
class Classifier(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by two linear layers.

    Parameters
    ----------
    numChannels : int
        Number of channels of the input images.
    classes : int
        Number of output scores produced per image.

    ``fc1`` expects 4000 flattened features, which is what the two
    conv/pool stages produce for 90x90 inputs (10 channels x 20 x 20).
    """

    def __init__(self, numChannels, classes):
        super().__init__()
        # Stage 1: 5x5 convolution -> ReLU -> 2x2 max-pool.
        self.conv1 = nn.Conv2d(in_channels=numChannels, out_channels=20,
                               kernel_size=(5, 5), stride=(1, 1))
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Stage 2: 4x4 convolution -> ReLU -> 2x2 max-pool.
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=10,
                               kernel_size=(4, 4), stride=(1, 1))
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Fully connected head.
        self.fc1 = nn.Linear(in_features=4000, out_features=500)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=500, out_features=classes)
        # Registered but not applied in forward(): the network returns the raw fc2 scores.
        self.logSoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        feature_stages = (
            self.conv1, self.relu1, self.maxpool1,
            self.conv2, self.relu2, self.maxpool2,
        )
        for stage in feature_stages:
            x = stage(x)
        # Keep the batch dimension, flatten everything else.
        flat = torch.flatten(x, 1)
        return self.fc2(self.relu3(self.fc1(flat)))

In [10]:
def multi_acc(y_pred, y_test):
    """Return the batch accuracy as a rounded percentage (0-d tensor).

    ``y_pred`` holds one row of class scores per sample; the predicted class is
    the position of the largest log-softmax value in each row. ``y_test`` holds
    the true class ids.
    """
    log_probs = torch.log_softmax(y_pred, dim=1)
    predicted = torch.max(log_probs, dim=1)[1]
    hits = (predicted == y_test).float()
    fraction_correct = hits.sum() / len(hits)
    return torch.round(fraction_correct * 100)

In [11]:
# Metric histories, one list per data split ('train' and "val").
accuracy_stats = {split: [] for split in ('train', "val")}
loss_stats = {split: [] for split in ('train', "val")}

In [13]:
def _read_literal_dict(csv_path):
    """Return the Python literal stored in the first field of the first CSV row.

    The file is expected to hold a dict written as text (e.g. ``"{'0': 0}"``)
    in its first cell; it is parsed with ``ast.literal_eval``, which only
    accepts literals and never executes code. An empty file yields ``{}``.
    """
    # newline='' is what the csv module requires so that newlines embedded in
    # quoted fields are handled by the reader itself.
    with open(csv_path, 'r', newline='') as csvfile:
        first_row = next(csv.reader(csvfile, delimiter=','), None)
    if first_row is None:
        return {}
    return ast.literal_eval(first_row[0])


# Label mappings saved alongside the trained model.
class_to_idx = _read_literal_dict('../model/class_to_idx.csv')
idx_to_class = _read_literal_dict('../model/idx_to_class.csv')

In [14]:
def rank_links(routing_matrix, od_list, host_list, htui):
    """Count, for every row of the routing matrix, the OD pairs that use it
    and whose source host is not excluded.

    Parameters
    ----------
    routing_matrix : 2-D array
        Entry ``[row, col] == 1`` marks that OD pair ``col`` uses row ``row``.
    od_list : sequence
        ``od_list[col][0]`` is the source host of OD pair ``col``.
    host_list : list
        Hosts; the position of a host in this list is its host index.
    htui : container of host indices
        OD pairs whose source host index is in ``htui`` are not counted.

    Returns
    -------
    numpy.ndarray
        Float array with one count per row of ``routing_matrix``.

    Raises
    ------
    ValueError
        If the source host of an OD pair that is used by at least one row is
        not present in ``host_list``.
    """
    routing_matrix = np.asarray(routing_matrix)
    uses_row = routing_matrix == 1

    # Whether an OD pair counts depends only on its column, so decide it once
    # per column instead of once per (row, column) entry. Columns without any
    # 1 are skipped so their hosts are never looked up.
    counted = np.zeros(routing_matrix.shape[1], dtype=bool)
    for col in np.flatnonzero(uses_row.any(axis=0)):
        source_host = od_list[col][0]
        counted[col] = host_list.index(source_host) not in htui

    return (uses_row & counted).sum(axis=1).astype(float)

In [15]:
def score(model, image_transforms, image_path, idx2class):
    """Rank all classes for one image, best first.

    Parameters
    ----------
    model : torch.nn.Module
        Network returning one row of class scores per input image. It is
        switched to evaluation mode.
    image_transforms : callable
        Transform applied to the opened PIL image; must return a tensor.
    image_path : str
        Path of the image file to score.
    idx2class : mapping
        Maps an output position to a class label convertible with ``int``.

    Returns
    -------
    list of int
        Class labels ordered from the highest to the lowest model output.
    """
    model = model.eval()
    # Run on whatever device the model lives on (stays on CPU if it has no parameters).
    first_param = next(model.parameters(), None)
    with torch.no_grad():
        # The context manager closes the image file once the tensor is built.
        with Image.open(image_path) as image:
            batch = image_transforms(image).float().unsqueeze(0)
        if first_param is not None:
            batch = batch.to(first_param.device)
        # .cpu() makes the conversion to NumPy work for CUDA models too.
        output = model(batch).cpu().numpy()[0]
    indices = np.argsort(output)[::-1]
    return [int(idx2class[x]) for x in indices]

# Preprocessor

In [16]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("We're using =>", device)

We're using => cuda


In [17]:
# Rebuild the architecture (1 input channel, one output per host) and restore
# the trained weights from the saved state dict.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
model = Classifier(1,len(host_list))
PATH = '../model/CNN.pth'
# map_location="cpu": the model is kept on the CPU in this notebook, and this
# also lets a checkpoint saved from a CUDA device load on a machine without a GPU.
model.load_state_dict(torch.load(PATH, map_location="cpu"))

<All keys matched successfully>

In [18]:
# Keep only the directory entries whose name contains 'png'.
# Built as a new list: removing items from a list while iterating over it
# skips the element that follows each removal, so some non-png entries
# could previously survive the filter.
image_names = [
    imn
    for imn in os.listdir("../data/samples/"+distro+"/samples_images/")
    if 'png' in imn
]

In [19]:
# choose number of samples used in preprocessor
# (how many sample images are drawn at random and scored by the CNN)
nos_used_in_cnn = 100
# choose number of nodes to keep
# (how many top-ranked hosts are kept per image and in the final selection)
non_to_keep = 10

In [20]:
# Draw the images to score without replacement. A fixed seed makes the
# selection (and everything derived from it below) repeatable across
# Restart & Run All; change SAMPLING_SEED to draw a different subset.
SAMPLING_SEED = 0
sampling_rng = np.random.default_rng(SAMPLING_SEED)
dataset = sampling_rng.choice(a = image_names, size = nos_used_in_cnn, replace = False)

In [21]:
# For every sampled image, keep the non_to_keep best-ranked classes returned by score().
samples_dir = "../data/samples/"+distro+"/samples_images/"
scores = [
    score(model, image_transforms["test"], samples_dir+image_name, idx_to_class)[0:non_to_keep]
    for image_name in dataset
]

In [22]:
# Start every host with a count of zero.
score_count_dict = {host_id: 0 for host_id in host_list}

In [23]:
# Tally how often each host appears among the per-image top rankings.
for top_hosts in scores:
    for host_id in top_hosts:
        score_count_dict[host_id] += 1

In [24]:
# Order the hosts by descending appearance count and keep the first non_to_keep.
counted_hosts = np.array(list(score_count_dict.keys()))
descending_order = np.argsort(list(score_count_dict.values()))[::-1]
hosts_to_keep = counted_hosts[descending_order[0:non_to_keep]]

In [25]:
hosts_to_keep

array([ 0,  6,  9, 10,  1, 14, 22, 15,  2, 13], dtype=int64)