# Load DNS and mDNS datasets

In [1]:
import os
import sys

import numpy as np
import pandas as pd

# Put the notebook's working directory on the import path (only once), presumably
# so that the `loader` module imported below can be resolved -- confirm its location.
module_path = os.path.abspath('.')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import torch
import torch_geometric.transforms as T

from loader import DNS

In [2]:
# Index of the GPU to make current when CUDA is available.
cuda_device = 4

# Always define `device`, so that the notebook also runs on CPU-only machines
# (previously `device` was only assigned inside the CUDA branch).
if torch.cuda.is_available():
    device = torch.device('cuda')
    # Make `cuda_device` the current GPU; a bare 'cuda' device then refers to it.
    torch.cuda.set_device(cuda_device)
else:
    device = torch.device('cpu')

# Fix the global PyTorch RNG seed for reproducibility.
torch.manual_seed(42)

<torch._C.Generator at 0x7f0e3cd5cc30>

## Load Graphs

In [3]:
def kg_path(graph_name):
    """Return the relative path of the data directory for the graph `graph_name`.

    The result is `../data/<graph_name>`; the path is not checked for existence.
    """
    return f'../data/{graph_name}'

#### mDNS

In [4]:
# Load the mDNS graph from its data directory, passing a composed
# feature-normalisation + to-undirected transform to the loader.
dataset = DNS(
    root=kg_path('mDNS'),
    transform=T.Compose([
        T.NormalizeFeatures(),
        T.ToUndirected(),
    ]),
)
# Show the first item of the dataset as the cell output.
dataset[0]

Remove parallel edges: Series([], dtype: int64)


HeteroData(
  ip_node={
    num_nodes=4505,
    x=[4505, 2]
  },
  domain_node={
    num_nodes=7495,
    x=[7495, 10],
    y=[7495],
    train_mask=[7495],
    test_mask=[7495],
    val_mask=[7495]
  },
  (domain_node, apex, domain_node)={ edge_index=[2, 3320] },
  (ip_node, resolves, domain_node)={ edge_index=[2, 34119] },
  (domain_node, similar, domain_node)={ edge_index=[2, 3012] },
  (domain_node, rev_resolves, ip_node)={ edge_index=[2, 34119] }
)

#### DNS

In [5]:
# Load the DNS graph from its data directory, passing a composed
# feature-normalisation + to-undirected transform to the loader.
# Note: this rebinds `dataset`, replacing the mDNS dataset loaded earlier.
dataset = DNS(
    root=kg_path('DNS'),
    transform=T.Compose([
        T.NormalizeFeatures(),
        T.ToUndirected(),
    ]),
)
# Show the first item of the dataset as the cell output.
dataset[0]

Remove parallel edges: type
similar    50910
dtype: int64


HeteroData(
  ip_node={
    num_nodes=73593,
    x=[73593, 2]
  },
  domain_node={
    num_nodes=373475,
    x=[373475, 10],
    y=[373475],
    train_mask=[373475],
    test_mask=[373475],
    val_mask=[373475]
  },
  (domain_node, apex, domain_node)={ edge_index=[2, 178944] },
  (domain_node, resolves, ip_node)={ edge_index=[2, 730438] },
  (domain_node, similar, domain_node)={ edge_index=[2, 155356] },
  (ip_node, rev_resolves, domain_node)={ edge_index=[2, 730438] }
)

In [6]:
# Display a homogeneous (single node/edge type) view of the currently bound `dataset`.
# NOTE(review): this is called on the dataset object rather than on `dataset[0]`,
# so presumably `DNS` defines its own `to_homogeneous` -- confirm in `loader`.
dataset.to_homogeneous()

Data(
  node_type=[447068],
  x=[447068, 12],
  edge_index=[2, 897588],
  edge_type=[897588],
  train_mask=[447068],
  val_mask=[447068],
  test_mask=[447068],
  y=[447068],
  edge_map={
    0=[2],
    1=[2],
    2=[2]
  },
  num_nodes=447068
)