In [1]:
# Attach Google Drive to the Colab runtime so the project folder
# (datasets, saved models, notebooks) can be read and written from this notebook.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
# Make the project checkout on Drive the working directory.
# Later cells invoke `setup.py`, the `run_botnet_*.sh` scripts and relative
# `./data` / `./saved_models` paths, so this cell must run (after the Drive
# mount) before any of them.
%cd /content/drive/MyDrive/botnet-detection_YK

/content/drive/MyDrive/botnet-detection_YK


In [3]:
# Install required dependencies for the project
!python setup.py install
!pip install numpy==1.26.4
!pip install torch_geometric
!pip install torch_scatter
!pip install deepdish==0.3.6

running install
!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()
!!

        ********************************************************************************
        Please avoid running ``setup.py`` and ``easy_install``.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://github.com/pypa/setuptools/issues/917 for details.
        ********************************************************************************

!!
  self.initialize_options()
running bdist_egg
running egg_info
writing botdet.egg-info/PKG-INFO
writing dependency_links to botd

In [4]:
# Report which PyTorch and PyTorch Geometric builds are active.
# The two versions are recorded up front because, per the original author's
# note, mismatched versions can cause runtime errors with torch_scatter and
# the message-passing layers.
import torch
import torch_geometric

for library in (torch, torch_geometric):
    print(library.__version__)

2.9.0+cu126
2.7.0


# Implementation of Case 3: All-Ones Features + Symmetric Normalization

In [6]:
# Delete the previously processed dataset so that the next BotnetDataset
# construction re-processes the raw graphs (here: with all-ones node features)
# instead of reusing whatever was processed before.
# WARNING: this permanently removes the directory on Drive; re-processing the
# train split takes several minutes.
!rm -rf /content/drive/MyDrive/botnet-detection_YK/data/botnet/processed

In [7]:
# Cases 3 & 4: build the 'chord' botnet dataset with all-ones node features
# (add_nfeat_ones=True is passed explicitly) and wrap each split in a loader.

from botdet.data.dataset_botnet import BotnetDataset
from botdet.data.dataloader import GraphDataLoader

# Arguments shared by the three splits.
chord_ones_kwargs = dict(name='chord', graph_format='pyg', add_nfeat_ones=True)

# Train / validation / test datasets.
botnet_dataset_train = BotnetDataset(split='train', **chord_ones_kwargs)
botnet_dataset_val = BotnetDataset(split='val', **chord_ones_kwargs)
botnet_dataset_test = BotnetDataset(split='test', **chord_ones_kwargs)

# Loaders: no shuffling, loading in the main process; only the batch size
# differs between training (2) and evaluation (1).
ones_loader_kwargs = dict(shuffle=False, num_workers=0)
train_loader = GraphDataLoader(botnet_dataset_train, batch_size=2, **ones_loader_kwargs)
val_loader = GraphDataLoader(botnet_dataset_val, batch_size=1, **ones_loader_kwargs)
test_loader = GraphDataLoader(botnet_dataset_test, batch_size=1, **ones_loader_kwargs)

Processing...
writing train set ----------


768it [04:21,  2.94it/s]


train split --- number of graphs: 768, data saved at data/botnet/processed/chord_train.hdf5.
writing val set ----------


96it [00:05, 17.43it/s]


val split --- number of graphs: 96, data saved at data/botnet/processed/chord_val.hdf5.
writing test set ----------


96it [00:11,  8.63it/s]


test split --- number of graphs: 96, data saved at data/botnet/processed/chord_test.hdf5.
Done!


In [8]:
# Sanity check: the node-feature matrix of the first training graph should
# consist entirely of ones for cases 3 & 4.
g0 = botnet_dataset_train[0]
print(g0.x[:5])
print(g0.x.shape)

# Enforce the invariant instead of relying on a visual check of the first rows:
# if stale processed data (e.g. integer features) is still on disk, stop here
# rather than silently training on the wrong inputs.
assert bool((g0.x == 1).all()), (
    "expected all-ones node features; remove data/botnet/processed and "
    "rebuild the dataset with add_nfeat_ones=True"
)

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.]])
torch.Size([143107, 1])


In [9]:
# Case 3: launch the training script for all-ones features with symmetric
# degree normalization (the script calls train_botnet.py with `--deg_norm sm`).
# Logs and the model checkpoint are written under ./saved_models.
# NOTE(review): the original note says the run saves the best model and reports
# the test result - confirm against train_botnet.py / the log file.
!bash run_botnet_ones_sm.sh

All Logs will be saved to ./saved_models/chord_model_lay12_rh1_ones_sm_ep50.log
python train_botnet.py --devid 0 --data_dir ./data/botnet --data_name chord --batch_size 2 --enc_sizes 32 32 32 32 32 32 32 32 32 32 32 32 --act relu --residual_hop 1 --deg_norm sm --final proj --epochs 50 --lr 0.005 --early_stop 1 --save_dir ./saved_models --save_name chord_model_lay12_rh1_ones_sm_ep50.pt
------------------------------
Namespace(devid=0, seed=0, logmode='w', log_interval=96, data_dir='./data/botnet', data_name='chord', batch_size=2, in_memory=True, shuffle=False, in_channels=1, enc_sizes=[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], act='relu', layer_act='none', residual_hop=1, edge_gate='none', n_classes=2, nodemodel='additive', final='proj', nheads=[1], att_act='lrelu', att_dropout=0, att_dir='in', att_combine='cat', att_combine_out='cat', deg_norm='sm', aggr='add', dropout=0.0, bias=True, lr=0.005, weight_decay=0.0005, epochs=50, early_stop=1, save_dir='./saved_models', save_name='c

# Implementation of Case 4: All-Ones Features + Random-Walk Normalization

In [10]:
# Case 4: reuse the same all-ones processed data and train with random-walk
# degree normalization (the script calls train_botnet.py with `--deg_norm rw`).
# Logs and the model checkpoint are written under ./saved_models.
!bash run_botnet_ones_rw.sh

All Logs will be saved to ./saved_models/chord_model_lay12_rh1_ones_rw_ep50.log
python train_botnet.py --devid 0 --data_dir ./data/botnet --data_name chord --batch_size 2 --enc_sizes 32 32 32 32 32 32 32 32 32 32 32 32 --act relu --residual_hop 1 --deg_norm rw --final proj --epochs 50 --lr 0.005 --early_stop 1 --save_dir ./saved_models --save_name chord_model_lay12_rh1_ones_rw_ep50.pt
------------------------------
Namespace(devid=0, seed=0, logmode='w', log_interval=96, data_dir='./data/botnet', data_name='chord', batch_size=2, in_memory=True, shuffle=False, in_channels=1, enc_sizes=[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], act='relu', layer_act='none', residual_hop=1, edge_gate='none', n_classes=2, nodemodel='additive', final='proj', nheads=[1], att_act='lrelu', att_dropout=0, att_dir='in', att_combine='cat', att_combine_out='cat', deg_norm='rw', aggr='add', dropout=0.0, bias=True, lr=0.005, weight_decay=0.0005, epochs=50, early_stop=1, save_dir='./saved_models', save_name='c

# Implementation of Case 5: Integer Features + Symmetric Normalization

In [11]:
# Delete the processed all-ones dataset so that the next BotnetDataset
# construction re-processes the raw graphs with integer node features.
# WARNING: this permanently removes the directory on Drive; re-processing the
# train split takes several minutes.
!rm -rf /content/drive/MyDrive/botnet-detection_YK/data/botnet/processed

In [12]:
# Cases 5 & 6: integer node features (0, 1, ..., n-1).
# add_nfeat_ones is switched off and use_integer_x is switched on; the
# processed data directory must have been removed beforehand so the dataset
# is regenerated with these settings.

from botdet.data.dataset_botnet import BotnetDataset
from botdet.data.dataloader import GraphDataLoader

# Arguments shared by the three splits.
chord_int_kwargs = dict(
    name='chord',
    graph_format='pyg',
    add_nfeat_ones=False,
    use_integer_x=True,
)

botnet_dataset_train = BotnetDataset(split='train', **chord_int_kwargs)
botnet_dataset_val = BotnetDataset(split='val', **chord_int_kwargs)
botnet_dataset_test = BotnetDataset(split='test', **chord_int_kwargs)

# Loaders: no shuffling, loading in the main process; only the batch size
# differs between training (2) and evaluation (1).
int_loader_kwargs = dict(shuffle=False, num_workers=0)
train_loader = GraphDataLoader(botnet_dataset_train, batch_size=2, **int_loader_kwargs)
val_loader = GraphDataLoader(botnet_dataset_val, batch_size=1, **int_loader_kwargs)
test_loader = GraphDataLoader(botnet_dataset_test, batch_size=1, **int_loader_kwargs)

Processing...
writing train set ----------


768it [04:13,  3.03it/s]


train split --- number of graphs: 768, data saved at data/botnet/processed/chord_train.hdf5.
writing val set ----------


96it [00:07, 12.55it/s]


val split --- number of graphs: 96, data saved at data/botnet/processed/chord_val.hdf5.
writing test set ----------


96it [00:07, 13.01it/s]


test split --- number of graphs: 96, data saved at data/botnet/processed/chord_test.hdf5.
Done!


In [22]:
# Sanity check: the node features of the first training graph should now be
# the integer sequence 0, 1, 2, ... (stored as floats) for cases 5 & 6.
g0 = botnet_dataset_train[0]
print(g0.x[:5])
print(g0.x.shape)

# Enforce the invariant instead of relying on a visual check: if the all-ones
# processed data is still on disk, stop here rather than silently training
# on the wrong inputs.
assert g0.x[:5].flatten().tolist() == [0.0, 1.0, 2.0, 3.0, 4.0], (
    "expected integer node features 0, 1, 2, ...; remove data/botnet/processed "
    "and rebuild the dataset with add_nfeat_ones=False, use_integer_x=True"
)

tensor([[0.],
        [1.],
        [2.],
        [3.],
        [4.]])
torch.Size([143107, 1])


In [23]:
# Case 5: train on the integer node features with symmetric degree
# normalization (the script calls train_botnet.py with `--deg_norm sm`).
# Logs and the model checkpoint are written under ./saved_models.
!bash run_botnet_int_sm.sh

All Logs will be saved to ./saved_models/chord_model_lay12_rh1_int_sm_ep50.log
python train_botnet.py --devid 0 --data_dir ./data/botnet --data_name chord --batch_size 2 --enc_sizes 32 32 32 32 32 32 32 32 32 32 32 32 --act relu --residual_hop 1 --deg_norm sm --final proj --epochs 50 --lr 0.005 --early_stop 1 --save_dir ./saved_models --save_name chord_model_lay12_rh1_int_sm_ep50.pt
------------------------------
Namespace(devid=0, seed=0, logmode='w', log_interval=96, data_dir='./data/botnet', data_name='chord', batch_size=2, in_memory=True, shuffle=False, in_channels=1, enc_sizes=[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], act='relu', layer_act='none', residual_hop=1, edge_gate='none', n_classes=2, nodemodel='additive', final='proj', nheads=[1], att_act='lrelu', att_dropout=0, att_dir='in', att_combine='cat', att_combine_out='cat', deg_norm='sm', aggr='add', dropout=0.0, bias=True, lr=0.005, weight_decay=0.0005, epochs=50, early_stop=1, save_dir='./saved_models', save_name='cho

# Implementation of Case 6: Integer Features + Random-Walk Normalization

In [24]:
# Case 6: reuse the same integer-feature processed data and train with
# random-walk degree normalization (the script calls train_botnet.py with
# `--deg_norm rw`). Logs and the model checkpoint are written under ./saved_models.
!bash run_botnet_int_rw.sh

All Logs will be saved to ./saved_models/chord_model_lay12_rh1_int_rw_ep50.log
python train_botnet.py --devid 0 --data_dir ./data/botnet --data_name chord --batch_size 2 --enc_sizes 32 32 32 32 32 32 32 32 32 32 32 32 --act relu --residual_hop 1 --deg_norm rw --final proj --epochs 50 --lr 0.005 --early_stop 1 --save_dir ./saved_models --save_name chord_model_lay12_rh1_int_rw_ep50.pt
------------------------------
Namespace(devid=0, seed=0, logmode='w', log_interval=96, data_dir='./data/botnet', data_name='chord', batch_size=2, in_memory=True, shuffle=False, in_channels=1, enc_sizes=[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], act='relu', layer_act='none', residual_hop=1, edge_gate='none', n_classes=2, nodemodel='additive', final='proj', nheads=[1], att_act='lrelu', att_dropout=0, att_dir='in', att_combine='cat', att_combine_out='cat', deg_norm='rw', aggr='add', dropout=0.0, bias=True, lr=0.005, weight_decay=0.0005, epochs=50, early_stop=1, save_dir='./saved_models', save_name='cho