In [1]:
import pandas as pd
import os
import pickle

import networkx as nx

from src.dataset.dataset_info import datasets

In [2]:
# Dataset selection: keep exactly one `name = ...` line uncommented.
# `name` is the key used to index `datasets` below, and the cells that load the
# pickled graphs also use it to build the "datasets/<name>/..." file paths.
# name = "cic_ton_iot_5_percent"
name = "cic_ton_iot"
# name = "cic_ids_2017_5_percent"
# name = "cic_ids_2017"
# name = "cic_bot_iot"
# name = "cic_ton_iot_modified"
# name = "nf_ton_iotv2_modified"
# name = "ccd_inid_modified"
# name = "nf_uq_nids_modified"
# name = "edge_iiot"
# name = "nf_cse_cic_ids2018"
# name = "nf_bot_iotv2"
# name = "nf_uq_nids"
# name = "x_iiot"

# Look up the selected entry in `datasets` (imported from src.dataset.dataset_info).
dataset = datasets[name]

In [3]:
import networkx as nx
import igraph as ig
import json
import timeit

import time
from functools import wraps


def time_execution(func):
    """Decorator that times ``func`` and reports its result when ``verbose`` is truthy.

    The ``verbose`` flag is resolved against the signature of ``func``, so it is
    honoured whether the caller passes it positionally (``f(G, True)``) or by
    keyword (``f(G, verbose=True)``). When ``verbose`` is falsy or absent, the
    wrapped function runs silently.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature that returns whatever ``func``
        returns and, when verbose, prints
        ``==>> <func name>: <result>, in <elapsed> seconds``.
    """
    # Local import so this definition does not depend on another cell's imports.
    import inspect

    try:
        signature = inspect.signature(func)
    except (TypeError, ValueError):
        # Some callables expose no introspectable signature.
        signature = None

    def _resolve_verbose(args, kwargs):
        """Return the value bound to ``verbose`` for this call (False if absent)."""
        if signature is not None:
            try:
                bound = signature.bind_partial(*args, **kwargs)
            except TypeError:
                # Arguments do not fit the signature; ``func`` will raise the
                # real error when it is called below.
                return kwargs.get("verbose", False)
            bound.apply_defaults()
            if "verbose" in bound.arguments:
                return bound.arguments["verbose"]
        return kwargs.get("verbose", False)

    @wraps(func)
    def wrapper(*args, **kwargs):
        verbose = _resolve_verbose(args, kwargs)
        start_time = timeit.default_timer()
        result = func(*args, **kwargs)
        if verbose:
            elapsed = timeit.default_timer() - start_time
            print(
                f"==>> {func.__name__}: {result}, in {str(elapsed)} seconds")
        return result
    return wrapper


@time_execution
def number_of_nodes(G, verbose):
    """Return the number of nodes in ``G``.

    ``verbose`` is not read in the body; it is part of the signature so the
    ``time_execution`` wrapper receives it together with the call.
    """
    node_count = G.number_of_nodes()
    return node_count


@time_execution
def number_of_edges(G, verbose):
    """Return the number of edges in ``G``.

    ``verbose`` is not read in the body; it is part of the signature so the
    ``time_execution`` wrapper receives it together with the call.
    """
    edge_count = G.number_of_edges()
    return edge_count


@time_execution
def transitivity(G, verbose):
    """Return ``nx.transitivity(G)``.

    ``verbose`` is not read in the body; it is part of the signature so the
    ``time_execution`` wrapper receives it together with the call.
    """
    graph_transitivity = nx.transitivity(G)
    return graph_transitivity


@time_execution
def density(G, verbose):
    """Return ``nx.density(G)``.

    ``verbose`` is not read in the body; it is part of the signature so the
    ``time_execution`` wrapper receives it together with the call.
    """
    graph_density = nx.density(G)
    return graph_density


@time_execution
def mixing_parameter(G, communities, verbose):
    """Return the fraction of edges of ``G`` that connect two different communities.

    Args:
        G: Graph exposing ``edges()`` and ``number_of_edges()``.
        communities: Iterable of node collections. If a node appears in more
            than one collection, the last one wins.
        verbose: Not read in the body; it is part of the signature so the
            ``time_execution`` wrapper receives it together with the call.

    Returns:
        ``inter_community_edges / total_edges`` as a float, or ``0.0`` when the
        graph has no edges (the ratio is undefined there and previously raised
        ``ZeroDivisionError``).

    Raises:
        KeyError: If an edge endpoint is missing from every community.
    """
    total_edges = G.number_of_edges()
    if total_edges == 0:
        # No edges means no inter-community edges; avoid dividing by zero.
        return 0.0

    # Step 1: map each node to the index of its community.
    node_to_community = {
        node: community_index
        for community_index, community in enumerate(communities)
        for node in community
    }

    # Step 2: count edges whose endpoints belong to different communities.
    inter_cluster_edges = sum(
        1
        for u, v in G.edges()
        if node_to_community[u] != node_to_community[v]
    )

    return inter_cluster_edges / total_edges


@time_execution
def modularity(G, communities, verbose):
    """Return ``nx.community.modularity(G, communities)``.

    Timing and reporting are left to the ``time_execution`` wrapper. The body
    used to time and print the result as well, which produced two identical
    "modularity" lines per call.

    Args:
        G: Graph passed to networkx.
        communities: Partition of the nodes of ``G`` passed to networkx.
        verbose: Not read in the body; it is part of the signature so the
            ``time_execution`` wrapper receives it together with the call.

    Returns:
        The modularity value computed by networkx.
    """
    return nx.community.modularity(G, communities)


def get_degrees(G, verbose):
    """Collect the degree of every node of ``G`` into a list.

    Args:
        G: Object whose ``degree()`` yields ``(node, degree)`` pairs.
        verbose: When truthy, print how long the collection took.

    Returns:
        List of degree values in the iteration order of ``G.degree()``.
    """
    started_at = timeit.default_timer()

    degrees = []
    for _node, node_degree in G.degree():
        degrees.append(node_degree)

    if verbose:
        elapsed = timeit.default_timer() - started_at
        print(f"==>> calculated degrees, in {str(elapsed)} seconds")

    return degrees


def find_communities(G, verbose):
    """Detect communities in ``G`` with igraph's Infomap implementation.

    ``G`` is converted with ``ig.Graph.from_networkx`` and each community is
    translated back from igraph vertex indices via the ``_nx_name`` vertex
    attribute (presumably the original networkx node label; confirm against
    the igraph documentation).

    Args:
        G: networkx graph to partition.
        verbose: When truthy, print the number of communities and elapsed time.

    Returns:
        List of communities, each a list of ``_nx_name`` values.
    """
    t_begin = timeit.default_timer()
    ig_graph = ig.Graph.from_networkx(G)

    # Alternatives tried previously: community_multilevel(),
    # community_spinglass(), community_edge_betweenness(), and
    # nx.community.louvain_communities(G).
    partition = ig_graph.community_infomap()

    communities = [
        [ig_graph.vs[vertex_id]['_nx_name'] for vertex_id in members]
        for members in partition
    ]

    if verbose:
        elapsed = timeit.default_timer() - t_begin
        print(
            f"==>> number_of_communities: {len(communities)}, in {str(elapsed)} seconds")

    return communities


def calculate_graph_measures(G, file_path=None, verbose=False):
    """Compute summary statistics for ``G`` and optionally save them as JSON.

    Args:
        G: networkx graph to analyse. An empty graph is not supported: ``max()``
            of an empty degree list raises ``ValueError``.
        file_path: If truthy, path of a file to (over)write with the JSON
            encoding of the returned dictionary.
        verbose: Passed on to every helper.

    Returns:
        Dict with the keys ``number_of_nodes``, ``number_of_edges``,
        ``max_degree``, ``avg_degree``, ``density``, ``number_of_communities``,
        ``mixing_parameter`` and ``modularity``. ``transitivity`` is added only
        when ``G`` is exactly an ``nx.Graph`` or ``nx.DiGraph``.
    """
    properties = {}

    properties["number_of_nodes"] = number_of_nodes(G, verbose)
    properties["number_of_edges"] = number_of_edges(G, verbose)

    degrees = get_degrees(G, verbose)

    properties["max_degree"] = max(degrees)
    properties["avg_degree"] = sum(degrees) / len(degrees)

    # Exact type comparison on purpose: subclasses of Graph/DiGraph are skipped.
    if type(G) in (nx.DiGraph, nx.Graph):
        properties["transitivity"] = transitivity(G, verbose)

    properties["density"] = density(G, verbose)

    communities = find_communities(G, verbose)

    properties["number_of_communities"] = len(communities)
    properties["mixing_parameter"] = mixing_parameter(G, communities, verbose)
    properties["modularity"] = modularity(G, communities, verbose)

    if file_path:
        # The context manager closes the file even if serialisation or the
        # write fails. write() emits the JSON text in one call, where
        # writelines() iterated over the string character by character.
        with open(file_path, 'w') as outfile:
            outfile.write(json.dumps(properties))

    return properties


In [4]:
# Load the pickled training graph of the selected dataset.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open(f"datasets/{name}/training_graph.pkl", "rb") as f:
    G = pickle.load(f)


# Get network properties of the graph as loaded.
graph_measures = calculate_graph_measures(
    G,
    f"datasets/{name}/training_graph_measures.json",
    verbose=True,
)
print(f"==>> graph_measures: {graph_measures}")

# Same measures on the graph converted to a plain nx.DiGraph.
graph_measures = calculate_graph_measures(
    nx.DiGraph(G),
    f"datasets/{name}/training_graph_simple_measures.json",
    verbose=True,
)
print(f"==>> graph_measures: {graph_measures}")

==>> number_of_nodes: 125476, in 1.5500932931900024e-05 seconds
==>> number_of_edges: 3745408, in 0.45024609938263893 seconds
==>> calculated degrees, in 0.45984720066189766 seconds
==>> density: 0.00023789278131604479, in 0.4620735999196768 seconds
==>> number_of_communities: 38, in 5538.946167899296 seconds
==>> mixing_parameter: 0.0017501431085745532, in 1.5841925013810396 seconds
==>> modularity: 0.18200216766394228, in 3.4512769002467394 seconds
==>> modularity: 0.18200216766394228, in 3.4515506997704506 seconds
==>> graph_measures: {'number_of_nodes': 125476, 'number_of_edges': 3745408, 'max_degree': 1446507, 'avg_degree': 59.69919347126144, 'density': 0.00023789278131604479, 'number_of_communities': 38, 'mixing_parameter': 0.0017501431085745532, 'modularity': 0.18200216766394228}
==>> number_of_nodes: 125476, in 1.8399208784103394e-05 seconds
==>> number_of_edges: 273554, in 0.08320390060544014 seconds
==>> calculated degrees, in 0.07962859980762005 seconds
==>> transitivity: 0.

In [5]:
# Load the pickled testing graph of the selected dataset.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open(f"datasets/{name}/testing_graph.pkl", "rb") as f:
    G_test = pickle.load(f)

# Measures of the graph as loaded.
graph_measures = calculate_graph_measures(
    G_test,
    f"datasets/{name}/testing_graph_measures.json",
    verbose=True,
)
print(f"==>> graph_measures: {graph_measures}")

# Same measures on the graph converted to a plain nx.DiGraph.
graph_measures = calculate_graph_measures(
    nx.DiGraph(G_test),
    f"datasets/{name}/testing_graph_simple_measures.json",
    verbose=True,
)
print(f"==>> graph_measures: {graph_measures}")

==>> number_of_nodes: 85708, in 9.501352906227112e-06 seconds
==>> number_of_edges: 1605175, in 0.19102410040795803 seconds
==>> calculated degrees, in 0.18479789979755878 seconds
==>> density: 0.00021851674990109105, in 0.17760640010237694 seconds
==>> number_of_communities: 45, in 1030.209157800302 seconds
==>> mixing_parameter: 3.862507203264441e-05, in 0.745651800185442 seconds
==>> modularity: 0.17980244296744527, in 1.5325086005032063 seconds
==>> modularity: 0.17980244296744527, in 1.5328153986483812 seconds
==>> graph_measures: {'number_of_nodes': 85708, 'number_of_edges': 1605175, 'max_degree': 620281, 'avg_degree': 37.45683016754562, 'density': 0.00021851674990109105, 'number_of_communities': 45, 'mixing_parameter': 3.862507203264441e-05, 'modularity': 0.17980244296744527}
==>> number_of_nodes: 85708, in 1.0401010513305664e-05 seconds
==>> number_of_edges: 135890, in 0.04083449952304363 seconds
==>> calculated degrees, in 0.03694109991192818 seconds
==>> transitivity: 0.02718

In [6]:
# import igraph as ig
# G1 = ig.Graph.from_networkx(G)

# part = G1.community_infomap()

In [7]:
# import networkx as nx

# import timeit

# verbose = True
# properties = {}

# start_time = timeit.default_timer()

# # part = G1.community_multilevel()
# # part = G1.community_spinglass()
# # part = G1.community_edge_betweenness()

# communities = []
# for com in part:
#     communities.append([G1.vs[node_index]['_nx_name']
#                         for node_index in com])

# # communities = nx.community.louvain_communities(G)
# number_of_communities = len(communities)
# if verbose:
#     print(
#         f"==>> number_of_communities: {number_of_communities}, in {str(timeit.default_timer() - start_time)} seconds")
# properties["number_of_communities"] = number_of_communities

# # Step 1: Map each node to its community
# node_to_community = {}
# for community_index, community in enumerate(communities):
#     for node in community:
#         node_to_community[node] = community_index

# # Step 2: Count inter-cluster edges efficiently
# inter_cluster_edges = 0
# for u, v in G.edges():
#     # Directly check if u and v belong to different communities
#     if node_to_community[u] != node_to_community[v]:
#         inter_cluster_edges += 1

# start_time = timeit.default_timer()
# mixing_parameter = inter_cluster_edges / G.number_of_edges()
# if verbose:
#     print(
#         f"==>> mixing_parameter: {mixing_parameter}, in {str(timeit.default_timer() - start_time)} seconds")
# properties["mixing_parameter"] = mixing_parameter

# start_time = timeit.default_timer()
# modularity = nx.community.modularity(G, communities)
# if verbose:
#     print(
#         f"==>> modularity: {modularity}, in {str(timeit.default_timer() - start_time)} seconds")
# properties["modularity"] = modularity