In [None]:
import json
import os

import networkx as nx
import numpy as np
import pandas as pd

import src.graph.graph_measures as measures
from src.dataset.dataset_info import datasets
from src.graph.centralities import cal_betweenness_centrality, cal_k_truss
from src.utils import NumpyEncoder


In [2]:
# --- Output settings -------------------------------------------------------
# File name used for the per-dataset properties JSON, and the flag for
# persisting the computed properties to disk.
file_name = "graph_properties.json"
save_to_file = True

# --- Analysis switches -----------------------------------------------------
# `calculate_communities` gates the community-based measures in the main loop;
# `verbose` is forwarded to the `measures` helpers.
calculate_communities = False
verbose = True

In [3]:
# Datasets processed by this run. Uncomment a key to include that dataset;
# entries are looked up in the project-level `datasets` registry.
my_datasets = [
    datasets[key]
    for key in (
        # "cic_ton_iot_5_percent",
        "cic_ids_2017_5_percent",
        # "cic_ton_iot",
        # "cic_ids_2017",
        # "cic_bot_iot",
        # "cic_ton_iot_modified",
        # "ccd_inid_modified",
        # "nf_uq_nids_modified",
        # "edge_iiot",
        # "nf_cse_cic_ids2018",
        # "nf_uq_nids",
        # "x_iiot",
    )
]

In [None]:
# For every selected dataset: load the parquet file, build a directed
# multigraph from it, compute graph-level properties and (optionally) save
# them as JSON under `datasets/<dataset name>/`.
for dataset in my_datasets:
    print("======================================")
    print("======================================")
    print("======================================")

    name = dataset.name
    folder_path = os.path.join("datasets", name)

    print(f"==>> name: {name}")

    df = pd.read_parquet(dataset.path)
    # One edge per DataFrame row; parallel edges are kept (MultiDiGraph) and
    # each edge carries the label and numeric class columns as attributes.
    G = nx.from_pandas_edgelist(
        df,
        dataset.src_ip_col,
        dataset.dst_ip_col,
        edge_attr=[dataset.label_col, dataset.class_num_col],
        create_using=nx.MultiDiGraph,
    )

    degrees = measures.get_degrees(G, verbose)

    properties = {
        "name": name,
        "number_of_nodes": measures.number_of_nodes(G, verbose),
        "number_of_edges": measures.number_of_edges(G, verbose),
        "max_degree": max(degrees),
        "avg_degree": sum(degrees) / len(degrees),
        "density": measures.density(G, verbose),
        # Previously this key stored the edge count (copy-paste slip); it now
        # holds the actual strong-connectivity flag of the directed graph.
        "is_strongly_connected": nx.is_strongly_connected(G),
    }

    # Transitivity is only computed for simple (non-multi) graphs, so it is
    # skipped for the MultiDiGraph built above.
    if not G.is_multigraph():
        properties["transitivity"] = measures.transitivity(G, verbose)

    if calculate_communities:
        communities = measures.find_communities(G, verbose)
        properties["number_of_communities"] = len(communities)
        properties["mixing_parameter"] = measures.mixing_parameter(
            G, communities, verbose)
        properties["modularity"] = measures.modularity(G, communities, verbose)

    # These helpers receive `properties` and their return values are unused;
    # presumably they record their results into the dict in place.
    measures.class_pairs(
        df,
        dataset.src_ip_col,
        dataset.dst_ip_col,
        dataset.class_col,
        properties,
        verbose,
        os.path.join(folder_path, "class_pairs"),
    )
    measures.attackers_victims(G, properties, dataset.label_col, verbose)
    measures.cal_clustering_coefficients(G, properties, verbose)
    measures.cal_degree_assortativity(G, properties, verbose)
    measures.cal_diameter(G, properties, verbose)
    measures.path_length_distribution(G, properties, verbose)

    # Per-node centrality values, flattened to arrays for distribution analysis.
    centralities_to_analyze = {
        "degree": np.array(list(nx.degree_centrality(G).values())),
        "pagerank": np.array(list(nx.pagerank(G).values())),
        "betweenness_centrality": np.array(list(cal_betweenness_centrality(G).values())),
        "k_truss": np.array(list(cal_k_truss(G).values())),
    }

    # log(number of nodes) is used as the normaliser for the relative entropy.
    max_entropy = np.log(len(G.nodes()))
    # A dedicated loop variable keeps the dataset `name` from being shadowed.
    for centrality_name, values in centralities_to_analyze.items():
        print(f"==>> centrality: {centrality_name}")
        centrality_skewness, centrality_entropy, alpha, scale_free = measures.centrality_analysis(
            values, dataset.name, centrality_name, verbose)
        properties[f"{centrality_name}_skewness"] = centrality_skewness
        properties[f"{centrality_name}_entropy"] = centrality_entropy
        properties[f"{centrality_name}_relative_entropy"] = centrality_entropy / max_entropy
        properties[f"{centrality_name}_alpha"] = alpha
        properties[f"{centrality_name}_scale_free"] = 1 if scale_free else 0

    average_entropy = measures.compute_edge_class_entropy(G, dataset.class_num_col)
    properties["Average Edge Class Diversity (Entropy)"] = average_entropy
    print(f"Average Edge Class Diversity (Entropy): {average_entropy:.4f}")

    average_diversity = measures.compute_avg_edge_class_diversity(G, dataset.class_num_col)
    properties["Average Edge Class Diversity (Unique Class Ratio)"] = average_diversity
    print(f"Average Edge Class Diversity (Unique Class Ratio): {average_diversity:.4f}")

    print(f"==>> properties: {properties}")

    # Honour the `save_to_file` switch from the configuration cell and make
    # sure the target folder exists before writing.
    if save_to_file:
        os.makedirs(folder_path, exist_ok=True)
        with open(os.path.join(folder_path, file_name), 'w') as f:
            json.dump(properties, f, cls=NumpyEncoder)