## 1. Configuration

In [8]:
# =============================================================================
# INPUT/OUTPUT PATHS
# =============================================================================
INPUT_CSV_PATH = '/home/smotaali/BGP_Traffic_Generation/results/bgp_updates_analysis_20251208_172321.csv'
OUTPUT_DIR = '/home/smotaali/BGP_Traffic_Generation/results/'

# =============================================================================
# COLUMN NAMES
# =============================================================================
AS_PATH_COLUMN = 'AS_Path'
TIMESTAMP_COLUMN = 'Timestamp'
LABEL_COLUMN = 'Label'  # Set to None if no labels

# =============================================================================
# TIME WINDOW SETTINGS
# =============================================================================
WINDOW_SIZE = '5min'       # '5min', '1H', '30s', '1D' for time-based; '1000' for count-based
WINDOW_TYPE = 'time'       # 'time' or 'count'
WINDOW_OVERLAP = 0.0       # 0.0 to 0.9
MIN_UPDATES_PER_WINDOW = 10

# =============================================================================
# LABEL STRATEGY
# =============================================================================
LABEL_STRATEGY = 'majority'  # 'majority', 'conservative', 'weighted'
# Fraction of non-'normal' updates above which the 'weighted' strategy labels a
# graph/window as abnormal. It must stay defined even when another strategy is
# selected: the 'weighted' label strategy reads this name and would otherwise
# fail with a NameError as soon as LABEL_STRATEGY is switched to 'weighted'.
ABNORMAL_THRESHOLD = 0.3

print("Configuration loaded!")

Configuration loaded!


## 2. Feature Extraction Code

In [2]:
import os
import warnings
from collections import Counter, defaultdict
from typing import Dict, List, Optional, Tuple, Union

import networkx as nx
import numpy as np
import pandas as pd
# Silence every warning category for the rest of the kernel session; the filter
# is installed globally, not just for this cell.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the libraries used below — consider narrowing the filter.
warnings.filterwarnings('ignore')


class ASGraphFeatureExtractor:
    """Extract graph-based features from AS-level topology.

    An undirected graph is built from the AS paths found in a DataFrame of BGP
    updates: every AS number becomes a node and every pair of adjacent,
    distinct ASes in a path becomes an edge. The ``extract_*`` methods then
    compute whole-graph metrics (returned as dicts) and per-node metrics
    (returned as a DataFrame).

    Metrics that require a connected graph are computed on the largest
    connected component; size/count metrics use the full graph.
    """

    # Fallbacks used when the notebook-level constants LABEL_STRATEGY /
    # ABNORMAL_THRESHOLD are not defined in the module namespace.
    _DEFAULT_LABEL_STRATEGY = 'majority'
    _DEFAULT_ABNORMAL_THRESHOLD = 0.3

    def __init__(self, label_strategy: Optional[str] = None,
                 abnormal_threshold: Optional[float] = None):
        """Create an extractor with no graph loaded yet.

        Args:
            label_strategy: 'majority', 'conservative' or 'weighted'. When
                omitted, the module-level ``LABEL_STRATEGY`` is used if it
                exists, otherwise 'majority'.
            abnormal_threshold: Fraction of non-'normal' rows above which the
                'weighted' strategy reports an abnormal label. When omitted,
                the module-level ``ABNORMAL_THRESHOLD`` is used if it exists
                and is not None, otherwise 0.3.
        """
        self.graph = None                        # unweighted nx.Graph of AS adjacencies
        self.weighted_graph = None               # same topology, edge attr 'weight' = occurrence count
        self.as_path_counts = defaultdict(int)   # tuple(as_path) -> number of rows with that path
        self.df = None                           # DataFrame the graph was last built from
        self.label_column = None                 # column used by extract_label(), or None

        # Look the configuration up lazily so a missing constant no longer
        # raises NameError (at construction or, worse, mid-extraction).
        module_globals = globals()
        if label_strategy is None:
            label_strategy = module_globals.get('LABEL_STRATEGY', self._DEFAULT_LABEL_STRATEGY)
        if abnormal_threshold is None:
            abnormal_threshold = module_globals.get('ABNORMAL_THRESHOLD')
        if abnormal_threshold is None:
            abnormal_threshold = self._DEFAULT_ABNORMAL_THRESHOLD
        self.label_strategy = label_strategy
        self.abnormal_threshold = abnormal_threshold

    # ------------------------------------------------------------------
    # Parsing and graph construction
    # ------------------------------------------------------------------
    def parse_as_path(self, as_path_str: str) -> List[int]:
        """Convert one AS-path cell into a list of AS numbers.

        The value is split on whitespace, surrounding ``{``/``}`` are stripped
        from each token, and only purely numeric tokens are kept. Missing or
        empty values give ``[]``. A non-negative integer-valued float (what
        pandas produces when a numeric column contains NaN) is treated as a
        single-AS path instead of being dropped.
        """
        if pd.isna(as_path_str) or not as_path_str:
            return []
        if isinstance(as_path_str, float):
            # str(41336.0) == '41336.0' would fail the digit check below.
            if as_path_str.is_integer() and as_path_str >= 0:
                return [int(as_path_str)]
            return []
        asns = []
        for token in str(as_path_str).split():
            token = token.strip('{}')
            # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
            if token.isdecimal():
                asns.append(int(token))
        return asns

    def extract_all_ases_from_dataframe(self, df: pd.DataFrame, as_path_column: str) -> List[int]:
        """Return the sorted list of distinct AS numbers found in ``as_path_column``.

        Returns ``[]`` when the column is not present in ``df``.
        """
        if as_path_column not in df.columns:
            return []
        unique_ases = set()
        for raw_path in df[as_path_column]:
            unique_ases.update(self.parse_as_path(raw_path))
        return sorted(unique_ases)

    def build_graph_from_dataframe(self, df: pd.DataFrame, as_path_column: str,
                                   label_column: Optional[str] = None,
                                   verbose: bool = True) -> "nx.Graph":
        """Build the unweighted and weighted AS graphs from ``df``.

        Resets any previously built state, stores ``df`` and ``label_column``
        on the instance, and fills ``self.graph``, ``self.weighted_graph`` and
        ``self.as_path_counts``.

        Args:
            df: BGP updates, one row per update.
            as_path_column: Column holding the AS path; rows are ignored when
                the column is missing or the path parses to nothing.
            label_column: Optional label column remembered for extract_label().
            verbose: Print node/edge counts when True.

        Returns:
            The unweighted graph (also available as ``self.graph``).
        """
        self.graph = nx.Graph()
        self.weighted_graph = nx.Graph()
        self.as_path_counts = defaultdict(int)
        self.df = df
        self.label_column = label_column

        # Parse every path exactly once; the parsed list feeds both the node
        # pass and the edge pass.
        if as_path_column in df.columns:
            parsed_paths = [self.parse_as_path(raw_path) for raw_path in df[as_path_column]]
        else:
            parsed_paths = []

        # Nodes are inserted in ascending ASN order, which fixes the row order
        # of extract_node_specific_metrics().
        all_ases = sorted({asn for path in parsed_paths for asn in path})
        self.graph.add_nodes_from(all_ases)
        self.weighted_graph.add_nodes_from(all_ases)

        edge_weights = defaultdict(int)
        for as_path in parsed_paths:
            if not as_path:
                continue
            for source, target in zip(as_path, as_path[1:]):
                if source == target:  # Skip self-loops (e.g. AS-path prepending)
                    continue
                # Undirected edge: count (a, b) and (b, a) under one key.
                edge_weights[tuple(sorted((source, target)))] += 1
                self.graph.add_edge(source, target)
            self.as_path_counts[tuple(as_path)] += 1

        for (u, v), weight in edge_weights.items():
            self.weighted_graph.add_edge(u, v, weight=weight)

        if verbose:
            print(f"Graph constructed: {self.graph.number_of_nodes()} nodes, {self.graph.number_of_edges()} edges")
        return self.graph

    # ------------------------------------------------------------------
    # Labels and summary
    # ------------------------------------------------------------------
    def extract_label(self) -> str:
        """Collapse the label column of the stored DataFrame into one label.

        Returns 'unknown' when no DataFrame/label column is set, the column is
        missing, it holds no non-null values, or the strategy is unrecognised.

        Strategies (``self.label_strategy``):
            * 'majority': the most frequent label.
            * 'conservative': the most frequent label other than 'normal' if
              any such label occurs, else 'normal'.
            * 'weighted': the most frequent non-'normal' label when the share
              of non-'normal' rows exceeds ``self.abnormal_threshold``, else
              'normal'.
        """
        if self.df is None or self.label_column is None:
            return 'unknown'
        if self.label_column not in self.df.columns:
            return 'unknown'
        # value_counts() is ordered by descending frequency, so the first
        # non-'normal' entry is the most common abnormal label.
        labels = self.df[self.label_column].value_counts()
        if labels.empty:
            return 'unknown'
        abnormal_labels = [label for label in labels.index if label != 'normal']

        if self.label_strategy == 'majority':
            return labels.idxmax()
        if self.label_strategy == 'conservative':
            return abnormal_labels[0] if abnormal_labels else 'normal'
        if self.label_strategy == 'weighted':
            total = labels.sum()
            abnormal_weight = sum(count for label, count in labels.items() if label != 'normal') / total
            if abnormal_weight > self.abnormal_threshold:
                return abnormal_labels[0] if abnormal_labels else 'normal'
            return 'normal'
        return 'unknown'

    def print_graph_summary(self):
        """Print node/edge counts, density, connectivity and (if available) the label."""
        if self.graph is None:
            print("No graph constructed.")
            return
        num_nodes = self.graph.number_of_nodes()
        num_edges = self.graph.number_of_edges()
        max_edges = num_nodes * (num_nodes - 1) / 2
        density = num_edges / max_edges if max_edges > 0 else 0
        # nx.is_connected raises on a graph with no nodes; report False instead.
        is_connected = num_nodes > 0 and nx.is_connected(self.graph)
        print(f"\nNodes: {num_nodes}, Edges: {num_edges}, Density: {density:.4f}, Connected: {is_connected}")
        if self.label_column and self.df is not None and self.label_column in self.df.columns:
            print(f"Label: {self.extract_label().upper()}")

    # ------------------------------------------------------------------
    # Private helpers shared by the extract_* methods
    # ------------------------------------------------------------------
    def _largest_component_graph(self) -> "nx.Graph":
        """Return the graph itself if connected, else a copy of its largest component."""
        if nx.is_connected(self.graph):
            return self.graph
        largest_cc = max(nx.connected_components(self.graph), key=len)
        return self.graph.subgraph(largest_cc).copy()

    def _store_avg_max(self, metrics: Dict, name: str, per_node: Dict) -> None:
        """Write the mean and max of a per-node mapping as ``<name>_avg`` / ``<name>_max``."""
        values = list(per_node.values())
        metrics[f'{name}_avg'] = np.mean(values)
        metrics[f'{name}_max'] = np.max(values)

    def _guarded_avg_max(self, metrics: Dict, name: str, compute, fallback) -> None:
        """Like _store_avg_max, but store ``fallback`` for both keys if ``compute()`` fails."""
        try:
            self._store_avg_max(metrics, name, compute())
        except Exception:  # not a bare except: Ctrl-C must still interrupt long runs
            metrics[f'{name}_avg'] = fallback
            metrics[f'{name}_max'] = fallback

    def _guarded_scalar(self, metrics: Dict, key: str, compute, fallback) -> None:
        """Store ``compute()`` under ``key``, or ``fallback`` if it raises."""
        try:
            metrics[key] = compute()
        except Exception:
            metrics[key] = fallback

    def _per_node_or_default(self, compute, nodes, default) -> Dict:
        """Return ``compute()``, or ``{node: default}`` over ``nodes`` if it raises."""
        try:
            return compute()
        except Exception:
            return {node: default for node in nodes}

    # ------------------------------------------------------------------
    # Whole-graph metric groups
    # ------------------------------------------------------------------
    def extract_basic_metrics(self) -> Dict:
        """Return node count, edge count, diameter and triangle count.

        The diameter is taken on the largest connected component (-1 if it
        cannot be computed); the other values use the full graph.
        """
        metrics = {}
        G = self._largest_component_graph()
        metrics['num_nodes'] = self.graph.number_of_nodes()
        metrics['num_edges'] = self.graph.number_of_edges()
        self._guarded_scalar(metrics, 'diameter', lambda: nx.diameter(G), -1)
        # nx.triangles counts each triangle once per member node, hence // 3.
        triangles = nx.triangles(self.graph)
        metrics['num_triangles'] = sum(triangles.values()) // 3
        return metrics

    def extract_centrality_metrics(self) -> Dict:
        """Return the average and maximum of several per-node centralities.

        Degree centrality uses the full graph; all others use the largest
        connected component. Eigenvector, closeness, betweenness and load
        centrality fall back to 0.0 and eccentricity to -1 on failure;
        harmonic centrality, PageRank and degree centrality are not guarded
        and propagate any error.
        """
        metrics = {}
        G = self._largest_component_graph()

        self._guarded_avg_max(metrics, 'eigenvector_centrality',
                              lambda: nx.eigenvector_centrality(G, max_iter=1000), 0.0)
        self._store_avg_max(metrics, 'harmonic_centrality', nx.harmonic_centrality(G))
        self._store_avg_max(metrics, 'pagerank', nx.pagerank(G))
        self._store_avg_max(metrics, 'degree_centrality', nx.degree_centrality(self.graph))
        self._guarded_avg_max(metrics, 'closeness_centrality',
                              lambda: nx.closeness_centrality(G), 0.0)
        self._guarded_avg_max(metrics, 'betweenness_centrality',
                              lambda: nx.betweenness_centrality(G), 0.0)
        self._guarded_avg_max(metrics, 'load_centrality',
                              lambda: nx.load_centrality(G), 0.0)
        self._guarded_avg_max(metrics, 'eccentricity',
                              lambda: nx.eccentricity(G), -1)
        return metrics

    def extract_connectivity_metrics(self) -> Dict:
        """Return connectivity and spectral metrics of the largest connected component.

        Every metric is individually guarded and replaced by its fallback
        value (0 / 0.0, or -1.0 for effective_graph_resistance) on failure.
        """
        metrics = {}
        G = self._largest_component_graph()

        self._guarded_scalar(metrics, 'algebraic_connectivity',
                             lambda: nx.algebraic_connectivity(G), 0.0)
        self._guarded_scalar(metrics, 'node_connectivity',
                             lambda: nx.node_connectivity(G), 0)
        self._guarded_scalar(metrics, 'edge_connectivity',
                             lambda: nx.edge_connectivity(G), 0)

        try:
            laplacian = nx.laplacian_matrix(G).toarray()
            eigenvalues = np.linalg.eigvalsh(laplacian)
            # Discard the (numerically) zero eigenvalue(s) before inverting.
            eigenvalues = eigenvalues[eigenvalues > 1e-10]
            if len(eigenvalues) > 0:
                metrics['effective_graph_resistance'] = G.number_of_nodes() * np.sum(1.0 / eigenvalues)
            else:
                metrics['effective_graph_resistance'] = float('inf')
        except Exception:
            metrics['effective_graph_resistance'] = -1.0

        # The adjacency spectrum feeds two metrics, so compute it once. The
        # matrix is symmetric (undirected graph), so eigvalsh applies.
        try:
            adj_eigenvalues = np.linalg.eigvalsh(nx.adjacency_matrix(G).toarray())
            lam_max = float(np.max(adj_eigenvalues))
        except Exception:
            metrics['natural_connectivity'] = 0.0
            metrics['largest_eigenvalue'] = 0.0
        else:
            # log(mean(exp(l))) evaluated as l_max + log(mean(exp(l - l_max)))
            # so large eigenvalues cannot overflow exp().
            metrics['natural_connectivity'] = lam_max + np.log(np.mean(np.exp(adj_eigenvalues - lam_max)))
            metrics['largest_eigenvalue'] = lam_max

        try:
            norm_lap_eigenvalues = np.real(nx.normalized_laplacian_spectrum(G))
            metrics['weighted_spectrum_3'] = float(np.sum(norm_lap_eigenvalues ** 3))
            metrics['weighted_spectrum_4'] = float(np.sum(norm_lap_eigenvalues ** 4))
        except Exception:
            metrics['weighted_spectrum_3'] = 0.0
            metrics['weighted_spectrum_4'] = 0.0

        return metrics

    def extract_clustering_metrics(self) -> Dict:
        """Return degree assortativity, square-clustering avg/max and average clustering.

        All values are computed on the full graph. Assortativity and square
        clustering fall back to 0.0 on failure; average clustering is not guarded.
        """
        metrics = {}
        self._guarded_scalar(metrics, 'assortativity',
                             lambda: nx.degree_assortativity_coefficient(self.graph), 0.0)
        self._guarded_avg_max(metrics, 'square_clustering',
                              lambda: nx.square_clustering(self.graph), 0.0)
        metrics['avg_clustering'] = nx.average_clustering(self.graph)
        return metrics

    def extract_robustness_metrics(self) -> Dict:
        """Return component, bridge, articulation-point, spanning-tree and percolation metrics.

        All values are computed on the full graph.
        """
        metrics = {}
        components = list(nx.connected_components(self.graph))
        metrics['num_components'] = len(components)
        metrics['largest_component_size'] = len(max(components, key=len))

        metrics['num_bridges'] = len(list(nx.bridges(self.graph)))
        metrics['num_articulation_points'] = len(list(nx.articulation_points(self.graph)))

        try:
            if nx.is_connected(self.graph):
                laplacian = nx.laplacian_matrix(self.graph).toarray()
                # Matrix-tree theorem: the spanning-tree count equals the
                # determinant of the Laplacian with one row and column removed.
                determinant = np.linalg.det(laplacian[1:, 1:])
                if np.isfinite(determinant):
                    metrics['num_spanning_trees'] = int(round(determinant))
                else:
                    # The determinant exceeded float64 range; report "not
                    # computable" the same way as any other failure.
                    metrics['num_spanning_trees'] = -1
            else:
                metrics['num_spanning_trees'] = 0
        except Exception:
            metrics['num_spanning_trees'] = -1

        try:
            degrees = [d for _, d in self.graph.degree()]
            k_avg = np.mean(degrees)                    # <k>
            k2_avg = np.mean([d ** 2 for d in degrees])  # <k^2>
            denominator = k2_avg - k_avg
            if denominator > 0:
                metrics['percolation_threshold'] = k_avg / denominator
            else:
                metrics['percolation_threshold'] = 1.0
            ratio = k2_avg / k_avg if k_avg > 0 else 1
            if ratio > 1:
                metrics['percolation_limit'] = 1 - 1 / (ratio - 1)
            else:
                metrics['percolation_limit'] = 0.0
        except Exception:
            metrics['percolation_threshold'] = -1.0
            metrics['percolation_limit'] = -1.0

        return metrics

    def extract_advanced_metrics(self) -> Dict:
        """Return entropy/spectral symmetry ratios, efficiency, path-length,
        neighbour-degree and clique metrics.

        Spectral symmetry, global efficiency and average shortest path length
        use the largest connected component; the weighted spectral radius uses
        ``self.weighted_graph``; the rest use the full graph. Each metric is
        individually guarded with a fallback value.
        """
        metrics = {}
        G = self._largest_component_graph()

        try:
            # Entropy of the degree distribution, normalised by its maximum
            # (log of the number of distinct degrees).
            degree_counts = Counter(d for _, d in self.graph.degree())
            total = sum(degree_counts.values())
            entropy = -sum((count / total) * np.log(count / total) for count in degree_counts.values())
            max_entropy = np.log(len(degree_counts))
            metrics['symmetry_ratio'] = entropy / max_entropy if max_entropy > 0 else 0.0
        except Exception:
            metrics['symmetry_ratio'] = 0.0

        try:
            adj_eigenvalues = np.real(nx.adjacency_spectrum(G))
            # Round before de-duplicating so float noise is not counted as
            # distinct eigenvalues.
            unique_eigenvalues = len(np.unique(np.round(adj_eigenvalues, 10)))
            diameter = nx.diameter(G)
            metrics['symmetry_ratio_spectral'] = unique_eigenvalues / (diameter + 1)
        except Exception:
            metrics['symmetry_ratio_spectral'] = 0.0

        try:
            adj_matrix = nx.adjacency_matrix(self.weighted_graph, weight='weight').toarray()
            # Symmetric matrix (undirected graph): eigvalsh returns real eigenvalues.
            eigenvalues = np.linalg.eigvalsh(adj_matrix)
            metrics['weighted_spectral_radius'] = float(np.max(np.abs(eigenvalues)))
        except Exception:
            metrics['weighted_spectral_radius'] = 0.0

        self._guarded_scalar(metrics, 'avg_global_efficiency',
                             lambda: nx.global_efficiency(G), 0.0)
        self._guarded_scalar(metrics, 'local_efficiency',
                             lambda: nx.local_efficiency(self.graph), 0.0)
        self._guarded_scalar(metrics, 'average_shortest_path_length',
                             lambda: nx.average_shortest_path_length(G), -1.0)
        self._guarded_avg_max(metrics, 'mean_degree_neighborhood',
                              lambda: nx.average_neighbor_degree(self.graph), 0.0)

        try:
            cliques = list(nx.find_cliques(self.graph))
            metrics['num_cliques'] = len(cliques)
            metrics['max_clique_size'] = max(len(c) for c in cliques) if cliques else 0
        except Exception:
            metrics['num_cliques'] = 0
            metrics['max_clique_size'] = 0

        return metrics

    # ------------------------------------------------------------------
    # Per-node metrics
    # ------------------------------------------------------------------
    def extract_node_specific_metrics(self) -> pd.DataFrame:
        """Return one row of metrics per AS (node) of the full graph.

        Columns: asn, degree, harmonic_centrality, pagerank,
        eigenvector_centrality, eccentricity, square_clustering,
        node_clique_number, proximity, mediation_centrality.

        Component-based metrics are computed on the largest connected
        component; nodes outside it receive the default value (0.0, or -1 for
        eccentricity). 'proximity' is the mean shortest-path length from the
        node to every node it reaches (itself included), and
        'mediation_centrality' is betweenness centrality.
        """
        G = self._largest_component_graph()

        harmonic_cent = self._per_node_or_default(lambda: nx.harmonic_centrality(G), G.nodes(), 0.0)
        pagerank = self._per_node_or_default(lambda: nx.pagerank(G), G.nodes(),
                                             1.0 / G.number_of_nodes())
        eig_cent = self._per_node_or_default(lambda: nx.eigenvector_centrality(G, max_iter=1000),
                                             G.nodes(), 0.0)
        eccentricity = self._per_node_or_default(lambda: nx.eccentricity(G), G.nodes(), -1)
        square_clust = self._per_node_or_default(lambda: nx.square_clustering(self.graph),
                                                 self.graph.nodes(), 0.0)
        betweenness = self._per_node_or_default(lambda: nx.betweenness_centrality(G), G.nodes(), 0.0)
        node_clique_num = self._per_node_or_default(lambda: nx.node_clique_number(self.graph),
                                                    self.graph.nodes(), 1)

        proximity = {}
        try:
            for node in G.nodes():
                lengths = nx.single_source_shortest_path_length(G, node)
                proximity[node] = np.mean(list(lengths.values())) if len(lengths) > 1 else 0.0
        except Exception:
            proximity = {n: 0.0 for n in G.nodes()}

        # .get() defaults cover nodes that are not part of the largest component.
        node_data = [
            {
                'asn': node,
                'degree': self.graph.degree(node),
                'harmonic_centrality': harmonic_cent.get(node, 0.0),
                'pagerank': pagerank.get(node, 0.0),
                'eigenvector_centrality': eig_cent.get(node, 0.0),
                'eccentricity': eccentricity.get(node, -1),
                'square_clustering': square_clust.get(node, 0.0),
                'node_clique_number': node_clique_num.get(node, 1),
                'proximity': proximity.get(node, 0.0),
                'mediation_centrality': betweenness.get(node, 0.0),
            }
            for node in self.graph.nodes()
        ]
        return pd.DataFrame(node_data)

    # ------------------------------------------------------------------
    # Aggregation
    # ------------------------------------------------------------------
    def extract_all_features(self, verbose: bool = True) -> Dict:
        """Run every whole-graph metric group and merge the results into one dict.

        A 'label' entry (from extract_label) is added when a label column was
        supplied to build_graph_from_dataframe.
        """
        stages = [
            ('basic', self.extract_basic_metrics),
            ('centrality', self.extract_centrality_metrics),
            ('connectivity', self.extract_connectivity_metrics),
            ('clustering', self.extract_clustering_metrics),
            ('robustness', self.extract_robustness_metrics),
            ('advanced', self.extract_advanced_metrics),
        ]
        all_features = {}
        for stage_name, extract_stage in stages:
            if verbose:
                print(f"Extracting {stage_name} metrics...")
            all_features.update(extract_stage())
        if self.label_column is not None:
            if verbose:
                print("Extracting label...")
            all_features['label'] = self.extract_label()
        return all_features


class TimeWindowedFeatureExtractor:
    """Extract graph features from time-windowed BGP data.

    The input DataFrame is split into windows, either of fixed duration
    (``window_type='time'``) or of a fixed number of rows (any other
    ``window_type``, documented as 'count'). An ASGraphFeatureExtractor is
    then run on every window holding at least ``min_updates`` rows.
    """

    # Bookkeeping columns placed first in the result frame.
    _INFO_COLUMNS = ['window_idx', 'window_start', 'window_end', 'num_updates']

    def __init__(self, window_size: str, window_type: str = 'time',
                 overlap: float = 0.0, min_updates: int = 10):
        """Store the windowing configuration.

        Args:
            window_size: A pandas Timedelta string (e.g. '5min') for time
                windows, or an integer string (e.g. '1000') for count windows.
            window_type: 'time' for duration-based windows; anything else
                selects count-based windows.
            overlap: Fraction of a window shared with the next one; clamped
                to the range [0.0, 0.9].
            min_updates: Windows with fewer rows than this are skipped.
        """
        self.window_size = window_size
        self.window_type = window_type
        self.overlap = min(max(overlap, 0.0), 0.9)
        self.min_updates = min_updates

    def _parse_timestamp(self, df: pd.DataFrame, ts_col: str) -> pd.DataFrame:
        """Return a copy of ``df`` with ``ts_col`` as datetimes, sorted by time.

        Unparseable timestamps become NaT and those rows are dropped; the
        index is reset after sorting.
        """
        df = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(df[ts_col]):
            df[ts_col] = pd.to_datetime(df[ts_col], errors='coerce')
        df = df.dropna(subset=[ts_col])
        return df.sort_values(ts_col).reset_index(drop=True)

    def _create_time_windows(self, df, ts_col):
        """Return ``(start, end)`` pairs of half-open time windows covering ``df``.

        Windows start at the earliest timestamp and advance by
        ``window * (1 - overlap)``. A window is generated for every start up
        to and including the latest timestamp, so rows that fall exactly on
        the final window boundary (and frames whose rows all share one
        timestamp) are still covered.

        Raises:
            ValueError: if the window size or the resulting step is not positive.
        """
        if df.empty:
            return []
        min_time, max_time = df[ts_col].min(), df[ts_col].max()
        window_td = pd.Timedelta(self.window_size)
        step_td = window_td * (1 - self.overlap)
        if window_td <= pd.Timedelta(0) or step_td <= pd.Timedelta(0):
            # A non-positive step would never advance `current` (endless loop).
            raise ValueError(f"window_size must be a positive duration, got {self.window_size!r}")
        windows = []
        current = min_time
        # `<=` (not `<`): windows are [start, end), so a row at max_time that
        # sits exactly on a boundary needs one more window to be included.
        while current <= max_time:
            windows.append((current, current + window_td))
            current += step_td
        return windows

    def _create_count_windows(self, df):
        """Return ``(start_idx, end_idx)`` row slices of ``window_size`` rows each.

        Consecutive windows advance by ``window_size * (1 - overlap)`` rows
        (at least one). Generation stops once fewer than ``min_updates`` rows
        remain after the next start.

        Raises:
            ValueError: if ``window_size`` is not a positive integer.
        """
        total = len(df)
        wc = int(self.window_size)
        if wc <= 0:
            raise ValueError(f"window_size must be a positive integer, got {self.window_size!r}")
        # The epsilon absorbs float noise (10 * (1 - 0.9) is 0.9999999999999998)
        # and max(1, ...) guarantees progress; a step of 0 would loop forever.
        step = max(1, int(wc * (1 - self.overlap) + 1e-9))
        windows = []
        start = 0
        while start < total:
            windows.append((start, min(start + wc, total)))
            start += step
            if total - start < self.min_updates:
                break
        return windows

    def extract_windowed_features(self, df, ts_col, as_path_col, label_col=None, verbose=True):
        """Compute graph features for every sufficiently large window of ``df``.

        Args:
            df: BGP updates, one row per update.
            ts_col: Timestamp column (parsed with pandas if not already datetime).
            as_path_col: Column holding the AS path.
            label_col: Optional label column forwarded to the graph extractor.
            verbose: Print window counts, periodic progress and per-window errors.

        Returns:
            A DataFrame with one row per processed window: the columns
            window_idx, window_start, window_end, num_updates followed by the
            extracted features. When no window yields features, an empty
            DataFrame with just those four columns is returned.
        """
        df = self._parse_timestamp(df, ts_col)
        if self.window_type == 'time':
            windows = self._create_time_windows(df, ts_col)
        else:
            windows = self._create_count_windows(df)

        if verbose:
            print(f"Created {len(windows)} windows ({self.window_type}-based, size={self.window_size})")

        all_features = []
        for idx, window in enumerate(windows):
            if self.window_type == 'time':
                start, end = window
                window_df = df[(df[ts_col] >= start) & (df[ts_col] < end)]
                info = {'window_idx': idx, 'window_start': start, 'window_end': end, 'num_updates': len(window_df)}
            else:
                s_idx, e_idx = window
                window_df = df.iloc[s_idx:e_idx]
                info = {'window_idx': idx, 'window_start': window_df[ts_col].min(),
                        'window_end': window_df[ts_col].max(), 'num_updates': len(window_df)}

            if len(window_df) < self.min_updates:
                continue

            # Best effort: a window whose graph cannot be processed is reported
            # (when verbose) and skipped rather than aborting the whole run.
            try:
                extractor = ASGraphFeatureExtractor()
                extractor.build_graph_from_dataframe(window_df, as_path_col, label_col, verbose=False)
                features = extractor.extract_all_features(verbose=False)
                features.update(info)
                all_features.append(features)
                if verbose and idx % 10 == 0:
                    print(f"  Window {idx}/{len(windows)}: {len(window_df)} updates")
            except Exception as e:
                if verbose:
                    print(f"  Window {idx}: Error - {e}")

        info_cols = list(self._INFO_COLUMNS)
        if all_features:
            result_df = pd.DataFrame(all_features)
            other_cols = [c for c in result_df.columns if c not in info_cols]
            result_df = result_df[info_cols + other_cols]
        else:
            # pd.DataFrame([]) has no columns, so selecting info_cols from it
            # would raise KeyError; return a well-formed empty frame instead.
            result_df = pd.DataFrame(columns=info_cols)
        if verbose:
            print(f"Extracted features for {len(result_df)} windows")
        return result_df


# Confirmation banner: shows that the cell defining both extractor classes ran.
print("Feature extraction code loaded!")

Feature extraction code loaded!


## 3. Load Data

In [3]:
# Read the BGP update records from the CSV configured in INPUT_CSV_PATH.
df = pd.read_csv(INPUT_CSV_PATH)
# Quick sanity check of the load: row count and the column names found.
print(f"Loaded {len(df):,} rows")
print(f"Columns: {list(df.columns)}")
# Last expression of the cell: rich display of the first rows.
df.head()

Loaded 7,746 rows
Columns: ['Type', 'Timestamp', 'Subtype', 'Peer_IP', 'Peer_ASN', 'Prefix', 'AS_Path', 'Origin', 'Next_Hop', 'MED', 'Local_Pref', 'Communities', 'Aggregator_Flag', 'Aggregator_ASN Aggregator_IP', 'Label']


Unnamed: 0,Type,Timestamp,Subtype,Peer_IP,Peer_ASN,Prefix,AS_Path,Origin,Next_Hop,MED,Local_Pref,Communities,Aggregator_Flag,Aggregator_ASN Aggregator_IP,Label
0,BGP,2025-12-08 17:21:02.824256,ANNOUNCE,10.122.57.238,41336.0,192.0.2.0/24,41336,INCOMPLETE,10.122.57.238,100.0,200.0,65535:65281,0,,normal
1,BGP,2025-12-08 17:21:02.837679,ANNOUNCE,10.122.57.238,41336.0,192.0.2.0/24,41336,EGP,10.122.57.238,100.0,200.0,65535:65281,0,,normal
2,BGP,2025-12-08 17:21:02.847678,ANNOUNCE,10.122.57.238,41336.0,192.0.2.0/24,41336,IGP,10.122.57.238,100.0,200.0,65535:65281,0,,normal
3,BGP,2025-12-08 17:21:02.857748,ANNOUNCE,10.122.57.238,41336.0,203.0.113.0/24,41336,IGP,10.122.57.238,100.0,200.0,65535:65281,0,,normal
4,BGP,2025-12-08 17:21:02.857748,ANNOUNCE,10.122.57.238,41336.0,198.51.100.0/24,41336,IGP,10.122.57.238,100.0,200.0,65535:65281,0,,normal


## 4. Global Feature Extraction

In [4]:
# Build graph and extract global features
extractor = ASGraphFeatureExtractor()
graph = extractor.build_graph_from_dataframe(df, AS_PATH_COLUMN, LABEL_COLUMN)
extractor.print_graph_summary()

global_features = extractor.extract_all_features()
global_df = pd.DataFrame([global_features])

# Save
# Build the path once with os.path.join so it is correct whether or not
# OUTPUT_DIR ends with a separator, and so the message below reports the file
# that was actually written (including its timestamp suffix).
global_csv_path = os.path.join(OUTPUT_DIR, "graph_features_global_20251208_172321.csv")
global_df.to_csv(global_csv_path, index=False)
print(f"\nSaved to: {global_csv_path}")
print(f"Shape: {global_df.shape}")
global_df.T

Graph constructed: 1491 nodes, 4701 edges

Nodes: 1491, Edges: 4701, Density: 0.0042, Connected: True
Label: NORMAL
Extracting basic metrics...
Extracting centrality metrics...
Extracting connectivity metrics...
Extracting clustering metrics...
Extracting robustness metrics...
Extracting advanced metrics...
Extracting label...

Saved to: /home/smotaali/BGP_Traffic_Generation/resultsgraph_features_global.csv
Shape: (1, 50)


Unnamed: 0,0
num_nodes,1491
num_edges,4701
diameter,10
num_triangles,7729
eigenvector_centrality_avg,0.012543
eigenvector_centrality_max,0.19376
harmonic_centrality_avg,519.673259
harmonic_centrality_max,809.816667
pagerank_avg,0.000671
pagerank_max,0.017419


## 5. Per-Node Feature Extraction

In [5]:
# Extract per-node features (uses same graph from above)
node_df = extractor.extract_node_specific_metrics()

# Save
# One path variable, joined with os.path.join: works with or without a
# trailing separator on OUTPUT_DIR and keeps the printed path identical to the
# file actually written.
node_csv_path = os.path.join(OUTPUT_DIR, "graph_features_per_node_20251208_172321.csv")
node_df.to_csv(node_csv_path, index=False)
print(f"Saved to: {node_csv_path}")
print(f"Shape: {node_df.shape}")
print("\nTop 10 ASes by degree:")
node_df.sort_values('degree', ascending=False).head(10)

Saved to: /home/smotaali/BGP_Traffic_Generation/resultsgraph_features_per_node.csv
Shape: (1491, 10)

Top 10 ASes by degree:


Unnamed: 0,asn,degree,harmonic_centrality,pagerank,eigenvector_centrality,eccentricity,square_clustering,node_clique_number,proximity,mediation_centrality
629,32934,183,809.816667,0.017419,0.187835,6,0.035502,22,1.989269,0.096359
16,4637,177,807.733333,0.016212,0.19376,6,0.039195,22,1.98994,0.082451
31,9304,173,802.483333,0.015999,0.18468,6,0.037147,22,2.004695,0.087699
29,8928,171,802.566667,0.015614,0.185743,6,0.038254,22,2.002012,0.07665
125,15169,167,798.483333,0.015812,0.180154,6,0.037117,22,2.013414,0.084158
294,20940,164,799.066667,0.015095,0.180109,6,0.037901,22,2.006707,0.075098
4,1273,163,795.35,0.015809,0.156154,6,0.030168,22,2.021462,0.097564
30,9002,160,796.15,0.014708,0.177582,6,0.038301,22,2.013414,0.071449
6,2497,156,794.733333,0.014462,0.175309,6,0.038867,22,2.014085,0.074462
15,4323,156,793.233333,0.014251,0.173533,6,0.038187,22,2.020121,0.069305


## 6. Time-Windowed Feature Extraction

In [6]:
# Extract time-windowed features
windowed_extractor = TimeWindowedFeatureExtractor(
    window_size=WINDOW_SIZE,
    window_type=WINDOW_TYPE,
    overlap=WINDOW_OVERLAP,
    min_updates=MIN_UPDATES_PER_WINDOW
)

windowed_df = windowed_extractor.extract_windowed_features(
    df, TIMESTAMP_COLUMN, AS_PATH_COLUMN, LABEL_COLUMN, verbose=True
)

# Save
# One path variable, joined with os.path.join: works with or without a
# trailing separator on OUTPUT_DIR and keeps the printed path identical to the
# file actually written.
windowed_csv_path = os.path.join(OUTPUT_DIR, "graph_features_windowed_20251208_172321.csv")
windowed_df.to_csv(windowed_csv_path, index=False)
print(f"\nSaved to: {windowed_csv_path}")
print(f"Shape: {windowed_df.shape}")
print("\nFirst 10 windows:")
preferred_cols = ['window_idx', 'window_start', 'window_end', 'num_updates',
                  'num_nodes', 'num_edges', 'diameter', 'label']
# Keep only the columns that exist: 'label' is absent without a label column,
# and the feature columns may be absent if no window produced features.
display_cols = [col for col in preferred_cols if col in windowed_df.columns]
windowed_df[display_cols].head(10)

Created 3 windows (time-based, size=5min)
  Window 0/3: 4417 updates
Extracted features for 3 windows

Saved to: /home/smotaali/BGP_Traffic_Generation/resultsgraph_features_windowed.csv
Shape: (3, 54)

First 10 windows:


Unnamed: 0,window_idx,window_start,window_end,num_updates,num_nodes,num_edges,diameter,label
0,0,2025-12-08 17:21:02.824256,2025-12-08 17:26:02.824256,4417,891,2350,10,normal
1,1,2025-12-08 17:26:02.824256,2025-12-08 17:31:02.824256,3159,954,2560,10,normal
2,2,2025-12-08 17:31:02.824256,2025-12-08 17:36:02.824256,170,106,191,9,normal


## 7. Summary

In [7]:
print("="*60)
print("EXTRACTION COMPLETE")
print("="*60)
print(f"Global features:   {global_df.shape[0]} row x {global_df.shape[1]} features")
print(f"Per-node features: {node_df.shape[0]} rows x {node_df.shape[1]} features")
print(f"Windowed features: {windowed_df.shape[0]} rows x {windowed_df.shape[1]} features")
print("\nOutput files:")
# List the file names that the save cells actually write (with the timestamp
# suffix), joined so the result is valid with or without a trailing separator
# on OUTPUT_DIR.
for output_name in ("graph_features_global_20251208_172321.csv",
                    "graph_features_per_node_20251208_172321.csv",
                    "graph_features_windowed_20251208_172321.csv"):
    print(f"  - {os.path.join(OUTPUT_DIR, output_name)}")

EXTRACTION COMPLETE
Global features:   1 row x 50 features
Per-node features: 1491 rows x 10 features
Windowed features: 3 rows x 54 features

Output files:
  - /home/smotaali/BGP_Traffic_Generation/resultsgraph_features_global.csv
  - /home/smotaali/BGP_Traffic_Generation/resultsgraph_features_per_node.csv
  - /home/smotaali/BGP_Traffic_Generation/resultsgraph_features_windowed.csv
