In [None]:
# ======================================
# PART 1: Import Library - V7 FIXED VERSION
# ======================================
# This version fixes ALL correlation issues found in v5/v6:
# 1. Fixed sample_edit_distance bug (undefined create_large_max)
# 2. Decoupled withdrawals from flaps (was ~100%, now ~42%)
# 3. Increased withdrawal->NADAS correlation (was ~5%, now ~67%)
# 4. Reduced announcements->dups over-correlation (was ~85%, now ~33%)

from scapy.all import IP, IPv6, TCP, Ether, Padding, wrpcap, raw, rdpcap, load_contrib
from scapy.contrib.bgp import *
from scapy.utils import PcapReader
from scipy.stats import pareto, weibull_min
import datetime
import time
import random
import os
import csv
import struct
import traceback
import numpy as np
from collections import defaultdict
from typing import Dict, List, Tuple, Set, Optional
from dataclasses import dataclass

load_contrib('bgp')

# Destination folders for generated captures and analysis results.
OUTPUT_DIR = "/home/user/BGP_Traffic_Generation/pcaps"
RESULTS_DIR = "/home/user/BGP_Traffic_Generation/results"
# Both folders are created up front; already-existing folders are left alone.
for _directory in (OUTPUT_DIR, RESULTS_DIR):
    os.makedirs(_directory, exist_ok=True)
print("V7 Output directory created:", OUTPUT_DIR)

In [None]:
# ======================================
# PART 2: V7 CORRELATION FIXES (ALL INLINE)
# ======================================
# Target correlations from real RIPE data
# Each key is a (feature_a, feature_b) pair of feature names and each value is
# the target correlation for that pair.
# NOTE(review): nothing in the visible part of this file reads this table;
# confirm whether a later validation cell consumes it.
REAL_CORRELATIONS = {
    ('withdrawals', 'nadas'): 0.671,
    ('imp_wd_dpath', 'unique_as_path_max'): 0.319,
    ('withdrawals', 'flaps'): 0.425,
    ('announcements', 'dups'): 0.335,
    ('withdrawals', 'imp_wd_spath'): 0.520,
    ('edit_distance_dict_3', 'edit_distance_dict_4'): 0.463,
    ('imp_wd_spath', 'flaps'): 0.488,
    ('withdrawals', 'imp_wd'): 0.329,
    ('imp_wd', 'flaps'): 0.302,
    ('nadas', 'flaps'): 0.352,
    ('edit_distance_max', 'edit_distance_dict_3'): 0.356,
    ('unique_as_path_max', 'edit_distance_avg'): 0.325,
    ('dups', 'nadas'): 0.322,
}

@dataclass
class PrefixBehaviorProfileV7:
    """Per-prefix behaviour switches and counters used by the V7 generator.

    Instances are filled in by sample_prefix_behavior_v7() and read (and in
    places mutated) by generate_traffic_v7(). The defaults describe a prefix
    with no special behaviour enabled.
    """
    # One of 'single', 'stable', 'normal', 'active', 'unstable'
    # (the values assigned by sample_prefix_behavior_v7).
    activity_level: str = 'normal'
    # Standalone withdrawals (NOT tied to flaps) - KEY FIX
    has_standalone_withdrawals: bool = False
    # Sampled number of withdrawals; generate_traffic_v7 decrements it once.
    withdrawal_count: int = 0
    # When True, the standalone withdrawal is followed by "NADAS"
    # re-announcements whose path contains an extra, newly drawn AS.
    withdrawal_triggers_nadas: bool = False
    # Number of such re-announcements emitted after the withdrawal.
    nadas_count_after_withdrawal: int = 0
    # Flapping (separate from withdrawals)
    is_flapping: bool = False
    # Number of path changes emitted in the flap sequence.
    flap_count: int = 0
    # Implicit withdrawals
    has_imp_wd: bool = False
    imp_wd_count: int = 0
    # generate_traffic_v7 only emits the imp_wd scenario when this is True.
    imp_wd_triggers_withdrawal: bool = False
    has_imp_wd_spath: bool = False
    imp_wd_spath_count: int = 0
    # generate_traffic_v7 only emits the imp_wd_spath scenario when this is True.
    imp_wd_spath_triggers_withdrawal: bool = False
    # Path diversity
    # NOTE(review): not read by any code visible in this part of the file.
    target_unique_paths: int = 2
    # Duplicates
    has_duplicates: bool = False
    duplicate_count: int = 0
    # Allows a duplicate to follow each NADAS re-announcement.
    duplicates_with_nadas: bool = False
    # Enables the standalone duplicate-announcement scenario.
    duplicates_standalone: bool = False
    # Edit distance
    # One of 'none', 'small', 'medium', 'large' (see sample_edit_distance_v7).
    edit_distance_cluster: str = 'small'
    # NOTE(review): sampled per activity level but not read by any code
    # visible in this part of the file.
    target_announcements: int = 2

def sample_prefix_behavior_v7():
    """Randomly draw the V7 behaviour profile for one prefix.

    One uniform draw picks the activity level (35% single, 20% stable,
    20% normal, 15% active, 10% unstable). Further draws inside each level
    decide which behaviours are enabled and how many events each produces.

    Returns:
        PrefixBehaviorProfileV7: the populated profile.
    """
    chance = random.random
    between = random.randint
    profile = PrefixBehaviorProfileV7()
    level_draw = chance()

    # --- Single prefix (35%): one announcement, nothing else ---------------
    if level_draw < 0.35:
        profile.activity_level = 'single'
        profile.target_announcements = 1
        profile.target_unique_paths = 1
        profile.edit_distance_cluster = 'none'
        return profile

    # --- Stable (20%): rare withdrawal / rare standalone duplicate ---------
    if level_draw < 0.55:
        profile.activity_level = 'stable'
        profile.target_announcements = between(1, 3)
        if chance() < 0.08:
            profile.has_standalone_withdrawals = True
            profile.withdrawal_count = 1
            profile.withdrawal_triggers_nadas = True
            profile.nadas_count_after_withdrawal = 1
        if chance() < 0.1:
            profile.has_duplicates = True
            profile.duplicate_count = 1
            profile.duplicates_standalone = True
        profile.edit_distance_cluster = 'small'
        return profile

    # --- Normal (20%) -------------------------------------------------------
    if level_draw < 0.75:
        profile.activity_level = 'normal'
        profile.target_announcements = between(2, 5)
        if chance() < 0.25:
            profile.has_standalone_withdrawals = True
            profile.withdrawal_count = between(1, 2)
            profile.withdrawal_triggers_nadas = chance() < 0.75
            profile.nadas_count_after_withdrawal = between(1, 2)
        if chance() < 0.12:
            profile.is_flapping = True
            profile.flap_count = between(1, 2)
        if chance() < 0.15:
            profile.has_imp_wd = True
            profile.imp_wd_count = between(1, 2)
            profile.imp_wd_triggers_withdrawal = chance() < 0.3
        if chance() < 0.20:
            profile.has_duplicates = True
            profile.duplicate_count = between(1, 2)
            profile.duplicates_with_nadas = chance() < 0.5
            profile.duplicates_standalone = chance() < 0.3
        profile.edit_distance_cluster = random.choices(['small', 'medium'], weights=[0.7, 0.3])[0]
        return profile

    # --- Active (15%) -------------------------------------------------------
    if level_draw < 0.90:
        profile.activity_level = 'active'
        profile.target_announcements = between(4, 8)
        if chance() < 0.40:
            profile.has_standalone_withdrawals = True
            profile.withdrawal_count = between(1, 3)
            profile.withdrawal_triggers_nadas = chance() < 0.80
            profile.nadas_count_after_withdrawal = between(1, 3)
        if chance() < 0.25:
            profile.is_flapping = True
            profile.flap_count = between(1, 3)
        if chance() < 0.25:
            profile.has_imp_wd = True
            profile.imp_wd_count = between(1, 3)
            profile.imp_wd_triggers_withdrawal = chance() < 0.4
        if chance() < 0.20:
            profile.has_imp_wd_spath = True
            profile.imp_wd_spath_count = between(1, 2)
            profile.imp_wd_spath_triggers_withdrawal = chance() < 0.6
        if chance() < 0.30:
            profile.has_duplicates = True
            profile.duplicate_count = between(1, 3)
            profile.duplicates_with_nadas = chance() < 0.6
        profile.edit_distance_cluster = random.choices(['small', 'medium', 'large'], weights=[0.3, 0.5, 0.2])[0]
        return profile

    # --- Unstable (10%): almost every behaviour is switched on -------------
    profile.activity_level = 'unstable'
    profile.target_announcements = between(6, 15)
    profile.has_standalone_withdrawals = True
    profile.withdrawal_count = between(2, 5)
    profile.withdrawal_triggers_nadas = True
    profile.nadas_count_after_withdrawal = between(2, 4)
    if chance() < 0.60:
        profile.is_flapping = True
        profile.flap_count = between(2, 4)
    profile.has_imp_wd = True
    profile.imp_wd_count = between(2, 4)
    profile.imp_wd_triggers_withdrawal = chance() < 0.5
    # imp_wd_spath_triggers_withdrawal keeps its default (False) at this level.
    profile.has_imp_wd_spath = True
    profile.imp_wd_spath_count = between(1, 3)
    profile.has_duplicates = True
    profile.duplicate_count = between(2, 5)
    profile.duplicates_with_nadas = True
    profile.edit_distance_cluster = random.choices(['medium', 'large'], weights=[0.4, 0.6])[0]
    return profile

def sample_edit_distance_v7(cluster, create_large_max=False):
    """Draw an edit distance for the given cluster.

    Args:
        cluster: 'none', 'small' or 'medium'; any other value is treated as
            the 'large' cluster.
        create_large_max: when truthy, the 'medium' and 'large' clusters use
            distributions shifted towards larger distances. Ignored for
            'none' and 'small'.

    Returns:
        int: 0 for 'none', otherwise a weighted random pick from the
        cluster's candidate distances.
    """
    if cluster == 'none':
        return 0

    if cluster == 'small':
        distances, weights = [0, 1, 2], [0.3, 0.5, 0.2]
    elif cluster == 'medium':
        if create_large_max:
            distances, weights = [2, 3, 4], [0.3, 0.5, 0.2]
        else:
            distances, weights = [1, 2, 3], [0.25, 0.50, 0.25]
    else:  # large
        distances = [3, 4, 5, 6]
        if create_large_max:
            weights = [0.25, 0.35, 0.25, 0.15]
        else:
            weights = [0.35, 0.35, 0.20, 0.10]

    return random.choices(distances, weights=weights)[0]

def calculate_edit_distance(path1, path2):
    """Return the Levenshtein edit distance between two sequences.

    Insertions, deletions and substitutions each cost 1. ``None`` is treated
    as an empty sequence. When one side is empty the distance is the length
    of the other side (every element must be inserted or deleted); the
    previous implementation returned 0 in that case.

    Args:
        path1: first sequence (e.g. a list of AS numbers), or None.
        path2: second sequence, or None.

    Returns:
        int: minimum number of single-element edits turning path1 into path2.
    """
    path1 = path1 or []
    path2 = path2 or []
    if not path1 or not path2:
        # At most one of the lengths is non-zero here.
        return len(path1) + len(path2)

    # Two-row dynamic programme: `previous` holds the distances for the
    # prefix of path1 processed so far against every prefix of path2.
    previous = list(range(len(path2) + 1))
    for i, left in enumerate(path1, start=1):
        current = [i]
        for j, right in enumerate(path2, start=1):
            if left == right:
                current.append(previous[j - 1])
            else:
                current.append(1 + min(previous[j], current[j - 1], previous[j - 1]))
        previous = current
    return previous[-1]

# Progress marker: the definitions of this cell have been executed.
print("V7 Correlation fixes loaded")

In [None]:
# ======================================
# PART 3: AS Topology Generation
# ======================================

def generate_as_topology():
    """Build a randomised four-level AS topology.

    The AS numbers are fixed; only the links between levels are random.
    Tier-1 ASes form a full peer mesh, every tier-2 AS buys transit from 1-3
    tier-1 ASes and peers with up to 3 later tier-2 ASes, every tier-3 AS
    buys transit from 1-2 tier-2 ASes, and each IXP/content AS peers with
    4 tier-2 and 3 tier-3 ASes. The main source/destination pair is peered
    directly if not already adjacent.

    Returns:
        tuple: (topology, as_numbers, main_src_as, main_dst_as) where
        topology maps ASN -> {"tier", "neighbors", "relationships"} and
        as_numbers maps group name -> list of ASNs.
    """
    as_numbers = {
        "tier1": [1299, 3356, 174, 3257, 6762],
        "tier2": [6939, 1273, 3320, 6453, 2914, 5511, 7018],
        "tier3": [41336, 35060, 34554, 49544, 50673, 39126, 48292, 62041,
                  45899, 51697, 60781, 44002, 56630, 31027, 64512],
        "ixp_content": [13335, 15169, 32934]
    }

    def level_of(group):
        # "tierN" -> N; any other group name is level 4.
        return int(group.replace("tier", "")) if "tier" in group else 4

    topology = {
        asn: {"tier": level_of(group), "neighbors": [], "relationships": {}}
        for group, members in as_numbers.items()
        for asn in members
    }

    def link(a, b, a_sees_b_as, b_sees_a_as):
        # Record the adjacency in both directions with the given roles.
        topology[a]["neighbors"].append(b)
        topology[b]["neighbors"].append(a)
        topology[a]["relationships"][b] = a_sees_b_as
        topology[b]["relationships"][a] = b_sees_a_as

    tier1 = as_numbers["tier1"]
    tier2 = as_numbers["tier2"]
    tier3 = as_numbers["tier3"]

    # Tier 1: full peer mesh.
    for position, left in enumerate(tier1):
        for right in tier1[position + 1:]:
            link(left, right, "peer", "peer")

    # Tier 2: each AS gets 1-3 tier-1 providers.
    for customer in tier2:
        for provider in random.sample(tier1, random.randint(1, 3)):
            link(customer, provider, "provider", "customer")

    # Tier 2: lateral peering with ASes later in the list.
    for position, left in enumerate(tier2):
        candidates = tier2[position + 1:]
        if not candidates:
            continue
        chosen = random.sample(candidates, min(random.randint(1, 3), len(candidates)))
        for right in chosen:
            if right not in topology[left]["neighbors"]:
                link(left, right, "peer", "peer")

    # Tier 3: each AS gets 1-2 tier-2 providers.
    for customer in tier3:
        for provider in random.sample(tier2, random.randint(1, 2)):
            link(customer, provider, "provider", "customer")

    # IXP/content ASes peer with a mix of tier-2 and tier-3 ASes.
    for ixp_asn in as_numbers["ixp_content"]:
        partners = random.sample(tier2, 4) + random.sample(tier3, 3)
        for partner in partners:
            link(ixp_asn, partner, "peer", "peer")

    main_src_as = 41336
    main_dst_as = 35060

    # Guarantee a direct session between the two main ASes.
    if main_dst_as not in topology[main_src_as]["neighbors"]:
        link(main_src_as, main_dst_as, "peer", "peer")

    return topology, as_numbers, main_src_as, main_dst_as

# Build the topology once; every later cell reads these module-level names.
topology, as_numbers, main_src_as, main_dst_as = generate_as_topology()
print(f"Generated topology with {len(topology)} ASes")
print(f"Main AS pair: AS{main_src_as} <-> AS{main_dst_as}")

# AS pools used when synthesising AS paths.
TIER1_ASES, TIER2_ASES, TIER3_ASES = (
    as_numbers[group] for group in ("tier1", "tier2", "tier3")
)
# Three 5000-wide ASN ranges, shuffled so that slices of the pool are random.
RARE_AS_POOL = [
    *range(30000, 35000),
    *range(45000, 50000),
    *range(55000, 60000),
]
random.shuffle(RARE_AS_POOL)
print(f"Generated {len(RARE_AS_POOL)} rare ASes")

In [None]:
# ======================================
# PART 4: IP and Interface Allocation
# ======================================

# Prefixes announced by the main source AS (see allocate_ip_addresses).
# These are the three IPv4 documentation ranges reserved by RFC 5737.
PREDEFINED_PREFIXES = ["203.0.113.0/24", "198.51.100.0/24", "192.0.2.0/24"]
# Inclusive (low, high) bounds for the IPv4 Identification field.
# generate_bgp_sessions draws its IP IDs from NORMAL_TRAFFIC_ID_RANGE; the
# other ranges are presumably reserved for tagging attack traffic generated
# elsewhere - verify against their users.
# NOTE(review): 0x7527 is 29991 while the next range starts at 0x7530 (30000),
# so 29992-29999 belong to no range; 0x752F may have been intended.
NORMAL_TRAFFIC_ID_RANGE = (0x03E8, 0x7527)  # 1000-29991
PREFIX_HIJACK_ID_RANGE = (0x7530, 0x9C3F)  # 30000-39999
PATH_MANIP_ID_RANGE = (0x9C40, 0xC34F)  # 40000-49999
DOS_ATTACK_ID_RANGE = (0xC350, 0xEA5F)  # 50000-59999
ROUTE_LEAK_ID_RANGE = (0xEA60, 0xFFFF)  # 60000-65535

def allocate_ip_addresses(topology, as_numbers, main_src_as, main_dst_as):
    """Assign router IDs, announced prefixes and link interface IPs to every AS.

    Router IDs and link subnets are drawn at random but are now kept unique:
    a colliding draw is retried (up to a bounded number of attempts, after
    which the last draw is accepted as best effort). Previously two ASes
    could receive the same router ID - likely for tier-3, whose IDs have only
    256 possible values - and two links could share interface addresses.

    Args:
        topology: mapping ASN -> node dict; only node["neighbors"] is read.
        as_numbers: mapping group name ("tier1", "tier2", "tier3", other) ->
            list of ASNs. The group selects the address ranges used.
        main_src_as: ASN that announces PREDEFINED_PREFIXES instead of
            random prefixes.
        main_dst_as: unused; kept for interface compatibility.

    Returns:
        dict: ASN -> {"router_id": str, "announced_prefixes": list[str],
        "interfaces": {neighbor ASN: local interface IP on that link}}.
    """
    max_attempts = 1000

    def draw_unique(draw, taken):
        # Re-draw while the candidate is already taken, then reserve it.
        candidate = draw()
        for _ in range(max_attempts):
            if candidate not in taken:
                break
            candidate = draw()
        taken.add(candidate)
        return candidate

    def draw_router_id(tier):
        if tier == "tier1":
            octet1, octet2 = 100, random.randint(64, 127)
        elif tier == "tier2":
            octet1, octet2 = 172, random.randint(16, 31)
        elif tier == "tier3":
            octet1, octet2 = 192, 168
        else:
            octet1, octet2 = 10, random.randint(0, 255)
        return f"{octet1}.{octet2}.{random.randint(0, 255)}.1"

    def draw_link_subnet():
        # (second octet, third octet, base of a /30 inside the last octet)
        return (random.randint(0, 255), random.randint(0, 255), random.randint(0, 63) * 4)

    ip_allocations = {}
    used_router_ids = set()

    for tier, asn_list in as_numbers.items():
        for asn in asn_list:
            router_id = draw_unique(lambda: draw_router_id(tier), used_router_ids)

            if asn == main_src_as:
                announced_prefixes = PREDEFINED_PREFIXES.copy()
            else:
                announced_prefixes = []
                # NOTE: with the ranges used below, the "!=" guards never
                # reject a value; they are kept as-is from the original.
                if tier == "tier1":
                    for _ in range(random.randint(1, 2)):
                        prefix = f"203.{random.randint(0, 254)}.0.0/16"
                        if prefix != "203.0.113.0/16":
                            announced_prefixes.append(prefix)
                elif tier == "tier2":
                    for _ in range(random.randint(1, 3)):
                        prefix = f"198.51.{random.randint(0, 99)}.0/24"
                        if prefix != "198.51.100.0/24":
                            announced_prefixes.append(prefix)
                elif tier == "tier3":
                    for _ in range(random.randint(1, 2)):
                        prefix = f"192.0.{random.randint(3, 255)}.0/24"
                        if prefix != "192.0.2.0/24":
                            announced_prefixes.append(prefix)
                else:
                    announced_prefixes.append(f"198.18.{random.randint(0, 255)}.0/24")

                if not announced_prefixes:
                    announced_prefixes.append(f"172.{random.randint(20, 30)}.{random.randint(0, 255)}.0/24")

            ip_allocations[asn] = {"router_id": router_id, "announced_prefixes": announced_prefixes, "interfaces": {}}

    used_subnets = set()
    for asn, info in topology.items():
        for neighbor in info["neighbors"]:
            # The link was already addressed when visited from the other end.
            if neighbor in ip_allocations[asn]["interfaces"]:
                continue
            link_net1, link_net2, link_net3 = draw_unique(draw_link_subnet, used_subnets)
            low_ip = f"10.{link_net1}.{link_net2}.{link_net3 + 1}"
            high_ip = f"10.{link_net1}.{link_net2}.{link_net3 + 2}"
            # The lower-numbered AS always takes the first host of the /30.
            if asn < neighbor:
                local_ip, remote_ip = low_ip, high_ip
            else:
                local_ip, remote_ip = high_ip, low_ip
            ip_allocations[asn]["interfaces"][neighbor] = local_ip
            ip_allocations[neighbor]["interfaces"][asn] = remote_ip

    return ip_allocations

# Address plan for the topology built above: router IDs, announced prefixes
# and per-link interface IPs, keyed by ASN. Read by the later cells.
ip_allocations = allocate_ip_addresses(topology, as_numbers, main_src_as, main_dst_as)
print(f"IP allocations done for {len(ip_allocations)} ASes")

In [None]:
# ======================================
# PART 5: BGP Session Generator
# ======================================

# Global list of generated packets; the session-establishment packets are
# appended to it right after generate_bgp_sessions runs below.
pkts = []
# Placeholder; rebound below to the session table returned by
# generate_bgp_sessions.
global_bgp_sessions_ipv4 = {}

def generate_bgp_sessions(topology, ip_allocations):
    """Create session-establishment packets for every directed AS adjacency.

    For each (asn, neighbor) pair found while walking topology, this builds a
    TCP three-way handshake to port 179 followed by a BGP OPEN and a
    KEEPALIVE sent by ``asn``. Because pairs are keyed in order, a link
    listed in both ASes' neighbor lists yields one session per direction.

    Args:
        topology: mapping ASN -> node dict; node["neighbors"] is read.
        ip_allocations: mapping ASN -> {"router_id", "interfaces"} as
            produced by allocate_ip_addresses.

    Returns:
        tuple: (bgp_sessions, all_packets). bgp_sessions maps
        (asn, neighbor) -> dict with addressing, ports, the next sequence
        numbers and the next IP IDs; all_packets is the packet list in
        emission order.
    """
    minimum_frame = 60

    def pad_short(frame):
        # Zero-pad frames shorter than the minimum frame length.
        gap = minimum_frame - len(frame)
        return frame / Padding(load=b'\x00' * gap) if gap > 0 else frame

    def random_mac():
        return "00:" + ":".join([f"{random.randint(0, 255):02x}" for _ in range(5)])

    sessions = {}
    frames = []
    initial_seqs = {}

    for asn, info in topology.items():
        for neighbor in info["neighbors"]:
            pair = (asn, neighbor)
            if pair in initial_seqs:
                continue

            local_ip = ip_allocations[asn]["interfaces"][neighbor]
            remote_ip = ip_allocations[neighbor]["interfaces"][asn]
            local_router_id = ip_allocations[asn]["router_id"]
            # Looked up as in the original, but not used by any packet below.
            remote_router_id = ip_allocations[neighbor]["router_id"]

            local_mac = random_mac()
            remote_mac = random_mac()

            local_port = random.randint(30000, 65000)
            bgp_port = 179

            seq_local = random.randint(1000, 10000)
            seq_remote = random.randint(1000, 10000)
            initial_seqs[pair] = (seq_local, seq_remote)

            mss_option = [('MSS', 1460)]
            id_low, id_high = NORMAL_TRAFFIC_ID_RANGE[0], NORMAL_TRAFFIC_ID_RANGE[1]
            local_ip_id = random.randint(id_low, id_high)
            remote_ip_id = random.randint(id_low, id_high)

            def from_local(ip_id, **tcp_fields):
                # Ether/IP/TCP headers for a segment sent by `asn`.
                return (
                    Ether(src=local_mac, dst=remote_mac)
                    / IP(src=local_ip, dst=remote_ip, ttl=1, flags="DF", tos=0xC0, id=ip_id)
                    / TCP(sport=local_port, dport=bgp_port, window=16384, **tcp_fields)
                )

            # --- TCP handshake ---
            frames.append(pad_short(
                from_local(local_ip_id, flags="S", seq=seq_local, options=mss_option)
            ))

            syn_ack = (
                Ether(src=remote_mac, dst=local_mac)
                / IP(src=remote_ip, dst=local_ip, ttl=1, flags=0, tos=0xC0, id=remote_ip_id)
                / TCP(sport=bgp_port, dport=local_port, flags="SA", seq=seq_remote,
                      ack=seq_local + 1, window=16384, options=mss_option)
            )
            frames.append(pad_short(syn_ack))

            frames.append(pad_short(
                from_local(local_ip_id + 1, flags="A", seq=seq_local + 1, ack=seq_remote + 1)
            ))

            # SYN and SYN-ACK each consume one sequence number.
            seq_local += 1
            seq_remote += 1

            # --- BGP OPEN with three capabilities ---
            capabilities = [
                BGPCapMultiprotocol(code=1, length=4, afi=1, safi=1),
                BGPCapGeneric(code=2, length=0),
                BGPCapFourBytesASN(code=65, length=4, asn=asn),
            ]
            opt_params = [
                BGPOptParam(param_type=2, param_length=len(cap), param_value=cap)
                for cap in capabilities
            ]
            open_msg = BGPHeader(type=1) / BGPOpen(version=4, my_as=asn, hold_time=180,
                                                   bgp_id=local_router_id, opt_params=opt_params)
            frames.append(pad_short(
                from_local(local_ip_id + 2, flags="PA", seq=seq_local, ack=seq_remote) / open_msg
            ))
            seq_local += len(open_msg)

            # --- KEEPALIVE ---
            keepalive = BGPKeepAlive()
            frames.append(pad_short(
                from_local(local_ip_id + 3, flags="PA", seq=seq_local, ack=seq_remote) / keepalive
            ))
            seq_local += len(keepalive)

            sessions[pair] = {
                "src_ipv4": local_ip, "dst_ipv4": remote_ip,
                "src_mac": local_mac, "dst_mac": remote_mac,
                "seq_a": seq_local, "seq_b": seq_remote,
                "sport": local_port, "dport": bgp_port,
                "src_ip_id": local_ip_id + 4, "dst_ip_id": remote_ip_id + 1
            }

    return sessions, frames

# Establish every session once; the session table is keyed by (asn, neighbor)
# and the handshake/OPEN/KEEPALIVE packets go to the front of the global list.
global_bgp_sessions_ipv4, session_packets = generate_bgp_sessions(topology, ip_allocations)
pkts.extend(session_packets)
print(f"Created {len(global_bgp_sessions_ipv4)} BGP sessions")
print(f"Generated {len(session_packets)} session establishment packets")

In [None]:
# ======================================
# PART 6: V7 TRAFFIC GENERATION - FIXED CORRELATIONS
# ======================================

class PrefixStateTrackerV7:
    """Tracks, per prefix, the announced state, current path and history.

    Attributes:
        states: defaultdict mapping prefix -> {'announced', 'path',
            'paths_seen', 'ases_seen'}; entries are created on first access.
        profiles: mapping prefix -> behaviour profile, filled lazily by
            get_profile().
    """

    def __init__(self):
        def fresh_state():
            return {'announced': False, 'path': None, 'paths_seen': set(), 'ases_seen': set()}

        self.states = defaultdict(fresh_state)
        self.profiles = {}

    def get_profile(self, prefix):
        """Return the prefix's profile, sampling one on first request."""
        try:
            return self.profiles[prefix]
        except KeyError:
            sampled = sample_prefix_behavior_v7()
            self.profiles[prefix] = sampled
            return sampled

    def announce(self, prefix, as_path):
        """Record an announcement and classify it against the current state.

        Returns:
            tuple: (event_type, edit_distance). event_type is 'new' when the
            prefix was not announced, 'duplicate' when the path is unchanged,
            'imp_wd_spath' when the new path is shorter, 'imp_wd_dpath' when
            it is longer, and 'imp_wd' when it differs at equal length. The
            distance is 0 for 'new' and 'duplicate'.
        """
        state = self.states[prefix]
        state['ases_seen'].update(as_path)
        path_key = tuple(as_path)

        if not state['announced']:
            state['announced'] = True
            state['path'] = as_path
            state['paths_seen'].add(path_key)
            return 'new', 0

        previous = state['path']
        distance = calculate_edit_distance(previous, as_path) if previous else 0
        state['paths_seen'].add(path_key)
        state['path'] = as_path

        if as_path == previous:
            return 'duplicate', 0

        old_len, new_len = len(previous), len(as_path)
        if new_len < old_len:
            kind = 'imp_wd_spath'
        elif new_len != old_len:
            kind = 'imp_wd_dpath'
        else:
            kind = 'imp_wd'
        return kind, distance

    def withdraw(self, prefix):
        """Mark the prefix as no longer announced (path history is kept)."""
        self.states[prefix]['announced'] = False

def vary_as_path_v7(base_path, tier2_ases, rare_pool, var_type='substitute', target_ed=None, preserve_len=False):
    """Return a randomly varied copy of an AS path.

    Args:
        base_path: list of ASNs to vary; never modified.
        tier2_ases: candidate ASNs for substitutions (and for lengthening
            when rare_pool is empty).
        rare_pool: candidate ASNs for lengthening; only the first 1000
            entries are considered.
        var_type: 'shorten', 'lengthen' or anything else for 'substitute'.
            'shorten' falls back to substitution when the path has at most
            two hops.
        target_ed: accepted but not used.
        preserve_len: when True, 'shorten' and 'lengthen' are not applied and
            a substitution is done instead.

    Returns:
        tuple: (new_path, edit_distance_to_base, was_shortened). An empty or
        None base_path is returned unchanged as (base_path, 0, False).
    """
    if not base_path:
        return base_path, 0, False

    def draw_absent(candidates, occupied):
        # Pick a candidate, re-drawing up to 10 times while it is already in
        # the path; the final draw may still be a member.
        pick = random.choice(candidates)
        for _ in range(10):
            if pick not in occupied:
                break
            pick = random.choice(candidates)
        return pick

    mutated = base_path.copy()
    may_resize = not preserve_len

    if may_resize and var_type == 'shorten' and len(base_path) > 2:
        # Drop up to two interior hops; both endpoints always survive.
        drops = max(1, min(2, len(mutated) - 2))
        for _ in range(drops):
            if len(mutated) > 2:
                mutated.pop(random.randint(1, len(mutated) - 2))
        return mutated, calculate_edit_distance(base_path, mutated), True

    if may_resize and var_type == 'lengthen':
        candidates = rare_pool[:1000] if rare_pool else tier2_ases
        extra = draw_absent(candidates, mutated)
        if extra not in mutated and len(mutated) > 1:
            # Never insert in front of the first element.
            mutated.insert(random.randint(1, len(mutated) - 1), extra)
        return mutated, calculate_edit_distance(base_path, mutated), False

    # substitute: replace one hop after the first with an unused tier-2 AS
    if len(mutated) > 1:
        slot = random.randint(1, len(mutated) - 1)
        replacement = draw_absent(tier2_ases, mutated)
        if replacement not in mutated:
            mutated[slot] = replacement
    return mutated, calculate_edit_distance(base_path, mutated), False

def generate_as_path_v7(origin_as, profile):
    """Build a random AS path that starts with origin_as.

    The target length depends on profile.activity_level ('unstable' and
    'active' prefixes get longer paths). Hops near the start are drawn from
    the rare-AS pool, middle hops from tier-2 and the final hops from tier-1
    or the first five tier-2 ASes. A hop is skipped when no unused AS is
    found within the retry budget, so the result can be shorter than the
    target length.

    Args:
        origin_as: ASN placed first in the path.
        profile: object exposing an ``activity_level`` attribute.

    Returns:
        list: the generated AS path.
    """
    length_options = {
        'unstable': ([4, 5, 6, 7], [0.15, 0.30, 0.35, 0.20]),
        'active': ([4, 5, 6], [0.25, 0.45, 0.30]),
    }
    lengths, weights = length_options.get(
        profile.activity_level, ([3, 4, 5], [0.35, 0.45, 0.20])
    )
    length = random.choices(lengths, weights=weights)[0]

    path = [origin_as]
    hops = length - 1
    for hop in range(hops):
        # Relative position of this hop along the path (0 = first hop).
        position = hop / hops
        if position < 0.3:
            candidates = RARE_AS_POOL[:2000]
        elif position < 0.7:
            candidates = TIER2_ASES
        elif random.random() < 0.3:
            candidates = TIER1_ASES
        else:
            candidates = TIER2_ASES[:5]

        chosen = random.choice(candidates)
        for _ in range(10):
            if chosen not in path:
                break
            chosen = random.choice(candidates)
        if chosen not in path:
            path.append(chosen)
    return path

# Progress marker: the tracker and path helpers of this cell are defined.
print("V7 traffic generation functions loaded")

In [None]:
# ======================================
# PART 7: MAIN V7 TRAFFIC GENERATOR
# ======================================

def generate_traffic_v7(peer_ip, all_prefixes, target_events=500):
    """V7: Generate traffic with FIXED correlations

    Runs ``target_events`` iterations. Each iteration picks a random prefix,
    fetches (or samples) its behaviour profile and then runs the first
    applicable scenario, in this order: single announcement, standalone
    withdrawal followed by NADAS re-announcements, flapping,
    imp_wd_spath + withdrawal, imp_wd + withdrawal, standalone duplicates,
    and finally a plain announcement. Every scenario except the plain
    announcement runs at most once per prefix.

    An iteration can add zero events (a 'single' prefix that was already
    announced) or several, so ``len(events)`` generally differs from
    ``target_events``.

    Args:
        peer_ip: value copied into every event's 'peer_ip' field.
        all_prefixes: non-empty sequence of prefixes to choose from.
        target_events: number of loop iterations.

    Returns:
        tuple: (events, tracker) where events is a list of dicts with at
        least 'time', 'action', 'prefix', 'peer_ip', 'as_path' and
        'event_type', and tracker is the PrefixStateTrackerV7 used.
    """
    tracker = PrefixStateTrackerV7()
    events = []
    # prefix -> names of the one-shot scenarios already executed for it
    prefix_done = defaultdict(set)
    
    for _ in range(target_events):
        prefix = random.choice(all_prefixes)
        profile = tracker.get_profile(prefix)
        done = prefix_done[prefix]
        
        if profile.activity_level in ['unstable', 'active']:
            origin = random.choice(RARE_AS_POOL[:1500])
        else:
            origin = random.choice(TIER2_ASES)
        
        # A fresh base path is generated on every iteration, so the same
        # prefix may be announced with unrelated paths across iterations.
        base_path = generate_as_path_v7(origin, profile)
        # NOTE(review): the clock restarts at 0.0 each iteration, so 'time'
        # is relative to the iteration and the events list is not globally
        # time-ordered - confirm how downstream code uses 'time'.
        t = 0.0
        
        # Single prefix
        if profile.activity_level == 'single':
            if 'single' not in done:
                tracker.announce(prefix, base_path)
                events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': base_path.copy(), 'event_type': 'new'})
                done.add('single')
            continue
        
        # V7 FIX 1: STANDALONE WITHDRAWAL -> NADAS (creates withdrawal<->nadas correlation)
        if profile.has_standalone_withdrawals and profile.withdrawal_count > 0 and 'wd_nadas' not in done:
            # First announce
            if not tracker.states[prefix]['announced']:
                tracker.announce(prefix, base_path)
                events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': base_path.copy(), 'event_type': 'new'})
                t += random.uniform(1.0, 5.0)
            
            # STANDALONE withdrawal (NOT from flapping)
            tracker.withdraw(prefix)
            events.append({'time': t, 'action': 'withdraw', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': None, 'event_type': 'withdraw', 'is_standalone': True})
            t += random.uniform(0.5, 3.0)
            
            # NADAS: re-announce with NEW AS
            if profile.withdrawal_triggers_nadas:
                for nadas_i in range(profile.nadas_count_after_withdrawal):
                    # Each re-announcement starts again from base_path and
                    # inserts one newly drawn AS after the first element.
                    nadas_path = base_path.copy()
                    new_as = random.choice(RARE_AS_POOL[:2000])
                    attempts = 0
                    while new_as in nadas_path and attempts < 10:
                        new_as = random.choice(RARE_AS_POOL[:2000])
                        attempts += 1
                    if new_as not in nadas_path and len(nadas_path) > 1:
                        nadas_path.insert(random.randint(1, len(nadas_path)-1), new_as)
                    
                    # The tracker's classification is ignored here; the event
                    # is always labelled 'nadas'.
                    tracker.announce(prefix, nadas_path)
                    events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': nadas_path.copy(), 'event_type': 'nadas', 'is_nadas': True})
                    t += random.uniform(0.1, 1.0)
                    
                    # FIX 13: duplicates with NADAS
                    # The duplicate repeats the path just announced and is
                    # recorded as an event only (the tracker is not called).
                    if profile.duplicates_with_nadas and random.random() < 0.5:
                        events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': nadas_path.copy(), 'event_type': 'duplicate', 'is_nadas_dup': True})
                        t += 0.05
            
            # The 'wd_nadas' marker blocks this scenario from running again,
            # so at most one standalone withdrawal is emitted per prefix even
            # when withdrawal_count was sampled higher.
            profile.withdrawal_count -= 1
            done.add('wd_nadas')
            continue
        
        # V7 FIX 3: FLAPPING (without automatic withdrawals - decoupled!)
        if profile.is_flapping and profile.flap_count > 0 and 'flap' not in done:
            # Flapping = rapid path changes, NOT withdrawal-announce pairs
            if not tracker.states[prefix]['announced']:
                tracker.announce(prefix, base_path)
                events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': base_path.copy(), 'event_type': 'new', 'is_flap': True})
                t += random.uniform(0.5, 2.0)
            
            # Variations chain from this iteration's base_path; event_type
            # comes from the tracker, which compares against the path it
            # currently holds for the prefix.
            current_path = base_path.copy()
            for cycle in range(profile.flap_count):
                # Path change during flap (implicit withdrawal)
                # 30% shorten (only if more than two hops), 30% substitute,
                # otherwise lengthen.
                var_roll = random.random()
                if var_roll < 0.3 and len(current_path) > 2:
                    new_path, ed, _ = vary_as_path_v7(current_path, TIER2_ASES, RARE_AS_POOL, 'shorten')
                elif var_roll < 0.6:
                    new_path, ed, _ = vary_as_path_v7(current_path, TIER2_ASES, RARE_AS_POOL, 'substitute', preserve_len=True)
                else:
                    new_path, ed, _ = vary_as_path_v7(current_path, TIER2_ASES, RARE_AS_POOL, 'lengthen')
                
                evt_type, _ = tracker.announce(prefix, new_path)
                events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': new_path.copy(), 'event_type': evt_type, 'is_flap': True, 'edit_distance': ed})
                current_path = new_path
                t += random.uniform(0.5, 3.0)
            
            profile.flap_count = 0
            done.add('flap')
            continue
        
        # V7 FIX 5: imp_wd_spath -> withdrawal
        # Requires a base path of more than two hops so that it can be shortened.
        if profile.has_imp_wd_spath and profile.imp_wd_spath_triggers_withdrawal and profile.imp_wd_spath_count > 0 and len(base_path) > 2 and 'imp_wd_spath' not in done:
            if not tracker.states[prefix]['announced']:
                tracker.announce(prefix, base_path)
                events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': base_path.copy(), 'event_type': 'new'})
                t += random.uniform(0.5, 2.0)
            
            # The event type is hard-coded; the tracker's classification of
            # this announcement is not used.
            short_path, ed, _ = vary_as_path_v7(base_path, TIER2_ASES, RARE_AS_POOL, 'shorten')
            tracker.announce(prefix, short_path)
            events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': short_path.copy(), 'event_type': 'imp_wd_spath', 'edit_distance': ed})
            t += random.uniform(1.0, 3.0)
            
            tracker.withdraw(prefix)
            events.append({'time': t, 'action': 'withdraw', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': None, 'event_type': 'withdraw', 'follows_imp_wd_spath': True})
            
            profile.imp_wd_spath_count -= 1
            done.add('imp_wd_spath')
            continue
        
        # V7 FIX 8: imp_wd -> withdrawal
        if profile.has_imp_wd and profile.imp_wd_triggers_withdrawal and profile.imp_wd_count > 0 and 'imp_wd' not in done:
            if not tracker.states[prefix]['announced']:
                tracker.announce(prefix, base_path)
                events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': base_path.copy(), 'event_type': 'new'})
                t += random.uniform(0.5, 2.0)
            
            # Same-length substitution; as above the event type is hard-coded.
            new_path, ed, _ = vary_as_path_v7(base_path, TIER2_ASES, RARE_AS_POOL, 'substitute', preserve_len=True)
            tracker.announce(prefix, new_path)
            events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': new_path.copy(), 'event_type': 'imp_wd', 'edit_distance': ed})
            t += random.uniform(1.0, 3.0)
            
            tracker.withdraw(prefix)
            events.append({'time': t, 'action': 'withdraw', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': None, 'event_type': 'withdraw', 'follows_imp_wd': True})
            
            profile.imp_wd_count -= 1
            done.add('imp_wd')
            continue
        
        # V7 FIX 4: Standalone duplicates
        if profile.has_duplicates and profile.duplicates_standalone and profile.duplicate_count > 0 and 'standalone_dup' not in done:
            if not tracker.states[prefix]['announced']:
                tracker.announce(prefix, base_path)
                events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': base_path.copy(), 'event_type': 'new'})
                t += random.uniform(0.5, 2.0)
            
            # NOTE(review): the duplicates repeat this iteration's base_path
            # and bypass the tracker. If the prefix was already announced
            # with a different path in an earlier iteration, these events are
            # labelled 'duplicate' although their path differs from the one
            # the tracker holds - confirm this is intended.
            for _ in range(profile.duplicate_count):
                events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': base_path.copy(), 'event_type': 'duplicate', 'is_standalone_dup': True})
                t += random.uniform(0.01, 0.3)
            
            profile.duplicate_count = 0
            done.add('standalone_dup')
            continue
        
        # Default: simple announcement
        # Classified by the tracker against the prefix's current path.
        evt_type, ed = tracker.announce(prefix, base_path)
        events.append({'time': t, 'action': 'announce', 'prefix': prefix, 'peer_ip': peer_ip, 'as_path': base_path.copy(), 'event_type': evt_type, 'edit_distance': ed})
    
    return events, tracker

# Progress marker: generate_traffic_v7 is defined.
print("V7 main traffic generator ready")

In [None]:
# ======================================
# PART 8: GENERATE V7 TRAFFIC
# ======================================

# Get all prefixes
# Start from the predefined prefixes, add every AS's announced prefixes and
# de-duplicate. NOTE(review): list(set(...)) gives no guaranteed order, so
# the prefix order (and thus seeded random picks) can differ between runs.
all_prefixes = PREDEFINED_PREFIXES.copy()
for asn, info in ip_allocations.items():
    all_prefixes.extend(info["announced_prefixes"])
all_prefixes = list(set(all_prefixes))

# Get peer IP
# Interface of the main source AS facing the main destination AS; falls back
# to the source AS's router ID when no such interface exists.
peer_ip = ip_allocations[main_src_as]["interfaces"].get(main_dst_as, ip_allocations[main_src_as]["router_id"])

print("Generating V7 traffic with FIXED correlations...")
print(f"Using {len(all_prefixes)} prefixes")

# Generate events
# TARGET_EVENTS is the number of generator iterations, not the number of
# events produced (see the count printed below).
TARGET_EVENTS = 1000  # Adjust as needed
events, tracker = generate_traffic_v7(peer_ip, all_prefixes, target_events=TARGET_EVENTS)

print(f"Generated {len(events)} events")

# Statistics
# All values are event counts. 'flaps' counts every event flagged is_flap
# (including the initial announcement of a flap sequence), not the number of
# flapping prefixes; 'duplicates', 'imp_wd' and 'imp_wd_spath' count by
# event_type.
stats = {
    'total': len(events),
    'announcements': sum(1 for e in events if e['action'] == 'announce'),
    'withdrawals': sum(1 for e in events if e['action'] == 'withdraw'),
    'standalone_wd': sum(1 for e in events if e.get('is_standalone')),
    'nadas': sum(1 for e in events if e.get('is_nadas')),
    'flaps': sum(1 for e in events if e.get('is_flap')),
    'duplicates': sum(1 for e in events if e['event_type'] == 'duplicate'),
    'imp_wd': sum(1 for e in events if e['event_type'] == 'imp_wd'),
    'imp_wd_spath': sum(1 for e in events if e['event_type'] == 'imp_wd_spath'),
}

print("\nV7 Traffic Statistics:")
for k, v in stats.items():
    print(f"  {k}: {v}")

# Correlation indicators
# NOTE(review): these are ratios of total event counts printed next to the
# correlation targets; a count ratio is not a correlation coefficient, so
# treat the comparison as a rough sanity check only.
if stats['withdrawals'] > 0:
    print(f"\nCorrelation Indicators:")
    print(f"  NADAS/Withdrawals: {stats['nadas']/max(1,stats['withdrawals']):.2f} (target: ~0.67)")
    print(f"  Flaps/Withdrawals: {stats['flaps']/max(1,stats['withdrawals']):.2f} (target: ~0.42)")
    print(f"  Dups/Announcements: {stats['duplicates']/max(1,stats['announcements']):.2f} (target: ~0.33)")

In [None]:
# ======================================
# PART 9: CONVERT EVENTS TO PACKETS
# ======================================

def event_to_packet(event, session_info, main_src_as):
    """Build the BGP UPDATE frame for one event and advance the session counters.

    Args:
        event: Event dict. Reads 'prefix' and 'action' (required) and
            'as_path' (optional; defaults to ``[main_src_as]``).
        session_info: Mutable per-session state dict. Reads the source and
            destination IPv4/MAC addresses, the TCP ports, 'seq_a', 'seq_b'
            and 'src_ip_id'. When a frame is built, 'seq_a' and 'src_ip_id'
            are advanced in place.
        main_src_as: AS number used as the AS path when the event has none.

    Returns:
        A list holding the single Ether/IP/TCP/BGP frame for 'announce' and
        'withdraw' events, or an empty list for any other action (in which
        case ``session_info`` is left untouched).
    """
    src_ipv4 = session_info["src_ipv4"]
    dst_ipv4 = session_info["dst_ipv4"]
    src_mac = session_info["src_mac"]
    dst_mac = session_info["dst_mac"]
    sport = session_info["sport"]
    dport = session_info["dport"]
    seq_a = session_info["seq_a"]
    seq_b = session_info["seq_b"]
    src_ip_id = session_info["src_ip_id"]

    prefix = event['prefix']
    as_path = event.get('as_path', [main_src_as])
    action = event['action']

    if action == 'announce':
        # ORIGIN (type 1), value 0
        origin = BGPPathAttr(type_flags=0x40, type_code=1)
        origin.attribute = BGPPAOrigin(origin=0)

        # AS_PATH (type 2) carried as one segment of type 2 holding the whole path
        as_path_attr = BGPPathAttr(type_flags=0x40, type_code=2)
        as_path_segment = BGPPAASPath()
        segment = BGPPAASPath.ASPathSegment(segment_type=2, segment_length=len(as_path), segment_value=as_path)
        as_path_segment.segments = [segment]
        as_path_attr.attribute = as_path_segment

        # NEXT_HOP (type 3) is the sender's own address
        next_hop = BGPPathAttr(type_flags=0x40, type_code=3)
        next_hop.attribute = BGPPANextHop(next_hop=src_ipv4)

        # MULTI_EXIT_DISC (type 4), fixed at 100
        med = BGPPathAttr(type_flags=0x80, type_code=4)
        med.attribute = BGPPAMultiExitDisc(med=100)

        # LOCAL_PREF (type 5), fixed at 200
        local_pref = BGPPathAttr(type_flags=0x40, type_code=5)
        local_pref.attribute = BGPPALocalPref(local_pref=200)

        update = BGPHeader(type=2)/BGPUpdate()
        update.path_attr = [origin, as_path_attr, next_hop, med, local_pref]
        # Assign the list (as done for path_attr and withdrawn_routes) instead
        # of appending to the field's default value.
        update.nlri = [BGPNLRI_IPv4(prefix=prefix)]
    elif action == 'withdraw':
        # A withdrawal carries only the withdrawn prefix, no path attributes
        update = BGPHeader(type=2)/BGPUpdate()
        update.withdrawn_routes = [BGPNLRI_IPv4(prefix=prefix)]
    else:
        # Unknown actions produce no packet and must not consume seq/id space
        return []

    pkt = (
        Ether(src=src_mac, dst=dst_mac)
        / IP(src=src_ipv4, dst=dst_ipv4, ttl=1, flags="DF", tos=0xC0, id=src_ip_id)
        / TCP(sport=sport, dport=dport, flags="PA", seq=seq_a, ack=seq_b, window=16384)
        / update
    )
    # Pad short frames up to 60 bytes (minimum Ethernet frame without FCS)
    if len(pkt) < 60:
        pkt = pkt/Padding(load=b'\x00' * (60 - len(pkt)))

    # Advance the sender state for the next packet. The TCP sequence number is
    # a 32-bit field and the IPv4 Identification is a 16-bit field, so both
    # must wrap around; an unwrapped value would no longer fit the header
    # field when the next packet is built.
    session_info["seq_a"] = (seq_a + len(update)) & 0xFFFFFFFF
    session_info["src_ip_id"] = (src_ip_id + 1) & 0xFFFF

    return [pkt]

# Convert all events to packets
# The session table may be keyed in either direction: try (src, dst) first and
# fall back to (dst, src).
session_key = (main_src_as, main_dst_as)
if session_key not in global_bgp_sessions_ipv4:
    session_key = (main_dst_as, main_src_as)

if session_key in global_bgp_sessions_ipv4:
    session_info = global_bgp_sessions_ipv4[session_key]

    # event_to_packet advances seq/IP-id inside session_info, so events must
    # be converted in order against the same dict.
    # NOTE(review): pkts is not created in this cell; presumably it is the
    # packet list built by an earlier cell - confirm.
    for event in events:
        new_pkts = event_to_packet(event, session_info, main_src_as)
        pkts.extend(new_pkts)
else:
    # Make the failure visible: without a session no UPDATE is appended and
    # the capture written later would silently lack the generated traffic.
    print(f"WARNING: no BGP session found between AS {main_src_as} and AS {main_dst_as}; no UPDATE packets were generated")

print(f"Total packets generated: {len(pkts)}")

In [None]:
# ======================================
# PART 10: SAVE OUTPUT FILES
# ======================================

# One timestamp string shared by both output file names
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

# Save PCAP
pcap_file = f"{OUTPUT_DIR}/bgp_traffic_v7_{timestamp}.pcap"
wrpcap(pcap_file, pkts)
print(f"PCAP saved to: {pcap_file}")

# Save CSV for feature extraction (one row per generated event)
csv_file = f"{RESULTS_DIR}/bgp_updates_analysis_{timestamp}.csv"
with open(csv_file, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Timestamp', 'Type', 'Subtype', 'Peer_IP', 'Peer_ASN', 'Prefix', 'AS_Path', 'Origin', 'Next_Hop', 'MED', 'Local_Pref', 'Communities', 'Label'])

    # Event times are offsets in seconds from this base time
    base_time = datetime.datetime.now()
    for event in events:
        ts = base_time + datetime.timedelta(seconds=event.get('time', 0))
        is_announce = event['action'] == 'announce'
        subtype = 'ANNOUNCE' if is_announce else 'WITHDRAW'
        # event_to_packet sends [main_src_as] when an announcement has no
        # explicit AS path; use the same default here so the CSV row matches
        # the packet in the PCAP. Withdrawals carry no AS path in the packet,
        # so their default stays empty.
        as_path = event.get('as_path', [main_src_as] if is_announce else [])
        as_path_str = ' '.join(map(str, as_path)) if as_path else ''

        # Origin, MED and Local_Pref are the fixed values event_to_packet puts
        # into announcements.
        # NOTE(review): the Communities column has no counterpart attribute in
        # the packets built by event_to_packet, and Next_Hop uses peer_ip while
        # the packet uses the session's src_ipv4 - confirm these are intended.
        writer.writerow([
            ts.strftime('%Y-%m-%d %H:%M:%S.%f'),
            'UPDATE', subtype, peer_ip, main_src_as, event['prefix'],
            as_path_str, 'IGP', peer_ip, 100, 200, f'{main_src_as}:200', 'normal'
        ])

print(f"CSV saved to: {csv_file}")

print("\n" + "="*60)
print("V7 GENERATION COMPLETE!")
print("="*60)
print(f"\nFiles created:")
print(f"  PCAP: {pcap_file}")
print(f"  CSV:  {csv_file}")
print(f"\nNow run feature extraction on the CSV file.")