In [None]:
import os
from collections import namedtuple
from pprint import pprint
import pandas as pd
from tqdm import tqdm
import ipaddress
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the raw flow trace, then work on a copy without the 'lpid' column.
raw_traffic = pd.read_csv('traces/traffic_flows.csv')
# NOTE(review): drop_cols is computed but not used by any cell visible here.
drop_cols = raw_traffic.columns[1]
traffic = raw_traffic.drop(columns=['lpid'])
traffic.head()

Unnamed: 0,switch_id,dest_net,mask,dest_switch_ip,port_bw,mean_wait_time,load
0,8,8.25.212.212,31,8.25.212.212,25000000000.0,497,0.9
1,296,8.23.244.196,31,8.23.244.197,25000000000.0,497,0.9
2,11,8.23.243.122,31,8.23.243.123,25000000000.0,497,0.9
3,69,8.25.188.40,31,8.25.188.40,25000000000.0,497,0.9
4,299,8.25.210.211,32,8.25.210.211,25000000000.0,497,0.9


In [5]:
# Directory holding the switch config files; the trailing slash is relied on
# when the per-file paths are assembled below.
path = f"{os.getcwd()}/topologies/zte_topo/"
switch_config_paths = [f"{path}{entry}" for entry in os.listdir(path)]

In [6]:
# namedtuples defining a switch

# A single port entry of a switch.
port = namedtuple("port", ["name", "bandwidth", "ip", "mask"])


def port_from_list(x):
    '''
        Builds a port from a list of fields: name, bandwidth, ip, mask.
        bandwidth and mask are converted to int; extra fields are ignored.
    '''
    return port(x[0], int(x[1]), x[2], int(x[3]))

# A single route entry of a switch.
route = namedtuple("route", ["src", "mask", "dest", "dest_id", "port"])


def route_from_list(x):
    '''
        Builds a route from a list of fields: src, mask, dest, dest_id, port.
        mask and dest_id are converted to int; extra fields are ignored.
    '''
    return route(x[0], int(x[1]), x[2], int(x[3]), x[4])

# A single topology (link) entry of a switch.
topo = namedtuple("topo", ["dest_id", "src_port", "dest_port"])


def topo_from_list(x):
    '''
        Builds a topo from a list of fields: dest_id, src_port, dest_port.
        dest_id is converted to int; extra fields are ignored.
    '''
    return topo(int(x[0]), x[1], x[2])

# A switch: its id and type plus the port, route and topo entries it carries.
switch = namedtuple("switch", ["id", "type", "ports", "routes", "topos"])


def show_switch(x):
    '''
        Formatted print for switches: pretty-prints the switch as a dict
    '''
    return pprint(x._asdict())

In [7]:
def switch_from_config(conf):
    '''
        Creates switch namedtuples from switch config files
    '''
    with open(conf, 'r') as f:
        switch_data_lines = [line.strip().split(',') for line in f.readlines()]

    ports, routes, topos = [], [], []
    for switch_data in switch_data_lines:    
        match switch_data.pop(0):
            case "info":
                id, switch_type = int(switch_data[0]), switch_data[1]
            case "port":
                ports.append(port_from_list(switch_data))
            case "route":
                routes.append(route_from_list(switch_data))
            case "topo":
                topos.append(topo_from_list(switch_data))

    return switch(
        id      = id, 
        type    = switch_type, 
        ports   = ports, 
        routes  = routes, 
        topos   = topos
    )

In [8]:
# Switch ids read from largest_subnet.txt, one integer per line.
subnet_ids = []
with open("largest_subnet.txt", 'r') as f:
    subnet_ids.extend(int(raw_id.strip()) for raw_id in f)

In [9]:
# Parse every switch config and keep only the switches listed in subnet_ids.
#   switches       : switch id -> switch namedtuple
#   switch_ip_info : switch id -> {'ports': [ip addresses], 'subnets': [(network, next hop id)]}
#   out            : messages about topo links that point outside subnet_ids
switches, switch_ip_info = {}, {}
out = []

# set copy so the membership tests inside the loop do not scan the list
subnet_id_set = set(subnet_ids)

# iterate the list directly so tqdm can show the total, and use a dedicated
# loop variable so the directory `path` defined earlier is not overwritten
for conf_path in tqdm(switch_config_paths):
    sw = switch_from_config(conf_path)

    if sw.id not in subnet_id_set:
        continue

    for t in sw.topos:
        if t.dest_id not in subnet_id_set:
            out.append(f'{sw.id} has node {t.dest_id} in topo...')

    switches[sw.id] = sw

    ports = []
    for p in sw.ports:
        try:
            ports.append(ipaddress.ip_address(p.ip))
        except ValueError:
            # port without a parsable IP address: deliberately skipped
            pass

    subnets = []
    for r in sw.routes:
        try:
            subnets.append((ipaddress.ip_network(f"{r.src}/{r.mask}"), r.dest_id))
        except ValueError:
            # route whose src/mask is not a valid network: deliberately skipped
            # NOTE(review): ip_network is strict by default, so routes whose
            # src has host bits set are also dropped here - confirm intended.
            pass

    switch_ip_info[sw.id] = {
        'ports'   : ports,
        'subnets' : subnets
    }

9243it [05:02, 30.53it/s]


In [11]:
# Sanity check: number of switches kept vs. number of ids in subnet_ids.
n_switches, n_subnet_ids = len(switches), len(subnet_ids)
print(n_switches, n_subnet_ids, f"equal? {n_switches == n_subnet_ids}")

5097 5097 equal? True


In [54]:
def calculate_flow_path(flow):
    '''
        Traces the switch-by-switch path of a flow

        Starting at the flow's source switch, repeatedly follows the route
        with the longest prefix that contains the destination IP, until a
        switch that has the destination IP on one of its ports is reached.

        Reads the module-level `switches`, `switch_ip_info` and `subnet_ids`.

        Args:
            flow: mapping with a 'switch_id' entry (id of the source switch)
                and a 'dest_switch_ip' entry (destination IP address string).

        Returns:
            A (flow_path, valid, reason) tuple: the list of switch ids visited
            in order, whether the destination was reached, and either 'N/A'
            (valid flow) or a message describing why the trace stopped.

        Raises:
            ValueError: if 'dest_switch_ip' is not a valid IP address.
    '''
    source = flow['switch_id']
    dest   = ipaddress.ip_address(flow['dest_switch_ip'])

    flow_path = []

    # a flow whose source switch is unknown cannot be traced: report it as
    # invalid instead of failing with a KeyError
    current_switch = switches.get(source)
    if current_switch is None:
        reason = f"source switch {source} is NOT in the subnet | path so far: {flow_path}"
        return flow_path, False, reason

    while True:
        # if we have looped back to a switch we have visited before - this is an invalid circular flow
        if current_switch.id in flow_path:
            reason = f"reached a previously visited switch ({current_switch.id}) | path so far: {flow_path}"
            return flow_path, False, reason

        # add switch to flow path
        flow_path.append(current_switch.id)

        # ports and routes of the switch we are currently at
        ip_info = switch_ip_info[current_switch.id]

        # check if we reached dest
        if dest in ip_info['ports']:
            return flow_path, True, 'N/A'

        # (prefix length, next hop id) for every usable route:
        # cond1 : destination IP is contained in the subnet of the route
        # cond2 : next hop for that route is not us
        # cond3 : next hop is part of the main subnet
        candidate_next_hops = [
            (subnet.prefixlen, next_id)
            for subnet, next_id in ip_info['subnets']
            if dest in subnet and next_id != current_switch.id and next_id in subnet_ids
        ]

        # if no next hop candidates - invalid
        if not candidate_next_hops:
            reason = f"no valid hops at {current_switch.id} | path so far: {flow_path}"
            return flow_path, False, reason

        # longest prefix wins; max() keeps the first candidate on ties
        _, next_hop_id = max(candidate_next_hops, key=lambda hop: hop[0])

        # if we try to hop to a switch outside of the main network denote the route as invalid
        next_switch = switches.get(next_hop_id)
        if next_switch is None:
            reason = f"{current_switch.id} tried to hop to {next_hop_id} which is NOT in the subnet | path so far: {flow_path}"
            print('ATTENTION:', reason)
            return flow_path, False, reason

        current_switch = next_switch

In [55]:
# Trace the route of every flow in `traffic`.
#   data_for_dataframe : one [source, destination, valid, reason, path] row per flow
#   flow_traces        : flow position -> path, mean wait time and validity
flow_traces = {}

data_for_dataframe = []

for idx, flow_row in tqdm(enumerate(traffic.itertuples())):
    flow = flow_row._asdict()
    flow_trace, valid, reason = calculate_flow_path(flow)

    table_row = [flow['switch_id'], flow['dest_switch_ip'], valid, reason, flow_trace]
    data_for_dataframe.append(table_row)

    flow_traces[idx] = {
        'path': flow_trace,
        'mean_wait_time': flow['mean_wait_time'],
        'valid': valid
    }

# keep only the flows whose trace reached its destination
valid_flows = dict(filter(lambda item: item[1]['valid'], flow_traces.items()))

4288it [00:31, 134.94it/s]


In [56]:
# Tabulate the per-flow results, ordered by source switch, and export them.
flow_info_columns = ['source', 'destination', 'valid', 'reason', 'path']
df_flow_info = (
    pd.DataFrame(data_for_dataframe, columns=flow_info_columns)
    .sort_values('source')
)
df_flow_info.to_excel('output.xlsx', index=False)