In [1]:
# Imports
import folium
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import networkx as nx
import json

In [2]:
# Load the network-topology inputs from the working directory.
#   pd_net_raw    - parent/child table (geo_id, parent_id, lon, lat, device_type are used below)
#   pd_dev_raw    - device table (geo_id, mac and node are used below)
#   pd_dev_unique - loaded here; not referenced by the cells visible in this notebook
pd_net_raw, pd_dev_raw, pd_dev_unique = map(
    pd.read_csv,
    ('network_parent_child.csv', 'device_geo_data.csv', 'devices.csv'),
)

In [3]:
# Load the clustering result. Downstream cells iterate `clusters` as a sequence
# of MAC collections, one per cluster.
# The context manager guarantees the file is closed even if json.load raises.
with open('cluster_result.json') as f:
    clusters = json.load(f)

In [4]:
# Every distinct MAC that appears in any cluster
ls_mac_unique = set().union(*clusters)

In [5]:
# Cluster sizes counting every MAC entry (repeated MACs are counted each time)
n_mac_total = sum(len(cluster) for cluster in clusters)
print('{} Clusters Found, {} Total MAC'.format(len(clusters), n_mac_total))
for idx, cluster in enumerate(clusters):
    n_entries = len(cluster)
    print('Cluster {}, {} Total MACs'.format(idx, n_entries))

5 Clusters Found, 1046 Total MAC
Cluster 0, 211 Total MACs
Cluster 1, 664 Total MACs
Cluster 2, 142 Total MACs
Cluster 3, 28 Total MACs
Cluster 4, 1 Total MACs


In [6]:
# Cluster sizes after de-duplicating MACs (overall, then within each cluster)
n_mac_unique = len(ls_mac_unique)
print('{} Clusters Found, {} Unique MACs'.format(len(clusters), n_mac_unique))
for idx, cluster in enumerate(clusters):
    n_distinct = len(set(cluster))
    print('Cluster {}, {} Unique MACs'.format(idx, n_distinct))

5 Clusters Found, 350 Unique MACs
Cluster 0, 71 Unique MACs
Cluster 1, 223 Unique MACs
Cluster 2, 48 Unique MACs
Cluster 3, 10 Unique MACs
Cluster 4, 1 Unique MACs


In [7]:
# Sanity check: attach the topology row to every device record, keep only the
# fully populated rows, and list the device types that remain.
# Expected result: only 'address'.
(
    pd.merge(pd_net_raw, pd_dev_raw, how='right', on='geo_id')
    .dropna()
    ['device_type']
    .unique()
)

array(['address'], dtype=object)

In [8]:
# Device records whose MAC belongs to at least one cluster (index renumbered from 0)
pd_dev_filter = pd_dev_raw.loc[pd_dev_raw['mac'].isin(ls_mac_unique)].reset_index(drop=True)

In [9]:
# Topology rows that have a clustered device attached (inner join on geo_id)
pd_dev_valid = pd.merge(pd_net_raw, pd_dev_filter, on='geo_id', how='inner')

In [10]:
# Build the link table and the directed network graph G (edges run parent -> child).

# Self-join the topology: every row (the "_tail" side) is paired with the row of
# its parent (the "_head" side). Rows whose parent is not in the table keep NaN
# in all *_head columns.
pd_links = pd_net_raw.merge(
    pd_net_raw,
    left_on='parent_id',
    right_on='geo_id',
    how='left',
    suffixes=('_tail', '_head'),
)

# Imputation table: the parent's attributes relabelled as *_tail, with the
# coordinates shifted by 0.0001 (presumably so an imputed point does not sit
# exactly on top of its parent - confirm).
pd_links_fill = pd_links[['geo_id_head', 'parent_id_head', 'lon_head', 'lat_head', 'device_type_head']].copy()
pd_links_fill.columns = ['geo_id_tail', 'parent_id_tail', 'lon_tail', 'lat_tail', 'device_type_tail']
pd_links_fill['lat_tail'] = pd_links_fill.lat_tail + 0.0001
pd_links_fill['lon_tail'] = pd_links_fill.lon_tail + 0.0001

# fillna with a DataFrame aligns on index and column labels, so only missing
# lon_tail / lat_tail values are filled, and only where the parent has coordinates.
pd_links = pd_links.fillna(pd_links_fill[['lon_tail', 'lat_tail']])

# Create Graph using networkX
G = nx.DiGraph()

# Only rows whose parent was found in the table become edges. Per the original
# note, the remaining (terminating) nodes are optical nodes or cox USPS nodes.
has_parent = pd_links.geo_id_head.notna()
G.add_edges_from(
    pd_links.loc[has_parent, ['parent_id_tail', 'geo_id_tail']].to_numpy().astype(int).tolist()
)

# Make sure every geo_id is present as a node, including those without an edge.
# .tolist() yields plain Python scalars, matching the node type used for the edges.
G.add_nodes_from(pd_links.geo_id_tail.to_numpy().tolist())

In [11]:
# geo_ids of all nodes that carry a clustered device; these are the nodes to locate in the graph
ls_nodes_find = pd_dev_valid['geo_id'].tolist()

In [12]:
# Finding the single connected subgraph that all search nodes belong to
def find_subgraph(searchlist: list, graph: "nx.DiGraph"):
    """Return the weakly connected component of ``graph`` holding every search node.

    Parameters
    ----------
    searchlist : list
        Node identifiers that must all lie in one component.
    graph : nx.DiGraph
        Directed graph to search.

    Returns
    -------
    The result of ``graph.subgraph(component)`` for the first weakly connected
    component that contains every element of ``searchlist``.

    Raises
    ------
    AssertionError
        If an argument has the wrong type, or if no single component contains
        all search nodes (they are disconnected from each other, or missing
        from the graph).
    """
    assert isinstance(searchlist, list)
    assert isinstance(graph, nx.DiGraph)

    wanted = set(searchlist)

    # Walk the components lazily and stop at the first one covering all nodes
    for component in nx.weakly_connected_components(graph):
        if wanted.issubset(component):
            # Take the subgraph from the graph that was passed in, not from
            # the notebook-level global G.
            return graph.subgraph(component)

    # Otherwise, nodes are from disconnected subgraphs (and cannot have an MCA)
    raise AssertionError('Nodes are from disconnected subgraphs')

In [13]:
# Connected component of the topology that contains every clustered device node
search_graph = find_subgraph(searchlist=ls_nodes_find, graph=G)

# Frames used for plotting:
#   pd_links_plot_all     - link rows whose tail node lies in that component
#   pd_links_plot_all_mac - the same rows with the clustered device columns
#                           attached where a device exists (left join)
pd_links_plot_all = pd_links.loc[pd_links.geo_id_tail.isin(list(search_graph.nodes()))]
pd_links_plot_all_mac = pd.merge(
    pd_links_plot_all,
    pd_dev_filter,
    how='left',
    left_on='geo_id_tail',
    right_on='geo_id',
)

# Distinct device types present in the raw topology table
ls_device_types = list(pd.unique(pd_net_raw.device_type))

In [14]:
# Plot settings: marker colour and icon name for each device type
dict_device_icons = {
    device: {'color': color, 'icon': icon}
    for device, color, icon in (
        ('address', 'lightblue', 'home'),
        ('tap', 'blue', 'exchange'),

        ('amplifier', 'green', 'bolt'),
        ('power_inserter', 'darkgreen', 'bolt'),
        ('power_block', 'lightgreen', 'bolt'),

        ('in_line_equalizer', 'gray', 'arrows'),
        ('coaxial_splitter', 'beige', 'arrows'),
        ('internal_coupler', 'lightgray', 'arrows'),

        ('cox_usps_address', 'cadetblue', 'server'),
        ('optical_node', 'darkpurple', 'server'),
    )
}

# Device types whose markers get a small horizontal offset when plotted (they tend to overlap)
ls_disperse_pt = [
    'amplifier',
    'cox_usps_address',
    'power_inserter',
    'in_line_equalizer',
    'power_block',
    'coaxial_splitter',
    'internal_coupler',
    'optical_node',
]

# Find the "Network Node" (Cox Communications terminology, really a "neighborhood")
# of the points in the subgraph: count matched device rows per node.
pd_find_node = (
    pd.merge(pd_links_plot_all, pd_dev_raw, left_on='geo_id_tail', right_on='geo_id')
    .groupby('node')
    .count()
)
display(pd_find_node)
# Only the first node of the grouped index is reported
print('Mapping for node {}'.format(pd_find_node.index[0]))

Unnamed: 0_level_0,geo_id_tail,parent_id_tail,lon_tail,lat_tail,device_type_tail,geo_id_head,parent_id_head,lon_head,lat_head,device_type_head,geo_id,mac
node,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3VAC1,51,51,51,51,51,51,51,51,51,51,51,51


Mapping for node 3VAC1


In [15]:
# Device-type layers to show on the map; in the visible notebook this list is
# only referenced by the commented-out layer loop in the plotting cell.
ls_device_types_show = [
    'amplifier',
    'cox_usps_address',
    'power_inserter',
    'in_line_equalizer',
    'power_block',
    'coaxial_splitter',
    'internal_coupler',
    'optical_node',
]

In [16]:
# Build an interactive folium map of the local subnetwork: parent/child links are
# drawn as polylines and every MAC cluster gets its own toggleable marker layer.

# Define points to plot (links and points are drawn from the same frame)
pt_net_link_plot = pd_links_plot_all_mac
pd_net_pt_plot = pd_links_plot_all_mac

# Start at mean location (pandas' mean skips missing coordinates by default)
startloc = [pd_net_pt_plot.lat_tail.mean(), pd_net_pt_plot.lon_tail.mean()]
my_map = folium.Map(location=startloc, zoom_start=15)

# ---- Links ------------------------------------------------------------------
feature_group_pl = folium.FeatureGroup("Local Subnetwork")
# Keep only links that have coordinates on both ends
link_coord_cols = ['lat_tail', 'lon_tail', 'lat_head', 'lon_head']
pd_net_pl_plot = pt_net_link_plot.dropna(subset=link_coord_cols)
# Plot connections as polylines
for lat_tail, lng_tail, lat_head, lng_head in pd_net_pl_plot[link_coord_cols].to_numpy():
    feature_group_pl.add_child(
        folium.vector_layers.PolyLine(locations=[(lat_tail, lng_tail), (lat_head, lng_head)])
    )

# ---- Points -----------------------------------------------------------------
# A marker needs a concrete location, so rows whose coordinates are still
# missing after imputation are skipped (the link layer is filtered the same way).
pd_pts_located = pd_net_pt_plot.dropna(subset=['lat_tail', 'lon_tail'])
# device_type_tail stays in the selection so to_numpy() yields an object array
# and geo_id keeps its original type in the popup text.
marker_cols = ['lat_tail', 'lon_tail', 'geo_id_tail', 'device_type_tail']

# One feature group per device type, looked up through a dict.
device_layers = {device_type: folium.FeatureGroup(device_type) for device_type in ls_device_types}
# The feature_group_<type> globals are still published for any code that relies on them.
globals().update({'feature_group_{}'.format(key): layer for key, layer in device_layers.items()})

for device_type, iconsetting in dict_device_icons.items():
    pd_pt_filter = pd_pts_located[pd_pts_located.device_type_tail == device_type]
    if pd_pt_filter.empty:
        continue
    layer = device_layers[device_type]
    disperse = device_type in ls_disperse_pt
    for lat, lng, geo_id, row_type in pd_pt_filter[marker_cols].to_numpy():
        if disperse:
            # Horizontal dispersion for overlapping features: a random offset
            # pushed away from zero, so its magnitude is between 0.000025 and 0.00005.
            dispersion = np.random.uniform(-0.000025, 0.000025)
            dispersion = dispersion + 0.000025 if dispersion >= 0 else dispersion - 0.000025
            lng += dispersion
        # NOTE(review): iconsetting['color'] is not used; every device marker is
        # drawn light gray - presumably so cluster colours stand out. Confirm.
        layer.add_child(folium.Marker(
            location=[lat, lng],
            popup='ID={}\nDevice={}'.format(str(geo_id), row_type),
            icon=folium.Icon(icon=iconsetting['icon'], color='lightgray', prefix='fa'),
        ))

# Set Cluster Palette
ls_cluster_palette = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue',
                      'darkgreen', 'cadetblue', 'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray']

# One feature group per cluster (also published as feature_group_cluster_<n>)
cluster_layers = {
    cluster_id: folium.FeatureGroup('cluster_{}'.format(cluster_id))
    for cluster_id in range(len(clusters))
}
globals().update({'feature_group_cluster_{}'.format(key): layer for key, layer in cluster_layers.items()})

for cluster_id, ls_cluster in enumerate(clusters):
    # Points whose MAC belongs to this cluster
    pd_pt_filter = pd_pts_located[pd_pts_located.mac.isin(ls_cluster)]
    # Wrap around the palette so more clusters than colours cannot raise IndexError
    cluster_color = ls_cluster_palette[cluster_id % len(ls_cluster_palette)]
    for lat, lng, geo_id, row_type in pd_pt_filter[marker_cols].to_numpy():
        cluster_layers[cluster_id].add_child(folium.Marker(
            location=[lat, lng],
            popup='ID={}\nDevice={}'.format(str(geo_id), row_type),
            icon=folium.Icon(icon='cloud', color=cluster_color, prefix='fa'),
        ))

# ---- Assemble the map ---------------------------------------------------------
my_map.add_child(feature_group_pl)

# Device-type layers are built above but currently not added to the map:
# for name in ls_device_types_show:
#     my_map.add_child(globals()['feature_group_{}'.format(name)])

for cluster_id in range(len(clusters)):
    my_map.add_child(cluster_layers[cluster_id])

my_map.add_child(folium.map.LayerControl('topright', collapsed=False))

# Show
my_map