# Graph Clustering using Louvain Clustering

In [None]:
import networkx as nx
import community
import matplotlib.pyplot as plt
from matplotlib import cm

In [None]:
# NetworkX provides the graph data structure for our use.
# `g` is an undirected nx.Graph; later cells add one node per IP address
# and one edge per (source, destination) pair read from the flow file.
g = nx.Graph()

In [None]:
filename = "data/two-hour-sample.csv"

# 192.168.0.1 is the router and DNS server; every internal host connects to it,
# so we exclude it from our graph.
# NOTE: this has to be a set literal. set("192.168.0.1") iterates over the
# string and yields a set of single characters ('1', '9', '2', '.', ...),
# so a membership test with a full IP address would never match.
ignore = {"192.168.0.1"}

In [None]:
# Process the file, tracking internal IPs and external IPs separately.
# NOTE: our internal IP range is 192.168.0.0/24

# Dicts used as insertion-ordered sets: only the keys matter, the value is a
# dummy 1. They stay dicts because later cells call .keys() on them.
internal_nodes = {}
external_nodes = {}

internal_prefix = "192.168.0."

# The context manager guarantees the file is closed, even if a row fails to parse.
with open(filename, 'r') as inputfile:
    for flow in inputfile:
        flow = flow.strip()
        if not flow:
            # Blank lines (e.g. a trailing newline) have no fields to index.
            continue
        fields = flow.split(",")
        # Project only the source and destination IP addresses;
        # all of the other fields are ignored.
        src = fields[4]
        dest = fields[7]
        if dest in ignore:
            continue
        # Register both endpoints (source first) and classify each one as
        # internal or external by its address prefix.
        for ip in (src, dest):
            g.add_node(ip)
            if ip.startswith(internal_prefix):
                internal_nodes[ip] = 1
            else:
                external_nodes[ip] = 1
        g.add_edge(src, dest)

In [None]:
# Per-node degree lookup for the full graph. `g` is undirected, so despite
# the name this is the plain degree rather than an out-degree.
outdeg = g.degree()
print(len(g))

# The graph that gets drawn starts out holding every internal host.
draw_graph = nx.Graph()
draw_graph.add_nodes_from(internal_nodes)

In [None]:
# Remove external nodes that have only a single neighbour (degree 1). These
# nodes cannot contribute to the clustering, so only external nodes with
# degree > 1 are kept and added to the graph that gets drawn.
external_print_nodes = [node for node in external_nodes if outdeg[node] > 1]
draw_graph.add_nodes_from(external_print_nodes)

# Set copy of the kept externals: membership is tested twice per edge below,
# and scanning the list each time would make the loop O(edges * externals).
kept_externals = set(external_print_nodes)

# Keep an edge when at least one endpoint is internal and the other endpoint
# is either internal as well or one of the kept external nodes.
edges_to_draw = []
edges = nx.edges(g)
for u, v in edges:
    u_internal = u in internal_nodes
    v_internal = v in internal_nodes

    both_internal = u_internal and v_internal
    src_internal = u_internal and v in kept_externals
    dest_internal = v_internal and u in kept_externals

    if both_internal or src_internal or dest_internal:
        edges_to_draw.append((u, v))
        draw_graph.add_edge(u, v)

In [None]:
# Report the size of the filtered graph: node count first, then edge count,
# each on its own line.
n, e = draw_graph.number_of_nodes(), draw_graph.number_of_edges()
print(n, e, sep="\n")

In [None]:
# Run Louvain clustering.
# Only the edges of draw_graph are copied over, so a node that has no edge
# at all is not part of cluster_graph and is never passed to best_partition.
cluster_graph = nx.Graph()
cluster_graph.add_edges_from(nx.edges(draw_graph))

clusters = community.best_partition(cluster_graph)

In [None]:
# Plot the clustered graph with colors identifying the clusters

# Invert the node -> cluster id mapping into cluster id -> list of nodes.
labels = {}
for node, c in clusters.items():
    labels.setdefault(c, []).append(node)

print(len(labels))

# One shade of green per cluster. At least 13 shades are always generated
# (the palette size this cell has always used); more are generated when there
# are more than 13 clusters, which would otherwise run past the end of
# `colors` and raise an IndexError.
# plt.get_cmap is used instead of cm.get_cmap, which newer matplotlib
# releases no longer provide.
num_colors = max(13, len(labels))
greens = plt.get_cmap('Greens', num_colors)
colors = greens(range(num_colors))

# k (the optimal distance between nodes) is passed by keyword so the call
# does not depend on the positional parameter order of spring_layout.
pos = nx.spring_layout(draw_graph, k=0.05)  # positions for all nodes

for cluster, color in zip(labels.values(), colors):
    # The RGBA value is wrapped in a one-element list so it is always read
    # as a single color for the whole cluster; a bare 4-float array is
    # ambiguous when the cluster happens to contain exactly four nodes.
    nx.draw_networkx_nodes(draw_graph, pos,
                           nodelist=cluster,
                           node_color=[color],
                           node_size=10)

nx.draw_networkx_edges(draw_graph, pos, edgelist=edges_to_draw, width=0.8)

plt.axis('off')
plt.show()