# Network Traffic Graph Analysis

This notebook analyzes network traffic recorded in Suricata EVE JSON logs.
It builds a directed communication graph that links IP addresses to the URLs they request, then analyzes the structure of that graph.

Sample Suricata EVE log entry:

`{"timestamp":"2026-01-17T14:31:22.564592+0000","flow_id":387273494696517,"in_iface":"eth0","event_type":"http","src_ip":"192.168.65.1","src_port":37570,"dest_ip":"192.168.65.7","dest_port":2376,"proto":"TCP","ip_v":4,"pkt_src":"wire/pcap","tx_id":0,"http":{"hostname":"api.moby.localhost","url":"/v1.52/containers/a8e09e219c2cabdf4e366d688265735b97c491cf106c222ab599bfdf024fa044/stats?stream=false","http_user_agent":"Docker-Client/29.1.3 (windows)","http_content_type":"application/json","http_method":"GET","protocol":"HTTP/1.1","status":200,"length":2060}}`

In [None]:
import json
import collections
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

## 1) Load the data and build the IP <-> URL graph

Load the EVE JSON file and build a directed graph with IP nodes and URL nodes.

In [None]:
import json
import networkx as nx

# Directed graph of the observed traffic. Nodes are IP addresses (type='ip')
# and HTTP request URLs (type='url').
# NOTE: nx.DiGraph stores a single edge per (source, target) pair, so repeated
# events between the same endpoints collapse into one edge whose `event_type`
# comes from the last matching event. True parallel edges would need
# nx.MultiDiGraph.
G = nx.DiGraph()

# Number of non-empty lines that could not be used (invalid JSON or not a JSON
# object), reported after loading so data-quality problems stay visible.
skipped_lines = 0

with open('../data/eve.json', 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            continue

        # One malformed or truncated record must not abort the whole load.
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            skipped_lines += 1
            continue
        if not isinstance(event, dict):
            skipped_lines += 1
            continue

        src = event.get('src_ip')
        dest = event.get('dest_ip')
        # Only read the URL when the 'http' field is actually an object.
        http_info = event.get('http')
        http_url = http_info.get('url') if isinstance(http_info, dict) else None
        event_type = event.get('event_type')

        # Create typed nodes so IPs and URLs can be told apart later
        if src:
            G.add_node(src, type='ip')
        if dest:
            G.add_node(dest, type='ip')
        if http_url:
            G.add_node(http_url, type='url')

        # Link source -> destination, plus source -> URL -> destination
        # when the event carries an HTTP URL
        if src and dest:
            G.add_edge(src, dest, event_type=event_type)
        if src and http_url:
            G.add_edge(src, http_url, event_type='http_request')
        if http_url and dest:
            G.add_edge(http_url, dest, event_type='http_response')

if skipped_lines:
    print(f"Skipped {skipped_lines} malformed line(s)")
print(f"Graph ready: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


In [None]:
# Tally how many nodes carry each 'type' attribute ('unknown' when absent)
node_types = dict(
    collections.Counter(
        attrs.get('type', 'unknown') for _, attrs in G.nodes(data=True)
    )
)

direction_label = 'Directed' if G.is_directed() else 'Undirected'
print(f"Node types: {node_types}")
print(f"Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
print(f"Graph type: {direction_label}")

## 2) Basic graph properties

In [None]:
# Size and density of the full graph
nodes, edges = G.number_of_nodes(), G.number_of_edges()
density = nx.density(G)

# One line per property, emitted with a single print call
print(
    f"Nodes: {nodes}",
    f"Edges: {edges}",
    f"Density: {density:.6f}",
    f"Graph is directed: {G.is_directed()}",
    sep="\n",
)

## 3) Degree analysis

In [None]:
# For directed graphs, analyze in-degree, out-degree, and total degree
in_degrees = dict(G.in_degree())
out_degrees = dict(G.out_degree())
total_degrees = dict(G.degree())

in_degree_values = list(in_degrees.values())
out_degree_values = list(out_degrees.values())
total_degree_values = list(total_degrees.values())


def _print_degree_stats(header, values, with_std=False):
    """Print min/max/mean/median (and optionally std deviation) of `values`."""
    print(header)
    print(f"Min: {min(values)}, Max: {max(values)}")
    print(f"Mean: {np.mean(values):.2f}, Median: {np.median(values):.2f}")
    if with_std:
        print(f"Std deviation: {np.std(values):.2f}")


def _short_label(node, limit=50):
    """Return the node label, cut to `limit` chars with '...' only if truncated."""
    label = str(node)
    return label if len(label) <= limit else label[:limit] + "..."


if total_degree_values:
    _print_degree_stats("=== Total Degree (in + out) ===", total_degree_values, with_std=True)
    _print_degree_stats("\n=== In-Degree ===", in_degree_values)
    _print_degree_stats("\n=== Out-Degree ===", out_degree_values)

    # Top nodes by total degree
    top_nodes = sorted(total_degrees.items(), key=lambda x: x[1], reverse=True)[:5]
    print("\nTop 5 nodes by total degree:")
    for node, deg in top_nodes:
        node_type = G.nodes[node].get('type', 'unknown')
        print(f"  {_short_label(node)} ({node_type}): {deg} (in: {in_degrees[node]}, out: {out_degrees[node]})")
else:
    # min()/max() raise ValueError on an empty sequence, so skip the statistics
    top_nodes = []
    print("Graph has no nodes; skipping degree statistics")

In [None]:
# Degree distributions: map each observed degree to the number of nodes having it
total_degree_count, in_degree_count, out_degree_count = (
    dict(collections.Counter(values))
    for values in (total_degree_values, in_degree_values, out_degree_values)
)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# One panel per degree kind: (histogram, bar colour, title, x-axis label)
panels = (
    (total_degree_count, 'steelblue', "Total Degree Distribution", "Degree"),
    (in_degree_count, 'coral', "In-Degree Distribution", "In-Degree"),
    (out_degree_count, 'lightgreen', "Out-Degree Distribution", "Out-Degree"),
)

for ax, (counts, color, title, xlabel) in zip(axes, panels):
    ax.bar(counts.keys(), counts.values(), color=color)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Count")
    # Log-scaled counts on the y-axis
    ax.set_yscale('log')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4) Connected components

In [None]:
# The graph is directed, so split it into weakly connected components,
# ordered from largest to smallest
components = list(nx.weakly_connected_components(G))
components.sort(key=len, reverse=True)
component_sizes = list(map(len, components))

# Sizes are in descending order, so the first entry (if any) is the largest
largest_size = component_sizes[0] if component_sizes else 0

print(f"Number of weakly connected components: {len(components)}")
print(f"Largest component size: {largest_size}")
print(f"Top 10 component sizes: {component_sizes[:10]}")

In [None]:
# Extract the giant component (the first, i.e. largest, entry of `components`);
# fall back to the full graph when there are no components at all
if not components:
    Ggiant = G
    print("No components found")
else:
    Ggiant = G.subgraph(components[0])
    print(f"Giant component: {Ggiant.number_of_nodes()} nodes, {Ggiant.number_of_edges()} edges")

## 5) Path length analysis (on giant component)

In [None]:
if Ggiant.number_of_nodes() <= 1:
    print("Giant component too small for path analysis")
else:
    try:
        # Directed path metrics are only used when the component is strongly
        # connected; otherwise the metrics are computed on an undirected copy.
        if nx.is_strongly_connected(Ggiant):
            path_graph = Ggiant
            status_message = "Graph is strongly connected"
        else:
            Ggiant_undirected = Ggiant.to_undirected()
            path_graph = Ggiant_undirected
            status_message = "Graph is not strongly connected (using undirected version)"

        # Compute both metrics before printing anything, so a failure
        # produces only the error report below
        diameter = nx.diameter(path_graph)
        avg_path_length = nx.average_shortest_path_length(path_graph)

        print(status_message)
        print(f"Diameter: {diameter}")
        print(f"Average shortest path length: {avg_path_length:.2f}")
    except (nx.NetworkXError, nx.NetworkXPointlessConcept) as e:
        print(f"Error computing path metrics: {e}")
        print("Graph may be disconnected or too large")

## 6) Clustering coefficient

In [None]:
# Average clustering coefficient for the full graph and for the giant component
avg_clustering, avg_clustering_giant = (
    nx.average_clustering(graph) for graph in (G, Ggiant)
)

print(
    f"Average clustering (full graph): {avg_clustering:.4f}",
    f"Average clustering (giant component): {avg_clustering_giant:.4f}",
    sep="\n",
)

## 7) Graph visualization

In [None]:
# Visualize the graph with node type colors
from matplotlib.patches import Patch

# Draw the whole giant component unless it has more than 100 nodes, in which
# case only the subgraph induced by its first 100 nodes is drawn
Gviz = Ggiant
if Ggiant.number_of_nodes() > 100:
    sample_nodes = list(Ggiant.nodes())[:100]
    Gviz = Ggiant.subgraph(sample_nodes)
    print(f"Visualizing sample of {Gviz.number_of_nodes()} nodes")

plt.figure(figsize=(14, 10))
pos = nx.spring_layout(Gviz, k=1, iterations=50)

# Colour lookup by node type; any other type is drawn in gray
type_palette = {'ip': 'steelblue', 'url': 'coral'}
node_colors = [
    type_palette.get(attrs.get('type', 'unknown'), 'gray')
    for _, attrs in Gviz.nodes(data=True)
]

nx.draw_networkx_nodes(Gviz, pos, node_size=100, node_color=node_colors, alpha=0.7)
nx.draw_networkx_edges(
    Gviz,
    pos,
    alpha=0.3,
    width=0.5,
    arrows=True,
    arrowsize=10,
    edge_color='gray',
    connectionstyle='arc3,rad=0.1',
)

# Legend with one entry per known node type
legend_elements = [
    Patch(facecolor=type_palette[kind], label=text)
    for kind, text in (('ip', 'IP nodes'), ('url', 'URL nodes'))
]
plt.legend(handles=legend_elements, loc='upper right')

plt.title("Network Graph Visualization (Directed)")
plt.axis('off')
plt.tight_layout()
plt.show()