# I2P Traffic Analysis Notebook
Datenverkehrsanalyse basierend auf aufgezeichnetem I2P-Netzwerkverkehr. Dabei wird der gesamte Verkehr im Hinblick auf die HTTP-Requests analysiert.

In [None]:
!pip install scapy matplotlib numpy pandas scipy

In [None]:
from scapy.all import rdpcap

# Sender and receiver addresses; the filter cells below compare packet IPs against them.
source_ip = '10.8.0.2'
target_ip = '10.8.0.11'
# Packets read from the recorded capture; the analysis cells below work on this variable.
packets = rdpcap('traffic_with_filter.pcap')

In [None]:
from scapy.all import IP, TCP, UDP
import numpy as np
from scipy.stats import entropy

# When True, a packet must additionally have source_ip as its sender.
is_source_filter_enabled = True


def _matches_filter(pkt):
    """Return True for IP packets addressed to target_ip (and sent by source_ip if that filter is on)."""
    if IP not in pkt:
        return False
    ip_layer = pkt[IP]
    if ip_layer.dst != target_ip:
        return False
    return (not is_source_filter_enabled) or ip_layer.src == source_ip


# Drop every packet that does not have target_ip as its receiver.
filtered_packets = [pkt for pkt in packets if _matches_filter(pkt)]

print(f"Anzahl der Pakete zu {target_ip}: {len(filtered_packets)}")

# Anzahl der Pakete vom Sender zu einem anderen Knoten.
Damit soll der erste Sprungknoten identifiziert werden.

In [None]:
from collections import Counter
from scapy.all import IP
import matplotlib.pyplot as plt

# Must be adapted when a different sender is analysed.
source_ip = source_ip
is_showed = True

# One (src, dst) pair per packet sent by source_ip to an address starting with "10."
flows = [
    (ip_pkt[IP].src, ip_pkt[IP].dst)
    for ip_pkt in (p for p in packets if IP in p)
    if ip_pkt[IP].src == source_ip and ip_pkt[IP].dst.startswith("10.")
]

# Pairs ordered from most to least frequent, split into labels and values for plotting.
flow_counts = Counter(flows).most_common()
flow_labels = [f"{src} → {dst}" for (src, dst), _ in flow_counts]
flow_values = [n for _, n in flow_counts]

plt.figure(figsize=(12, 6))
plt.barh(flow_labels, flow_values, alpha=0.8)
plt.title(f'Kommunikationspaare von {source_ip} aus')
plt.xlabel('Anzahl Pakete')
plt.grid(True)
# Put the most frequent pair at the top of the chart.
plt.gca().invert_yaxis()
plt.show()

if is_showed:
    print("Verbindungen und Anzahl Pakete:")
    for label, n in zip(flow_labels, flow_values):
        print(f"{label}: {n}")


## Zählen der Kommunikation aller Knoten mit allen anderen Knoten im Netzwerk.
Es zeigt, welche Knoten häufig miteinander kommunizieren.

In [None]:
# One (src, dst) pair for every packet that carries an IP layer.
flows = [(layer.src, layer.dst) for layer in (pkt[IP] for pkt in packets if IP in pkt)]

# Keep only the 15 most frequent pairs, most frequent first.
flow_counts = Counter(flows).most_common(15)
flow_labels = [f"{src} → {dst}" for (src, dst), _ in flow_counts]
flow_values = [n for _, n in flow_counts]

plt.figure(figsize=(12, 6))
plt.barh(flow_labels, flow_values, alpha=0.8)
plt.title('Top 15 Kommunikationspaare')
plt.xlabel('Anzahl Pakete')
plt.grid(True)
# Put the most frequent pair at the top of the chart.
plt.gca().invert_yaxis()
plt.show()

## Portanalyse (Anzahl Pakete pro Port, TCP/UDP)

Für jeden TCP- und UDP-Port wird die Anzahl der erhaltenen Pakete ausgegeben. Der Port bezieht sich auf den Empfänger-Port.

In [None]:
is_showed = False


def _draw_port_bars(ax, title, ports, counts):
    """Draw one bar per port on *ax* and return the x positions that were used."""
    positions = range(len(ports))
    ax.bar(positions, counts, width=0.8, alpha=0.8)
    ax.set_title(title)
    ax.set_xlabel("Port")
    ax.set_ylabel("Anzahl Pakete")
    ax.grid(True)
    # Bars sit at consecutive positions; the tick labels carry the real port numbers.
    ax.set_xticks(positions)
    ax.set_xticklabels(ports, rotation=45)
    return positions


# Destination ports of the filtered TCP and UDP packets
tcp_ports = [pkt[TCP].dport for pkt in filtered_packets if TCP in pkt]
udp_ports = [pkt[UDP].dport for pkt in filtered_packets if UDP in pkt]

print("Anzahl TCP Pakete:", len(tcp_ports))
print("Anzahl UDP Pakete:", len(udp_ports))

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# TCP: packets per port, ports in ascending order
tcp_port_counts = Counter(tcp_ports)
tcp_ports_sorted = sorted(tcp_port_counts)
tcp_counts = [tcp_port_counts[p] for p in tcp_ports_sorted]
indices_tcp = _draw_port_bars(axes[0], "TCP Ports", tcp_ports_sorted, tcp_counts)

# UDP: packets per port, ports in ascending order
udp_port_counts = Counter(udp_ports)
udp_ports_sorted = sorted(udp_port_counts)
udp_counts = [udp_port_counts[p] for p in udp_ports_sorted]
indices_udp = _draw_port_bars(axes[1], "UDP Ports", udp_ports_sorted, udp_counts)

plt.tight_layout()
plt.show()

if is_showed:
    print("TCP Ports und Anzahl der Pakete:")
    for port, n in zip(tcp_ports_sorted, tcp_counts):
        print(f"Port {port}: {n}")
    print("\nUDP Ports und Anzahl der Pakete:")
    for port, n in zip(udp_ports_sorted, udp_counts):
        print(f"Port {port}: {n}")


## Portanalyse (Payload-Grösse)

Analyse der an den Ports eingehenden Payload-Grössen, um typische Grössenverteilungen zu erkennen.

In [None]:
from collections import Counter
import matplotlib.pyplot as plt
from scapy.all import *

is_showed = False
top_output = 15

# Length of the TCP/UDP layer payload of every filtered packet, per protocol
tcp_payload_sizes = [len(pkt[TCP].payload) for pkt in filtered_packets if TCP in pkt]
udp_payload_sizes = [len(pkt[UDP].payload) for pkt in filtered_packets if UDP in pkt]

print("Anzahl TCP Pakete:", len(tcp_payload_sizes))
print("Anzahl UDP Pakete:", len(udp_payload_sizes))

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# TCP: the top_output most frequent payload sizes
tcp_size_counts = Counter(tcp_payload_sizes)
top_tcp = tcp_size_counts.most_common(top_output)
tcp_sizes_top = [size for size, _ in top_tcp]
tcp_counts_top = [count for _, count in top_tcp]
indices_tcp = range(len(top_tcp))

# UDP: the top_output most frequent payload sizes
udp_size_counts = Counter(udp_payload_sizes)
top_udp = udp_size_counts.most_common(top_output)
udp_sizes_top = [size for size, _ in top_udp]
udp_counts_top = [count for _, count in top_udp]
indices_udp = range(len(top_udp))

# Both panels are drawn the same way; only the data and the protocol name differ.
for ax, proto, positions, sizes, counts in (
    (axes[0], "TCP", indices_tcp, tcp_sizes_top, tcp_counts_top),
    (axes[1], "UDP", indices_udp, udp_sizes_top, udp_counts_top),
):
    ax.bar(positions, counts, width=0.8, alpha=0.8)
    ax.set_title(f"Top {top_output} {proto}")
    ax.set_xlabel("Grösse (Bytes)")
    ax.set_ylabel("Anzahl Pakete")
    ax.grid(True)
    # Bars sit at consecutive positions; the tick labels carry the payload sizes.
    ax.set_xticks(positions)
    ax.set_xticklabels(sizes, rotation=45)

plt.tight_layout()
plt.show()

if is_showed:
    # most_common() already returns the entries from most to least frequent,
    # so no additional sort is needed before printing.
    # The TCP list previously had no header, unlike the UDP list below it.
    print(f"Top {top_output} TCP Paketgrössen (bytes) – sortiert nach Häufigkeit:")
    for size, count in top_tcp:
        print(f"{size} Bytes: {count} Pakete")
    print(f"\nTop {top_output} UDP Paketgrössen (bytes) – sortiert nach Häufigkeit:")
    for size, count in top_udp:
        print(f"{size} Bytes: {count} Pakete")

## 3. Paketgrössenanalyse

Es wird geprüft, wie häufig Pakete einer bestimmten Grösse versendet werden.

In [None]:
from collections import Counter
import matplotlib.pyplot as plt

show_details = False

# len() of every filtered packet
packet_lengths = list(map(len, filtered_packets))

# Frequency of each length, most frequent first
length_counts = Counter(packet_lengths)
sorted_counts = length_counts.most_common()

if not show_details:
    print("show_details auf True setzen um die Details zu sehen")
else:
    print("Paketlängen und Häufigkeiten:")
    for length, count in sorted_counts:
        print(f"{length} Bytes: {count}")

plt.figure(figsize=(10, 5))
plt.hist(packet_lengths, bins=10, alpha=0.7)
plt.title('Paketgrössenverteilung')
plt.xlabel('Paketlänge (Bytes)')
plt.ylabel('Häufigkeit')
plt.grid(True)
plt.show()


## Entropie-Analyse (Payload)

Eine hohe Entropie deutet darauf hin, dass eine starke Verschlüsselung verwendet wird.

In [None]:
show_details = True


def calc_entropy(data):
    """Return the base-2 entropy of the byte-value histogram of *data*.

    Empty (falsy) input yields 0. Otherwise *data* is read as unsigned bytes,
    counted into 256 bins and handed to ``scipy.stats.entropy``.
    """
    if data:
        byte_values = np.frombuffer(data, dtype=np.uint8)
        histogram = np.bincount(byte_values, minlength=256)
        return entropy(histogram, base=2)
    return 0


# Imported explicitly: this cell uses Raw but does not import it itself.
from scapy.all import Raw

# Entropy of the raw payload of every filtered packet that has a Raw layer
entropies = [calc_entropy(bytes(pkt[Raw].load)) for pkt in filtered_packets if Raw in pkt]
# np.mean([]) emits a RuntimeWarning before returning nan; handle the empty case explicitly.
mean_entropy = np.mean(entropies) if entropies else float("nan")

plt.figure(figsize=(10, 5))
plt.axvline(mean_entropy, color='red', linestyle='dashed', linewidth=2, label=f'Mittelwert: {mean_entropy:.2f} Bits')
counts, bin_edges, _ = plt.hist(entropies, bins=10, alpha=0.7)
plt.xlabel('Entropie (Bits)')
plt.ylabel('Anzahl Pakete')
plt.title('Verteilung der Payload-Entropie')
plt.grid(True)
# Without a legend the label of the mean line is never displayed.
plt.legend()
plt.show()

if show_details:
    print(f"\nDurchschnittliche Entropie: {mean_entropy:.4f} Bits")
    # Only one header: the rows below are histogram bin ranges, so the former
    # "Entropie (gerundet)" header did not describe them.
    print("Bin-Bereich (von - bis)       | Anzahl Pakete")
    print("--------------------------------|----------------")
    # bin_edges has one more entry than counts; pair each count with its two edges.
    for start, end, count in zip(bin_edges[:-1], bin_edges[1:], counts):
        print(f"{start:8.4f} - {end:8.4f}       | {int(count):>14}")


## Protokollverteilung (TCP vs. UDP)

Zeigt, welcher Anteil der Pakete TCP bzw. UDP verwendet.

In [None]:
def _protocol_label(pkt):
    """Classify a packet as "TCP", "UDP" or "Andere" by the layers it contains."""
    if TCP in pkt:
        return "TCP"
    if UDP in pkt:
        return "UDP"
    return "Andere"


protocols = [_protocol_label(pkt) for pkt in filtered_packets]
protocol_counts = Counter(protocols)

# Fixed colour per protocol. A positional colour list follows the Counter's
# first-seen order, so the same protocol could get different colours per capture.
protocol_colors = {"TCP": "skyblue", "UDP": "salmon", "Andere": "grey"}
protocol_labels = list(protocol_counts)

plt.figure(figsize=(8, 6))
# Plain lists instead of dict views: views are not sequences, and pie() converts
# its input through NumPy, which can reject them.
plt.pie(
    [protocol_counts[label] for label in protocol_labels],
    labels=protocol_labels,
    autopct='%1.1f%%',
    startangle=140,
    colors=[protocol_colors[label] for label in protocol_labels],
)
plt.axis('equal')
# Title follows target_ip instead of repeating the address as a literal.
plt.title(f'TCP/UDP Verteilung zu {target_ip}')
plt.show()

# Start- und Endknoten finden

In [None]:
from scapy.all import IP, TCP, Raw
import re

show_details = True


def build_graph_from_pcap(packets):
    """Scan TCP/IP packets for HTTP GET requests and "Hello from" responses.

    Besides returning the results, the function stores ``graph``,
    ``start_nodes_dict``, ``hello_nodes`` and ``combined_events`` as module
    globals; ``track_start_to_target_paths`` further down reads
    ``combined_events`` and ``start_nodes_dict`` from there.

    Args:
        packets: Iterable of packets. Packet numbers start at 1, in iteration order.

    Returns:
        A 4-tuple ``(graph, start_nodes, hello_nodes, events)``:
        ``graph`` maps ``(src_ip, src_port)`` to a list of
        ``((dst_ip, dst_port), packet_time)`` edges; ``start_nodes`` is a plain
        dict mapping the destination node of each GET packet to the packet
        numbers seen; ``hello_nodes`` is a set of IPs; ``events`` is the list of
        ``(packet_number, ip, "GET" | "HELLO")`` tuples sorted by packet number.
    """
    global graph, start_nodes_dict, hello_nodes, combined_events
    # Imported locally: this cell uses defaultdict, but the only explicit import
    # in the file sits in a later cell.
    from collections import defaultdict

    # Compiled once instead of being re-evaluated for every packet.
    request_line = re.compile(r"GET http://.*HTTP")

    graph = defaultdict(list)
    start_nodes_dict = defaultdict(list)
    hello_nodes = set()
    # Entries are (packet number, IP, type).
    combined_events = []

    for idx, pkt in enumerate(packets, start=1):
        if not (IP in pkt and TCP in pkt):
            continue

        src_ip = pkt[IP].src
        dst_ip = pkt[IP].dst
        src_node = (src_ip, pkt[TCP].sport)
        dst_node = (dst_ip, pkt[TCP].dport)

        # Store the edge in the graph.
        graph[src_node].append((dst_node, pkt.time))

        if not pkt.haslayer(Raw):
            continue

        # Best-effort decoding: a payload that cannot be decoded counts as empty.
        try:
            payload = pkt[Raw].load.decode('utf-8', errors='ignore')
        except Exception:
            payload = ""

        # A proxy-style request line marks the packet's destination as a start node.
        if request_line.search(payload):
            start_nodes_dict[dst_node].append(idx)
            combined_events.append((idx, dst_ip, "GET"))

        # Every line containing "Hello from" produces one HELLO event.
        # NOTE(review): hello_nodes records dst_ip while the event records
        # src_ip - confirm which side is meant to be the receiver node.
        for line in payload.splitlines():
            if "Hello from" in line:
                hello_nodes.add(dst_ip)
                combined_events.append((idx, src_ip, "HELLO"))

    return graph, dict(start_nodes_dict), hello_nodes, sorted(combined_events, key=lambda x: x[0])


def main():
    """Run build_graph_from_pcap on the loaded packets and print start nodes, receiver nodes and events."""
    _, get_nodes, receiver_ips, events = build_graph_from_pcap(packets)

    print("Eindeutige Startknoten:")
    for ip, port in sorted(get_nodes):
        print(f"{ip}:{port}")

    print("\nEindeutige Empfängerknoten:")
    for ip in sorted(receiver_ips):
        print(f"{ip}")

    # Without the details flag only a hint is printed instead of the event list.
    if not show_details:
        print("\nDetails können angezeigt werden indem die Variable show_details auf true gesetzt wird")
        return

    print("\nAlle Einträge:")
    for line_num, ip, typ in events:
        print(f"{line_num}: {ip} → {typ}")


if __name__ == "__main__":
    main()

## Verbindungen finden, die der Startknoten aufbaut

In [None]:
from collections import defaultdict
from scapy.all import rdpcap, IP, TCP

# show_details: print the connections found for every single GET→HELLO window.
show_details = False
# show_details_all_connections: print the combined list of start-node connections at the end.
show_details_all_connections = True


def track_start_to_target_paths(use_packages, track_other_ip_adress=None):
    """Print and collect the TCP connections seen between each GET event and the next HELLO event.

    Reads the module-level ``combined_events`` and ``start_nodes_dict`` (both set
    by ``build_graph_from_pcap``) as well as the flags ``show_details`` and
    ``show_details_all_connections``.

    Args:
        use_packages: Indexable packet sequence. The 1-based packet numbers stored
            in ``combined_events`` are used as indices into it (number - 1).
        track_other_ip_adress: Optional collection of destination IPs. Packets
            inside a window that go to one of them are collected separately.

    Returns:
        A 4-tuple:
        - list of ``(packet_number, dst_ip, dst_port)`` for every IP/TCP packet
          inside any GET→HELLO window (a packet can appear more than once when
          windows overlap),
        - list of the per-window connections sent by the start node,
        - ``items()`` view mapping a start IP to its connections, filled only
          from windows with exactly one such connection,
        - ``items()`` view mapping each tracked destination IP to its packets.
    """
    print("Details can be shown with the variable show_details = True")
    print("Listing of all connections from the start node to another node:\n")
    all_connections = []
    all_start_connections = []
    start_node_unique_connections = defaultdict(list)
    track_other_connections = defaultdict(list)
    # Filter all GET and HELLO events in chronological order
    get_hello_events = [e for e in combined_events if e[2] in {"GET", "HELLO"}]

    for i, (get_line_num, get_ip, typ) in enumerate(get_hello_events):
        # Only GET events open a window; HELLO events are looked up from there.
        if typ != "GET":
            continue
        # Packet numbers are 1-based, list indices 0-based.
        get_pkt = use_packages[get_line_num - 1]
        # Determine the destination IP and port of the GET packet
        if IP in get_pkt and TCP in get_pkt:
            dst_ip = get_pkt[IP].dst
            dst_port = get_pkt[TCP].dport
        else:
            dst_ip = "?"
            dst_port = "?"
        # Determine the start node
        # (first known start node with this IP; the port is not compared)
        matching_nodes = [node for node in start_nodes_dict if node[0] == get_ip]
        if not matching_nodes:
            continue
        get_node = matching_nodes[0]

        # Find the next receiver node after a start node.
        hello_line_num = None
        hello_ip = None
        for j in range(i + 1, len(get_hello_events)):
            if get_hello_events[j][2] == "HELLO":
                hello_line_num = get_hello_events[j][0]
                hello_ip = get_hello_events[j][1]
                break
        if hello_line_num is None:
            print(f"{get_line_num} GET:     {get_ip}:{get_node[1]} → {dst_ip}:{dst_port}")
            print("Kein folgendes HELLO gefunden.\n")
            continue

        # Exclusive end index: the window runs from the GET packet up to and
        # including the HELLO packet, clamped to the length of use_packages.
        end_index = min(hello_line_num, len(use_packages))

        # Search all packets between the start node and the receiver node. Only those that have the start node as the sender.
        # Packets whose destination IP begins with "192." are ignored.
        matching_connections = []
        # To avoid duplicates
        seen_connections = set()
        for pkt_idx in range(get_line_num - 1, end_index):
            pkt = use_packages[pkt_idx]
            if IP in pkt and TCP in pkt:
                src = pkt[IP].src
                conn_dst_ip = pkt[IP].dst
                conn_dst_port = pkt[TCP].dport
                a_connection = (pkt_idx + 1, conn_dst_ip, conn_dst_port)
                # Recorded for every IP/TCP packet in the window, whoever sent it.
                all_connections.append(a_connection)
                if track_other_ip_adress and conn_dst_ip in track_other_ip_adress:
                    track_other_connections[conn_dst_ip].append(a_connection)
                if src == get_node[0]:
                    if conn_dst_ip.startswith("192."):
                        continue
                    # Keep only the first packet per (destination IP, port) in this window.
                    key = (conn_dst_ip, conn_dst_port)
                    if key not in seen_connections:
                        seen_connections.add(key)
                        matching_connections.append(a_connection)
        all_start_connections.extend(matching_connections)
        # A window counts as "unique" when the start node reached exactly one destination.
        if len(matching_connections) == 1:
            for conn in matching_connections:
                if conn not in start_node_unique_connections[get_node[0]]:
                    start_node_unique_connections[get_node[0]].append(conn)
        if show_details:
            # Last statement of the loop body, so this only skips the printing below.
            if not matching_connections:
                continue
            print("-----------------------------")
            print("Connections from the start node to other nodes:")
            print("-----------------------------")
            for line, conn_dst_ip, conn_dst_port in matching_connections:
                print(f"{line:<5}: {get_node[0]}:{get_node[1]} → {conn_dst_ip}:{conn_dst_port}")
            print("-----------------------------")

    if show_details_all_connections:
        print("-----------------------------")
        print("Connections from the start node to other nodes:")
        for line_number, dst_ip, dst_port in all_start_connections:
            print(f"Packet {line_number}: {dst_ip}:{dst_port}")
        print("-----------------------------")
    print("Listing all connections for start nodes that have exactly one outgoing connection:")
    print("-----------------------------")
    total_found = 0
    for start_ip, connections in start_node_unique_connections.items():
        for line_number, dst_ip, dst_port in connections:
            print(f"{line_number}:{start_ip} {dst_ip}:{dst_port}")
            total_found += 1
    print(f"Total entries found: {total_found}")
    print("-----------------------------")

    return all_connections, all_start_connections, start_node_unique_connections.items(), track_other_connections.items()


if __name__ == "__main__":
    # Runs the tracking on a freshly read capture and additionally tracks packets sent to 10.8.0.11.
    # NOTE(review): the packet numbers in combined_events were produced from `packets`;
    # this re-reads the capture, presumably the same file - confirm both paths stay in sync.
    all_connections, all_start_connections, start_node_unique_connections, track_other_connections = track_start_to_target_paths(
        rdpcap('traffic_with_filter.pcap'), ["10.8.0.11"])