# Hey!
This simple notebook turns a .csv produced by the previous notebook - "mitm_test.ipynb" - into an interactive domain visualisation.





---

If you want to use multiple CSVs, add each of them to both df_list and df_color_map.
If you only want to use one app, edit df_list as well so that it contains just that app, with the correct name and punctuation.

---
1. Every circle is a domain or a subdomain.

  The hierarchy is represented by the inclusion of circles into others.
Example: o-sdk.ads.unity3d.com is represented by 3 circles: o-sdk inside of ads inside of unity3d.
2. Colors represent the app (I analysed about 6) that the request corresponds to.

  I used low opacities for better visibility; with my choice of colors and opacities, the combination of all of them turns out to be purple.
3. Circle sizes, or masses, represent the frequency: how often a given domain appeared in the request data.

  See any insights?
Unity rules the mobile game app traffic scene.
For comparison, the g / doubleclick thing is Google Ad Network.

---

Please save the resulting .html file and open it in your browser - it turns out to be very complicated to embed it inside the notebook.

In [None]:
!pip install tldextract pyvis

import pandas as pd
import tldextract
from collections import defaultdict
import networkx as nx
from pyvis.network import Network

In [None]:
# Load the CSV created in the "mitm_test" notebook (upload it to the working
# directory first). The later cells read the 'url' column of this DataFrame.
appname_df = pd.read_csv("appname.csv") # upload the csv created in the "mitm_test" notebook

In [None]:
# (app name, DataFrame) pairs to visualise. Each DataFrame must already be
# loaded (e.g. with pd.read_csv) before it is listed here; referencing an
# undefined DataFrame raises NameError. To add another app, load its CSV,
# uncomment/add its entry below AND give it a color in df_color_map.
df_list = [
    ("appname", appname_df),
    # ("otherapp", otherapp_df),
    # ...
]

# Base color (hex) for each app name used in df_list. Nodes reached by several
# apps get the average of the corresponding colors.
df_color_map = {
    "appname": "#e6194B",  # red
    # "otherapp": "#3cb44b",  # green
    # "someother": "#4363d8",  # blue
    # "...": "#f58231",  # orange
    # "iii": "#911eb4",  # purple
    # "ooo": "#42d4f4"
}


In [None]:
def extract_domain_and_subdomains(url):
    """Return the host labels of *url*, ordered from the domain outwards.

    The first element is the ``domain`` field reported by
    ``tldextract.extract``; it is followed by the dot-separated pieces of the
    ``subdomain`` field in reverse order, so the label closest to the domain
    comes first. When there is no subdomain the list holds only the domain.
    """
    extracted = tldextract.extract(url)
    labels = [extracted.domain]
    if extracted.subdomain:
        # "a.b" -> ["b", "a"]: walk from the domain towards the outermost label.
        labels.extend(reversed(extracted.subdomain.split('.')))
    return labels

def build_domain_tree(df_list):
    """Aggregate the URLs of several DataFrames into a nested label tree.

    Args:
        df_list: iterable of ``(name, dataframe)`` pairs; each dataframe must
            have a ``'url'`` column (missing values are skipped).

    Returns:
        A ``(domain_tree, frequencies, domain_to_dfs)`` tuple where

        * ``domain_tree`` is a dict of dicts: domain label -> subdomain
          label -> ... following the order produced by
          ``extract_domain_and_subdomains``;
        * ``frequencies`` maps each label to the number of times it occurred
          across all URLs;
        * ``domain_to_dfs`` maps each label to the set of dataframe names in
          which it occurred.

    Note:
        ``frequencies`` and ``domain_to_dfs`` are keyed by the bare label, so
        the same label appearing under different domains shares one entry.
    """
    tree = {}
    counts = defaultdict(int)
    sources = defaultdict(set)

    for name, frame in df_list:
        for url in frame['url'].dropna().tolist():
            cursor = tree
            for label in extract_domain_and_subdomains(url):
                counts[label] += 1
                sources[label].add(name)
                # Descend one level, creating the branch on first sight.
                cursor = cursor.setdefault(label, {})

    return tree, counts, sources

def tree_to_graph(domain_tree):
    """Convert a nested label tree into a directed graph.

    Args:
        domain_tree: dict of dicts as built by ``build_domain_tree``; each key
            is a label and its value is the subtree of labels below it.

    Returns:
        A ``networkx.DiGraph`` with one node per distinct label and an edge
        from every label to each of its direct children in the tree.

    Note:
        Nodes are identified by the label alone, so equal labels found in
        different branches map to the same graph node.
    """
    G = nx.DiGraph()

    def add_edges(d, parent=None):
        for node, subtree in d.items():
            # Register the node explicitly: relying on add_edge alone would
            # drop top-level domains that have no subdomains at all.
            G.add_node(node)
            if parent is not None:
                G.add_edge(parent, node)
            add_edges(subtree, node)

    add_edges(domain_tree)
    return G

def hex_to_rgb(hex_color):
    """Convert a hex color string to an ``(r, g, b)`` tuple of ints (0-255).

    Accepts ``"#rrggbb"`` / ``"rrggbb"``, the shorthand ``"#rgb"``, and the
    variants with a trailing alpha component (``"#rrggbbaa"``, ``"#rgba"``),
    whose alpha part is ignored. Surrounding whitespace is tolerated.

    Raises:
        ValueError: if the string has an unsupported length or contains
            characters that are not hexadecimal digits.
    """
    # lstrip (not strip) so only the leading marker is removed.
    digits = hex_color.strip().lstrip('#')
    if len(digits) in (3, 4):
        # Expand CSS shorthand: "0af" -> "00aaff".
        digits = ''.join(ch * 2 for ch in digits)
    if len(digits) not in (6, 8):
        # Without this check an odd-length string would be silently mis-parsed.
        raise ValueError(f"invalid hex color: {hex_color!r}")
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

def rgb_to_rgba(r, g, b, alpha=0.2):
    """Format color components as a CSS ``rgba(r,g,b,alpha)`` string."""
    components = ",".join(str(value) for value in (r, g, b, alpha))
    return "rgba(" + components + ")"

def mix_colors(hex_colors, alpha=0.2):
    """Average several hex colors into a single CSS ``rgba(...)`` string.

    Args:
        hex_colors: collection of hex color strings understood by
            ``hex_to_rgb``. May be empty or ``None``.
        alpha: opacity written into the resulting ``rgba`` string.

    Returns:
        The channel-wise mean of the colors (each channel truncated to an
        int) with the requested alpha, or a neutral gray with the requested
        alpha when no colors are given.
    """
    if not hex_colors:
        # Default gray; honor the caller's alpha instead of a fixed 0.2.
        return f"rgba(153,153,153,{alpha})"
    rgbs = [hex_to_rgb(color) for color in hex_colors]
    count = len(rgbs)
    # zip(*rgbs) regroups the (r, g, b) tuples into one sequence per channel.
    avg_r, avg_g, avg_b = (int(sum(channel) / count) for channel in zip(*rgbs))
    return rgb_to_rgba(avg_r, avg_g, avg_b, alpha)

def visualize_pyvis(G, frequencies, domain_to_dfs, df_color_map, html_file="combined_domains.html"):
    """Render the domain graph as an interactive pyvis HTML page.

    Args:
        G: directed graph whose nodes are domain/subdomain labels.
        frequencies: mapping label -> number of occurrences; drives the node
            size and the label font size. Missing labels count as 0.
        domain_to_dfs: mapping label -> collection of dataframe (app) names in
            which the label occurred. Missing labels count as "no dataframe".
        df_color_map: mapping dataframe name -> hex color. Names without a
            color are skipped when mixing; a node with no usable color falls
            back to the default gray of ``mix_colors``.
        html_file: path of the HTML file to write.
    """
    net = Network(height="700px", width="100%", notebook=True, directed=True, cdn_resources='in_line')

    # set custom physics options using JSON configuration - this is some complex stuff
    net.set_options("""
    var options = {
      "physics": {
        "barnesHut": {
          "gravitationalConstant": -1000,
          "centralGravity": 0.05,
          "springLength": 200,
          "springConstant": 0.04,
          "damping": 0.09,
          "avoidOverlap": 0.5
        },
        "minVelocity": 0.75
      }
    }
    """)

    for node in G.nodes():
        # .get() avoids inserting keys into defaultdicts and tolerates plain dicts.
        freq = frequencies.get(node, 0)
        # Sorted so the tooltip and the mixed color list have a stable order.
        df_names = sorted(domain_to_dfs.get(node, ()))
        # Skip apps that have no color configured instead of raising KeyError.
        hex_colors = [df_color_map[df_name] for df_name in df_names if df_name in df_color_map]
        node_color = mix_colors(hex_colors, alpha=0.2)

        font_size = max(11, int(freq * 1.5))
        node_size = freq * 8

        # One field per line; previously the three parts were glued together
        # into a single unreadable string.
        tooltip = "\n".join((
            f"{node}",
            f"Frequency: {freq}",
            f"Dataframes: {', '.join(df_names) if df_names else 'None'}",
        ))

        net.add_node(
            node,
            label=node,
            size=node_size,
            color=node_color,
            font={'size': font_size, 'color': '#222'},
            title=tooltip
        )

    for source, target in G.edges():
        net.add_edge(source, target)

    net.show(html_file)
    print(f"visualization saved to {html_file}.")

In [None]:
# Run the pipeline: collect labels, counts and per-app membership from every
# DataFrame in df_list, then turn the nested label tree into a directed graph.
domain_tree, frequencies, domain_to_dfs = build_domain_tree(df_list)
G = tree_to_graph(domain_tree)

# Write the interactive visualisation to "ad_domains.html" (open it in a browser).
visualize_pyvis(G, frequencies, domain_to_dfs, df_color_map, "ad_domains.html")