In [1]:
# Install the third-party packages this notebook needs into the kernel's environment.
# NOTE(review): versions are unpinned, so a fresh run may pull newer major releases
# than the notebook was written against. networkx and numpy are imported in the next
# cell but are not installed here — confirm the environment already provides them.
%pip install plotly
%pip install scipy
%pip install nbformat
%pip install htmlmin
%pip install dash 

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import networkx as nx
import re
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import plotly.graph_objects as go
import networkx as nx
import numpy as np

# Visualise Identity Islands

## Load the Graph Data

In [3]:
import networkx as nx
import re

def load_nquads_to_graph(nquads_file, base_uri="http://syntetic_identity_island.org/"):
    """Load an N-Quads (or N-Triples) file into a directed NetworkX graph.

    Only statements whose predicate IRI starts with ``base_uri`` are used; the
    rest of the predicate IRI becomes the attribute name / edge type.

    * ``<s> <p> "literal"`` statements become attributes of node ``s``. When
      the same attribute is seen more than once the values are collected into
      a list, in file order.
    * ``<s> <p> <o>`` statements become a directed edge ``s -> o`` whose
      ``type`` attribute is the shortened predicate. A later statement for the
      same ``(s, o)`` pair overwrites the edge's ``type``.

    An optional datatype (``^^<...>``), language tag (``@en``) and graph label
    (the fourth term of a quad) are accepted and ignored. Lines that match
    neither shape (comments, blank lines, statements using blank nodes as
    subject or object) are skipped. Literal values are stored exactly as
    written in the file; escape sequences are not decoded.

    Args:
        nquads_file: Path of the file to read. It is decoded as UTF-8.
        base_uri: Predicate prefix to keep and strip.

    Returns:
        The populated ``nx.DiGraph``.
    """
    G = nx.DiGraph()

    iri = r'<([^>]*)>'
    # Optional graph label; without this the object group used to swallow it
    # ("o> <g") and literal statements carrying a graph label were dropped.
    graph_label = r'(?:\s+(?:<[^>]*>|_:\S+))?'
    # The literal body stays lazy so values are captured exactly as before.
    pattern_literal = re.compile(
        r'\s*' + iri + r'\s+' + iri + r'\s+"(.*?)"'
        + r'\s*(?:\^\^<[^>]*>|@[A-Za-z]+(?:-[A-Za-z0-9]+)*)?'
        + graph_label + r'\s*\.'
    )
    pattern_uri = re.compile(
        r'\s*' + iri + r'\s+' + iri + r'\s+' + iri + graph_label + r'\s*\.'
    )

    # N-Quads documents are UTF-8; do not depend on the platform default.
    with open(nquads_file, 'r', encoding='utf-8') as f:
        for line in f:
            match_literal = pattern_literal.match(line)
            if match_literal:
                subject, predicate, obj = match_literal.groups()
                if not predicate.startswith(base_uri):
                    continue
                pred_key = predicate[len(base_uri):]

                # add_node keeps the attributes of an already existing node.
                G.add_node(subject)
                attrs = G.nodes[subject]
                if pred_key not in attrs:
                    attrs[pred_key] = obj
                elif isinstance(attrs[pred_key], list):
                    attrs[pred_key].append(obj)
                else:
                    attrs[pred_key] = [attrs[pred_key], obj]
                continue

            # Only try the IRI-object shape when the literal shape failed.
            match_uri = pattern_uri.match(line)
            if match_uri:
                subject, predicate, obj = match_uri.groups()
                if predicate.startswith(base_uri):
                    # add_edge creates both endpoints when they are missing.
                    G.add_edge(subject, obj, type=predicate[len(base_uri):])

    return G

# Parse the ingested identity-island dump into the module-level graph `G`.
nquads_path = '../data/ingested_data/syntetic_identity_island.nq'
G = load_nquads_to_graph(nquads_path)

# Debug: report how many nodes and edges were loaded
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")

Number of nodes: 121
Number of edges: 163


In [4]:
# Debug: show the ID and attributes of the first node only
first_node = next(iter(G.nodes(data=True)), None)
if first_node is not None:
    node, attrs = first_node
    print(f"Node ID: {node}")
    print("Attributes:")
    for attr_key, attr_value in attrs.items():
        print(f"{attr_key}: {attr_value}")
    print()

Node ID: http://syntetic_identity_island.org/3f9fa0ea-d962-40f5-89cd-8253c94db91c
Attributes:
type: Identity
name: Katherine Glass
age: 35
date_of_birth: 11/04/1989
nationality: KGZ



In [5]:
# Debug: show the endpoints and attributes of the first edge only
first_edge = next(iter(G.edges(data=True)), None)
if first_edge is not None:
    edge = first_edge
    print(f"Edge from {edge[0]} to {edge[1]}")
    print("Attributes:")
    for attr_key, attr_value in edge[2].items():
        print(f"{attr_key}: {attr_value}")

Edge from http://syntetic_identity_island.org/3f9fa0ea-d962-40f5-89cd-8253c94db91c to http://syntetic_identity_island.org/9dcbecaa-f0b1-415f-9843-9c218bbc4466
Attributes:
type: IDENTITY_EQUIVALENCE


## Plot Data App

In [6]:
# spring_layout starts from random positions; fix the seed so the picture is
# the same on every Restart & Run All.
pos = nx.spring_layout(G, seed=42)
# Keep the coordinates on the nodes as well, under the 'pos' attribute.
nx.set_node_attributes(G, pos, 'pos')

In [7]:
def create_figure(G, pos, highlight_nodes=None):
    """Build the Plotly figure showing the graph, or a highlighted part of it.

    Every drawn edge produces two traces: a grey line between its endpoints
    (no hover text) and a tiny marker at its midpoint whose hover text is
    ``"Type: <edge type>"``. All drawn nodes share one marker trace whose
    colour and size grow with the node degree and whose hover text starts
    with ``"ID: <node id>"``.

    Args:
        G: The NetworkX graph to draw.
        pos: Mapping from node to its ``(x, y)`` coordinates.
        highlight_nodes: Optional iterable of node ids. When given, only these
            nodes, the nodes returned by ``G.neighbors`` for each of them, and
            the edges between those nodes are drawn. ``None`` draws the whole
            graph.

    Returns:
        A ``go.Figure`` sized 1100 x 1000 pixels.
    """
    # None means "no filter"; otherwise the set of nodes allowed on screen.
    if highlight_nodes is None:
        visible = None
    else:
        # Materialise first so a one-shot iterable is not consumed twice.
        seeds = list(highlight_nodes)
        visible = set(seeds)
        for seed in seeds:
            visible.update(G.neighbors(seed))

    edge_trace = []
    hover_edge_trace = []
    for source, target, data in G.edges(data=True):
        if visible is not None and not (source in visible and target in visible):
            continue
        x0, y0 = pos[source]
        x1, y1 = pos[target]

        edge_trace.append(
            go.Scatter(
                x=[x0, x1, None],
                y=[y0, y1, None],
                line=dict(width=1, color='#888'),
                mode='lines',
                hoverinfo='none',
                visible=True
            )
        )

        # Midpoint marker: gives the edge something to hover over and click.
        hover_edge_trace.append(
            go.Scatter(
                x=[(x0 + x1) / 2],
                y=[(y0 + y1) / 2],
                mode='markers',
                marker=dict(size=0.5, color='#888'),
                hoverinfo='text',
                text=[f"Type: {data.get('type', 'N/A')}"],
                hovertemplate='%{text}<extra></extra>',
                visible=True
            )
        )

    node_x = []
    node_y = []
    node_color = []
    node_size = []
    node_text = []
    for node, attributes in G.nodes(data=True):
        if visible is not None and node not in visible:
            continue
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

        degree = G.degree(node)
        num_neighbors = len(list(G.neighbors(node)))
        node_color.append(degree)
        node_size.append(10 + 2 * degree)

        attr_text = '<br>'.join(f"{key}: {value}" for key, value in attributes.items())
        node_text.append(
            f"ID: {node}<br>degree: {degree}<br>neighbors: {num_neighbors}<br>{attr_text}"
        )

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        text=node_text,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            colorscale='Viridis_r',
            color=node_color,
            size=node_size,
            colorbar=dict(
                thickness=15,
                # Nested title dict replaces the deprecated `titleside`.
                title=dict(text='Node Connections', side='right'),
                xanchor='left'
            )
        )
    )

    layout = go.Layout(
        # Nested title dict replaces the deprecated `titlefont_size`.
        title=dict(text='Identity Islands', font=dict(size=16)),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False),
        yaxis=dict(showgrid=False, zeroline=False),
        width=1100,
        height=1000
    )

    return go.Figure(data=edge_trace + hover_edge_trace + [node_trace], layout=layout)

In [8]:
# Initialize Dash app
app = dash.Dash(__name__)

# Page pieces: the full graph, a reset button pinned to the top right, and a
# store that remembers the last clicked node.
network_graph = dcc.Graph(
    id='network-graph',
    figure=create_figure(G, pos),
    style={'width': '100%', 'height': '90vh'},
)
reset_button = html.Button(
    'Reset',
    id='reset-button',
    n_clicks=0,
    style={'position': 'absolute', 'top': '10px', 'right': '230px'},
)
last_clicked_store = dcc.Store(id='last-clicked-node', data=None)

app.layout = html.Div([network_graph, reset_button, last_clicked_store])

In [9]:
@app.callback(
    [Output('network-graph', 'figure'),
     Output('last-clicked-node', 'data')],
    [Input('network-graph', 'clickData'),
     Input('reset-button', 'n_clicks')],
    [State('last-clicked-node', 'data')]
)
def update_figure(clickData, resetClicks, last_clicked_node):
    """Redraw the graph after a click on it or on the Reset button.

    * Reset button, or no trigger at all: draw the full graph and clear the
      stored node.
    * Click on a node (hover text starts with ``"ID: "``): highlight that node
      and store its id.
    * Click on an edge marker (hover text starts with ``"Type: "``): highlight
      the two endpoints of the clicked edge and keep the stored node.
    * Click on a point that carries no hover text: leave everything unchanged.

    Args:
        clickData: Plotly click payload of the graph, or ``None``.
        resetClicks: Click count of the Reset button (only used as a trigger).
        last_clicked_node: Node id currently held in the store.

    Returns:
        A ``(figure, last_clicked_node)`` pair for the two outputs.
    """
    ctx = dash.callback_context

    if not ctx.triggered:
        return create_figure(G, pos), None

    trigger = ctx.triggered[0]['prop_id']

    if 'reset-button' in trigger:
        # Handle reset button click: reset to initial state
        return create_figure(G, pos), None

    if clickData is not None:
        point = clickData['points'][0]
        text = point.get('text')
        if text is None:
            # Points from traces without text (the plain edge lines) used to
            # raise KeyError here; ignore such clicks instead.
            return dash.no_update, dash.no_update

        if text.startswith('Type: '):
            # Handle edge click. Many edges share a type, so the text alone
            # cannot identify the edge: pick the candidate whose midpoint is
            # closest to the clicked coordinates.
            candidates = [
                (source, target)
                for source, target, data in G.edges(data=True)
                if f"Type: {data.get('type', 'N/A')}" == text
            ]
            if candidates:
                click_x = point.get('x')
                click_y = point.get('y')
                if click_x is None or click_y is None:
                    source, target = candidates[0]
                else:
                    def midpoint_distance(edge):
                        mid_x = (pos[edge[0]][0] + pos[edge[1]][0]) / 2
                        mid_y = (pos[edge[0]][1] + pos[edge[1]][1]) / 2
                        return (mid_x - click_x) ** 2 + (mid_y - click_y) ** 2

                    source, target = min(candidates, key=midpoint_distance)
                return create_figure(G, pos, highlight_nodes=[source, target]), last_clicked_node
        elif text.startswith('ID: '):
            # Handle node click. Split once so an id containing ': ' survives.
            node_id = text.split('<br>')[0].split(': ', 1)[1]
            if node_id in G:
                return create_figure(G, pos, highlight_nodes=[node_id]), node_id

    return create_figure(G, pos), None

In [10]:
if __name__ == '__main__':
    # `app.run` replaces the deprecated `app.run_server`, which current Dash
    # releases no longer accept.
    app.run(debug=True)

In [11]:
from IPython.display import IFrame

# Embed the page served at the local address below inside the notebook.
dash_url = "http://127.0.0.1:8050"
IFrame(src=dash_url, width='100%', height='1000px')