In [19]:
# Setup
import sys
sys.path.insert(0, '../..')

from rdflib import Graph, Namespace, RDF, RDFS, OWL
from rdflib.namespace import SKOS
import networkx as nx
import plotly.graph_objects as go
from collections import defaultdict
import numpy as np

## 1. Meta-Ontology Visualization

Shows the domain vocabulary (classes and properties)

In [20]:
# Load meta-ontology
meta_graph = Graph()
meta_graph.parse('meta_ontology.ttl', format='turtle')

print(f"Loaded {len(meta_graph)} triples from meta-ontology")

Loaded 66 triples from meta-ontology


In [21]:
# Extract classes and properties
def extract_meta_ontology_network(g):
    """Convert meta-ontology to NetworkX graph"""
    G = nx.DiGraph()
    
    # Add classes as nodes
    for s, p, o in g.triples((None, RDF.type, OWL.Class)):
        label = str(g.value(s, RDFS.label) or s.split('/')[-1])
        comment = str(g.value(s, RDFS.comment) or "")
        G.add_node(str(s), label=label, node_type='class', comment=comment[:100])
    
    # Add properties as edges (handle multiple domains/ranges)
    for s, p, o in g.triples((None, RDF.type, OWL.ObjectProperty)):
        prop_label = str(g.value(s, RDFS.label) or s.split('/')[-1])
        
        # Get ALL domains and ranges (not just first one)
        domains = list(g.objects(s, RDFS.domain))
        ranges = list(g.objects(s, RDFS.range))
        
        # Create edges for all domain-range combinations
        for domain in domains:
            for range_val in ranges:
                G.add_edge(str(domain), str(range_val), label=prop_label, edge_type='property')
    
    return G

meta_nx = extract_meta_ontology_network(meta_graph)
print(f"Meta-ontology: {meta_nx.number_of_nodes()} classes, {meta_nx.number_of_edges()} edges")

Meta-ontology: 11 classes, 37 edges


In [22]:
# Visualize meta-ontology
def visualize_network(G, title="Network Graph", height=700):
    """Create interactive Plotly visualization"""
    
    # Layout
    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)
    
    # Edge traces
    edge_x = []
    edge_y = []
    edge_text = []
    
    for edge in G.edges(data=True):
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
        
        # Edge label
        edge_label = edge[2].get('label', '')
        edge_text.append(edge_label)
    
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=1, color='#888'),
        hoverinfo='text',
        mode='lines',
        text=edge_text
    )
    
    # Node traces
    node_x = []
    node_y = []
    node_text = []
    
    for node in G.nodes(data=True):
        x, y = pos[node[0]]
        node_x.append(x)
        node_y.append(y)
        
        # Node label
        label = node[1].get('label', node[0].split('/')[-1])
        node_text.append(label)
    
    # Node colors by degree
    node_adjacencies = []
    node_hover_text = []
    for node, adjacencies in enumerate(G.adjacency()):
        node_adjacencies.append(len(adjacencies[1]))
        node_data = G.nodes[adjacencies[0]]
        label = node_data.get('label', adjacencies[0].split('/')[-1])
        comment = node_data.get('comment', '')
        node_hover_text.append(f"{label}<br>{comment}<br>Connections: {len(adjacencies[1])}")
    
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            colorscale='YlGnBu',
            color=node_adjacencies,
            size=20,
            colorbar=dict(
                thickness=15,
                title='Connections',
                xanchor='left'
            ),
            line_width=2
        ),
        text=node_text,
        textposition="top center",
        hovertext=node_hover_text
    )
    
    # Create figure
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title=dict(text=title),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=40),
            height=height,
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
        )
    )
    
    return fig

# Show meta-ontology
fig_meta = visualize_network(meta_nx, title="Meta-Ontology (Domain Vocabulary)")
fig_meta.show()


## 2. Knowledge Graph Visualization

Shows the instance data with:
- Documents (blue)
- Domain Concepts (green)
- Topic Nodes (orange)
- Chunks grouped together (gray cluster per document)

In [None]:
# Load knowledge graph
kg_graph = Graph()
kg_graph.parse('knowledge_graph.ttl', format='turtle')

print(f"Loaded {len(kg_graph)} triples from knowledge graph")

# Namespaces
ONTO = Namespace("http://pkm.local/ontology/")
NS = Namespace("http://pkm.local/sources/")

Loaded 1257 triples from knowledge graph


: 

In [24]:
# Extract knowledge graph network (with chunk grouping)
def extract_knowledge_graph_network(g):
    """Convert knowledge graph to NetworkX (chunks grouped per document)"""
    G = nx.DiGraph()
    
    # Track chunks per document
    doc_chunks = defaultdict(list)
    
    # Add Documents
    for s, p, o in g.triples((None, RDF.type, ONTO.Document)):
        label = str(g.value(s, RDFS.label) or s.split('/')[-1])
        G.add_node(str(s), label=label, node_type='document', size=30, color='lightblue')
    
    # Add Domain Concepts
    for s, p, o in g.triples((None, RDF.type, ONTO.DomainConcept)):
        label = str(g.value(s, SKOS.prefLabel) or s.split('/')[-1])
        G.add_node(str(s), label=label, node_type='concept', size=20, color='lightgreen')
    
    # Add Topic Nodes
    for s, p, o in g.triples((None, RDF.type, ONTO.TopicNode)):
        label = str(g.value(s, SKOS.prefLabel) or s.split('/')[-1])
        G.add_node(str(s), label=label, node_type='topic', size=25, color='orange')
    
    # Group chunks per document (don't add as individual nodes)
    for s, p, o in g.triples((None, RDF.type, ONTO.Chunk)):
        # Find parent document
        for doc, _, chunk in g.triples((None, ONTO.hasChunk, s)):
            doc_chunks[str(doc)].append(str(s))
    
    # Add chunk groups as single nodes
    for doc, chunks in doc_chunks.items():
        chunk_group_id = f"{doc}_chunks"
        doc_label = G.nodes[doc]['label'] if doc in G else doc.split('/')[-1]
        G.add_node(
            chunk_group_id,
            label=f"{len(chunks)} chunks",
            node_type='chunk_group',
            size=15,
            color='lightgray',
            parent_doc=doc_label
        )
        # Link to document
        if doc in G:
            G.add_edge(doc, chunk_group_id, label='hasChunks', edge_type='containment')
    
    # Add relationships
    # mentionsConcept: Chunk → Concept (aggregate to chunk_group → concept)
    concept_mentions = defaultdict(lambda: defaultdict(int))
    for chunk, p, concept in g.triples((None, ONTO.mentionsConcept, None)):
        # Find which document this chunk belongs to
        for doc in doc_chunks:
            if str(chunk) in doc_chunks[doc]:
                chunk_group = f"{doc}_chunks"
                concept_mentions[chunk_group][str(concept)] += 1
                break
    
    for chunk_group, concepts in concept_mentions.items():
        for concept, count in concepts.items():
            if chunk_group in G and concept in G:
                G.add_edge(
                    chunk_group,
                    concept,
                    label=f'mentions ({count})',
                    edge_type='mention',
                    weight=count
                )
    
    # coversConcept: Topic → Concept
    for topic, p, concept in g.triples((None, ONTO.coversConcept, None)):
        if str(topic) in G and str(concept) in G:
            G.add_edge(str(topic), str(concept), label='covers', edge_type='coverage')
    
    return G

kg_nx = extract_knowledge_graph_network(kg_graph)
print(f"Knowledge graph: {kg_nx.number_of_nodes()} nodes, {kg_nx.number_of_edges()} edges")
print(f"Node types: {dict([(data['node_type'], 1) for n, data in kg_nx.nodes(data=True)])}")

Knowledge graph: 259 nodes, 444 edges
Node types: {'document': 1, 'concept': 1, 'topic': 1, 'chunk_group': 1}


In [25]:
# Visualize knowledge graph with colored nodes
def visualize_knowledge_graph(G, title="Knowledge Graph", height=800):
    """Visualize with color-coded node types"""
    
    # Layout
    pos = nx.spring_layout(G, k=1.5, iterations=50, seed=42)
    
    # Edge traces
    edge_traces = []
    
    # Group edges by type
    edge_colors = {
        'containment': '#cccccc',
        'mention': '#88ccff',
        'coverage': '#ffaa88'
    }
    
    for edge_type, color in edge_colors.items():
        edge_x = []
        edge_y = []
        edge_text = []
        
        for edge in G.edges(data=True):
            if edge[2].get('edge_type') == edge_type:
                x0, y0 = pos[edge[0]]
                x1, y1 = pos[edge[1]]
                edge_x.extend([x0, x1, None])
                edge_y.extend([y0, y1, None])
                edge_label = edge[2].get('label', '')
                edge_text.append(edge_label)
        
        edge_trace = go.Scatter(
            x=edge_x, y=edge_y,
            line=dict(width=1, color=color),
            hoverinfo='text',
            mode='lines',
            text=edge_text,
            name=edge_type
        )
        
        edge_traces.append(edge_trace)
    
    # Node traces by type
    node_traces = []
    
    node_types = {
        'document': {'color': 'lightblue', 'name': 'Documents'},
        'concept': {'color': 'lightgreen', 'name': 'Concepts'},
        'topic': {'color': 'orange', 'name': 'Topics'},
        'chunk_group': {'color': 'lightgray', 'name': 'Chunks'}
    }
    
    for node_type, style in node_types.items():
        node_x = []
        node_y = []
        node_text = []
        node_sizes = []
        node_hover = []
        
        for node, data in G.nodes(data=True):
            if data.get('node_type') == node_type:
                x, y = pos[node]
                node_x.append(x)
                node_y.append(y)
                
                label = data.get('label', node.split('/')[-1])
                size = data.get('size', 20)
                
                node_text.append(label[:30])
                node_sizes.append(size)
                
                # Hover text
                hover = f"{label}<br>Type: {node_type}"
                if node_type == 'chunk_group':
                    hover += f"<br>Document: {data.get('parent_doc', 'Unknown')}"
                hover += f"<br>Connections: {G.degree(node)}"
                node_hover.append(hover)
        
        if len(node_x) > 0:
            node_trace = go.Scatter(
                x=node_x, y=node_y,
                mode='markers+text',
                hoverinfo='text',
                marker=dict(
                    color=style['color'],
                    size=node_sizes,
                    line=dict(width=2, color='white')
                ),
                text=node_text,
                textposition="top center",
                textfont=dict(size=10),
                name=style['name'],
                hovertext=node_hover
            )
            node_traces.append(node_trace)
    
    # Create figure
    fig = go.Figure(
        data=edge_traces + node_traces,
        layout=go.Layout(
            title=dict(text=title),
            showlegend=True,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=40),
            height=height,
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="left",
                x=0.01
            )
        )
    )
    
    return fig

# Show knowledge graph
fig_kg = visualize_knowledge_graph(kg_nx, title="Knowledge Graph (Chunks Grouped by Document)")
fig_kg.show()


## 3. Graph Statistics

In [26]:
# Meta-ontology stats
print("=== META-ONTOLOGY STATS ===")
print(f"Classes: {len([n for n, d in meta_nx.nodes(data=True) if d.get('node_type') == 'class'])}")
print(f"Properties: {meta_nx.number_of_edges()}")
print(f"\nTop concepts by connections:")
degrees = dict(meta_nx.degree())
sorted_nodes = sorted(degrees.items(), key=lambda x: x[1], reverse=True)[:5]
for node, degree in sorted_nodes:
    label = meta_nx.nodes[node].get('label', node.split('/')[-1])
    print(f"  {label}: {degree} connections")

=== META-ONTOLOGY STATS ===
Classes: 10
Properties: 37

Top concepts by connections:
  EU Data Act: 13 connections
  Data portability: 12 connections
  Knowledge Graph: 6 connections
  Semantic Web: 6 connections
  Data Integration: 6 connections


In [27]:
# Knowledge graph stats
print("\n=== KNOWLEDGE GRAPH STATS ===")
print(f"Documents: {len([n for n, d in kg_nx.nodes(data=True) if d.get('node_type') == 'document'])}")
print(f"Chunk Groups: {len([n for n, d in kg_nx.nodes(data=True) if d.get('node_type') == 'chunk_group'])}")
print(f"Concepts: {len([n for n, d in kg_nx.nodes(data=True) if d.get('node_type') == 'concept'])}")
print(f"Topics: {len([n for n, d in kg_nx.nodes(data=True) if d.get('node_type') == 'topic'])}")
print(f"Total edges: {kg_nx.number_of_edges()}")

print(f"\nMost mentioned concepts:")
degrees = dict(kg_nx.degree())
concepts = [(n, d) for n, d in kg_nx.nodes(data=True) if d.get('node_type') == 'concept']
sorted_concepts = sorted(concepts, key=lambda x: degrees[x[0]], reverse=True)[:10]
for node, data in sorted_concepts:
    label = data.get('label', node.split('/')[-1])
    print(f"  {label}: {degrees[node]} connections")


=== KNOWLEDGE GRAPH STATS ===
Documents: 14
Chunk Groups: 14
Concepts: 210
Topics: 21
Total edges: 444

Most mentioned concepts:
  Data Act: 6 connections
  Knowledge Graphs: 5 connections
  Linked Data: 4 connections
  European Union: 3 connections
  Sofia University: 3 connections
  European Commission: 3 connections
  Member State: 3 connections
  Member States: 3 connections
  Actionable Insight: 2 connections
  Add one URL per line, lines starting with # are ignored: 2 connections


## 4. Export Visualizations

Save as HTML for sharing

In [28]:
# Save visualizations
fig_meta.write_html('meta_ontology_viz.html')
print("✓ Saved: meta_ontology_viz.html")

fig_kg.write_html('knowledge_graph_viz.html')
print("✓ Saved: knowledge_graph_viz.html")

print("\nOpen these files in a browser to explore interactively!")

✓ Saved: meta_ontology_viz.html
✓ Saved: knowledge_graph_viz.html

Open these files in a browser to explore interactively!
