# RAN Knowledge Graph Builder
This notebook demonstrates step-by-step execution of knowledge graph generation using the `kg_builder.py` module.

## Overview
The knowledge graph builder transforms RAN configuration data into a structured Neo4j graph database with:
- **Nodes**: Tables, Columns, Concepts
- **Relationships**: NAME_SIMILARITY, VALUE_OVERLAP, PATTERN_MATCH, REFERENCES, CONCEPTUAL_GROUP

## Prerequisites
- Neo4j database running on localhost:7687
- Required Python packages installed
- Sample RAN data available

In [1]:
# Import required libraries
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Import our knowledge graph builder
from kg_builder import RANNeo4jIntegrator
import pathlib
notebook_dir = pathlib.Path().resolve()
parser_dir = notebook_dir / '../parser_module'
sys.path.append(str(parser_dir.resolve()))
from parser import parse_xml

print("✅ Libraries imported successfully!")

✅ Libraries imported successfully!


In [2]:
# Step 1: Collect Data
xml_file_path = '../parser_module/data/RAN_CM_DATA_SAMPLES.xml'
dfs, metadata, metadata2 = parse_xml(xml_file_path)

# Display sample data info
print("\n📋 Data Overview:")
for table_name, df in dfs.items():
    print(f"  • {table_name}: {len(df)} rows, {len(df.columns)} columns")
    print(f"    Columns: {list(df.columns)}")
    print()


📋 Data Overview:
  • MeContext: 2 rows, 7 columns
    Columns: ['dateTime', 'Area_Name', 'SiteName', 'vsDataType', 'vsDataFormatVersion', 'MeContextId', 'neType']

  • EnodeBInfo: 2 rows, 5 columns
    Columns: ['dateTime', 'Area_Name', 'SiteName', 'Id2', 'userLabel']

  • AnrFunction: 4 rows, 48 columns
    Columns: ['dateTime', 'Area_Name', 'SiteName', 'Id2', 'Id3', 'vsDataType', 'vsDataFormatVersion', 'AnrFunction.removeGnbTime', 'AnrFunction.removeEnbTime', 'AnrFunction.anrFunctionId', 'AnrFunction.removeFreqRelTime', 'AnrFunction.removeNrelTime', 'AnrFunction.pciConflictMobilityEcgiMeas', 'AnrFunction.plmnWhiteListUtranEnabled', 'AnrFunction.zzzTemporary13', 'AnrFunction.probCellDetectLowHoSuccTime', 'AnrFunction.zzzTemporary12', 'AnrFunction.zzzTemporary7', 'AnrFunction.zzzTemporary11', 'AnrFunction.zzzTemporary10', 'AnrFunction.zzzTemporary9', 'AnrFunction.removeNcellTime', 'AnrFunction.perCgiMeasPlmnWhiteListUtran', 'AnrFunction.prioHoSuccRate', 'AnrFunction.perCgiMeasPlmnWhit

In [3]:
# Step 2: Initialize Neo4j Connection
print("🔗 Connecting to Neo4j...")

# Neo4j connection parameters
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "ranqarag#1"  # Change this to your actual password

try:
    # Initialize the integrator
    integrator = RANNeo4jIntegrator(
        neo4j_uri=NEO4J_URI,
        neo4j_user=NEO4J_USER,
        neo4j_password=NEO4J_PASSWORD
    )
    print("✅ Successfully connected to Neo4j!")
    print(f"📊 Embedding model: {integrator.embedding_model}")
    print(f"🎯 Similarity threshold: {integrator.similarity_threshold}")
    
except Exception as e:
    print(f"❌ Failed to connect to Neo4j: {e}")
    print("Please ensure Neo4j is running and credentials are correct.")

🔗 Connecting to Neo4j...
✅ Successfully connected to Neo4j!
📊 Embedding model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
🎯 Similarity threshold: 0.7


In [5]:
# Step 3.1: Create Table Nodes
print("🔹 Creating table nodes...")
integrator.create_table_nodes(dfs)

🔹 Creating table nodes...


ServiceUnavailable: Couldn't connect to localhost:7687 (resolved to ('[::1]:7687', '127.0.0.1:7687')):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 111] Connection refused)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 111] Connection refused)

In [None]:
# Step 3: Create Knowledge Graph
print("🏗️ Creating knowledge graph in Neo4j...")

try:
    # Create nodes and relationships in Neo4j
    integrator.create_nodes_and_relationships(dfs)
    print("✅ Successfully created nodes and relationships in Neo4j!")
    
    # Verify the graph creation
    with integrator.driver.session() as session:
        # Count nodes and relationships
        table_count = session.run("MATCH (t:Table) RETURN count(t) as count").single()['count']
        column_count = session.run("MATCH (c:Column) RETURN count(c) as count").single()['count']
        
        relationship_stats = session.run("""
            MATCH ()-[r]-()
            RETURN type(r) as rel_type, count(r) as count
            ORDER BY count DESC
        """).data()
        
        print(f"\n📊 Knowledge Graph Statistics:")
        print(f"  • Tables: {table_count}")
        print(f"  • Columns: {column_count}")
        print(f"  • Relationships:")
        
        total_relationships = 0
        for stat in relationship_stats:
            print(f"    - {stat['rel_type']}: {stat['count']}")
            total_relationships += stat['count']
        
        print(f"\n🎉 KNOWLEDGE GRAPH CREATION COMPLETE!")
        print(f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
        print(f"📊 Created {table_count} table nodes and {column_count} column nodes")
        print(f"🔗 Established {total_relationships} semantic relationships")
        print(f"⚡ Graph is ready for querying and chatbot integration!")
        print(f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
            
except Exception as e:
    print(f"❌ Error creating knowledge graph: {e}")
    print("Please check your Neo4j connection and try again.")

# Clean up
try:
    integrator.close()
    print("\n✅ Neo4j connection closed successfully.")
except:
    pass

In [None]:
# Step 3: Create Knowledge Graph
print("🏗️ Creating knowledge graph in Neo4j...")

try:
    # Step 3.1: Create Table Nodes
    print("🔹 Creating table nodes...")
    integrator.create_table_nodes(dfs)

    # Step 3.2: Create Column Nodes and Relationships
    print("🔹 Creating column nodes and relationships...")
    integrator.create_column_nodes_and_relationships(dfs)

    # Step 3.3: Create Semantic Relationships
    print("🔹 Creating semantic relationships...")
    integrator.create_semantic_relationships(dfs)

    print("✅ Knowledge graph creation completed successfully!")

    # Verify the graph creation
    with integrator.driver.session() as session:
        # Count nodes and relationships
        table_count = session.run("MATCH (t:Table) RETURN count(t) as count").single()['count']
        column_count = session.run("MATCH (c:Column) RETURN count(c) as count").single()['count']

        relationship_stats = session.run("""
            MATCH ()-[r]-()
            RETURN type(r) as rel_type, count(r) as count
            ORDER BY count DESC
        """).data()

        print(f"\n📊 Knowledge Graph Statistics:")
        print(f"  • Tables: {table_count}")
        print(f"  • Columns: {column_count}")
        print(f"  • Relationships:")

        total_relationships = 0
        for stat in relationship_stats:
            print(f"    - {stat['rel_type']}: {stat['count']}")
            total_relationships += stat['count']

        print(f"\n🎉 KNOWLEDGE GRAPH CREATION COMPLETE!")
        print(f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
        print(f"📊 Created {table_count} table nodes and {column_count} column nodes")
        print(f"🔗 Established {total_relationships} semantic relationships")
        print(f"⚡ Graph is ready for querying and chatbot integration!")
        print(f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
            
except Exception as e:
    print(f"❌ Error creating knowledge graph: {e}")
    print("Please check your Neo4j connection and try again.")

# Clean up
try:
    integrator.close()
    print("\n✅ Neo4j connection closed successfully.")
except:
    pass