# Neo4j Database Schema Extraction
**Extraction and documentation of Neo4j database schema using APOC procedures, with automated JSON export and troubleshooting guidance.**

In [None]:
# Cell [0] - Setup and Imports
# Purpose: Import all required libraries and configure environment settings for Neo4j Schema Extraction
# Dependencies: os, sys, json, datetime, pathlib, dotenv, praxis_requirements_analyzer.neo4j
# Breadcrumbs: Setup -> Imports -> Environment Configuration

import os
import sys
import json
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv

def setup_environment():
    """
    Resolve the project root, load environment variables, and build the run configuration.

    The project root is the parent of the current working directory when that
    directory is named ``notebooks``; otherwise it is the working directory
    itself. The root is put at the front of ``sys.path`` if absent, the
    ``.env`` file in the root is loaded when present (falling back to
    ``load_dotenv()`` defaults otherwise), and the ``schema`` output directory
    is created under the root.

    Returns:
        dict: ``PROJECT_ROOT`` and ``SCHEMA_PATH`` as ``Path`` objects plus the
        ``NEO4J_*`` settings read from the environment (``None`` when unset).
    """
    working_dir = Path.cwd()

    # When launched from notebooks/, the project root is one level up
    project_root = working_dir.parent if working_dir.name == 'notebooks' else working_dir

    # The root is added for environment files only; the package itself is
    # expected to be installed (pip install -e .), so src/ is not added
    root_entry = str(project_root)
    if root_entry not in sys.path:
        sys.path.insert(0, root_entry)

    # Prefer the project's own .env; otherwise let dotenv search its defaults
    dotenv_path = project_root / '.env'
    if dotenv_path.exists():
        load_dotenv(dotenv_path)
    else:
        load_dotenv()

    # Environment is read only after dotenv has been loaded
    neo4j_keys = (
        'NEO4J_URI',
        'NEO4J_USER',
        'NEO4J_PASSWORD',
        'NEO4J_DATABASE',
        'NEO4J_PROJECT_NAME',
    )
    config = {
        'PROJECT_ROOT': project_root,
        **{key: os.getenv(key) for key in neo4j_keys},
        'SCHEMA_PATH': project_root / "schema" / "neo4j-schema.json",
    }

    # Make sure the output directory for the schema file exists
    config['SCHEMA_PATH'].parent.mkdir(exist_ok=True)

    print(f"Current directory: {working_dir}")
    print(f"Project root: {project_root}")
    print(f"Using installed package (no src path needed)")
    print(f"Schema will be saved to: {config['SCHEMA_PATH']}")
    print(f"Neo4j Project: {config['NEO4J_PROJECT_NAME']}")
    print(f"Neo4j Database: {config['NEO4J_DATABASE']}")

    return config

# Execute setup as soon as the cell runs; CONFIG is read by the later cells
CONFIG = setup_environment()

# Simple restart message (printed unconditionally, before the import attempt)
print("\n" + "="*60)
print("SIMPLE SOLUTION: Just restart your Jupyter kernel!")
print("The package is already installed, just restart and run again.")
print("="*60)

# Import project modules after path setup
try:
    from praxis_requirements_analyzer.neo4j import Neo4jClient, SchemaExtractor

    # Verify the imports are usable: a name bound to None is treated the same
    # as a failed import so both cases share the guidance below
    if Neo4jClient is None or SchemaExtractor is None:
        raise ImportError("Classes imported as None")

except ImportError as e:
    print(f"⚠️  Import failed: {e}")
    print("\n❌ Cannot proceed with schema extraction.")
    print("Please ensure:")
    print("1. The package is properly installed: pip install -e .")
    print("2. All relative import issues are fixed")
    print("3. Restart the kernel if needed")

    # Stop execution by raising the error (chained to keep the original cause)
    raise ImportError("Cannot import required Neo4j classes") from e

else:
    # Report success only after both the import and the None check passed
    print("✅ Setup completed successfully!")


In [None]:
# Cell [1] - Extract Neo4j Schema and Save to File
# Purpose: Connect to Neo4j database, extract complete schema using APOC, and save to JSON file
# Dependencies: CONFIG, json, Neo4jClient, SchemaExtractor from Cell 0
# Breadcrumbs: Setup -> Database Connection -> Schema Extraction -> File Output

# Fail fast with a clear message when the classes bound by Cell 0 are missing
if not globals().keys() >= {'Neo4jClient', 'SchemaExtractor'}:
    print("❌ Required classes not imported. Please run Cell 0 first!")
    raise NameError("Neo4jClient and SchemaExtractor must be imported first")

def _print_schema_report(schema):
    """Print per-section sizes of ``schema['schema']`` and dump the whole schema as JSON."""
    schema_data = schema.get("schema", {})
    print(f"Schema contains {len(schema_data)} main sections")

    # Display schema summary
    print("\nSchema Summary:")
    print("-" * 40)
    for key, value in schema_data.items():
        if isinstance(value, list):
            print(f"• {key}: {len(value)} items")
        elif isinstance(value, dict):
            print(f"• {key}: {len(value)} entries")
        else:
            print(f"• {key}: {type(value).__name__}")

    # Display extracted schema content (formatted for readability)
    print("\nComplete Extracted Schema:")
    print("=" * 80)
    print(json.dumps(schema, indent=2))


def _print_empty_schema_help():
    """Print the checklist shown when extraction returned no schema data."""
    print("WARNING: No schema data was extracted.")
    print("\nTroubleshooting Guide:")
    print("-" * 40)
    print("1. Ensure APOC plugin is installed in your Neo4j instance")
    print("2. Verify APOC procedures are enabled in neo4j.conf:")
    print("   dbms.security.procedures.unrestricted=apoc.*")
    print("3. Check user permissions for APOC procedures")
    print("4. Test APOC availability: CALL apoc.help('meta')")
    print("5. Verify project name exists in database")


def _print_failure_help(error):
    """Print the error message and the troubleshooting checklist for a failed extraction."""
    print(f"ERROR during schema extraction: {str(error)}")
    print("\nDetailed Troubleshooting:")
    print("-" * 40)
    print("1. Connection issues:")
    print("   • Check Neo4j URI, username, and password")
    print("   • Verify database is running and accessible")
    print("   • Confirm database name is correct")
    print("2. APOC issues:")
    print("   • Install APOC plugin if not present")
    print("   • Enable APOC procedures in configuration")
    print("   • Restart Neo4j after configuration changes")
    print("3. Permission issues:")
    print("   • Ensure user has read permissions")
    print("   • Check if user can execute APOC procedures")


def extract_and_save_schema():
    """
    Extract Neo4j database schema and save it to a JSON file

    Builds a ``Neo4jClient`` from ``CONFIG``, connects, runs
    ``SchemaExtractor.extract_schema()``, and, when the result has a non-empty
    ``"schema"`` entry, writes it to ``CONFIG['SCHEMA_PATH']`` and prints a
    summary plus the full JSON.

    Returns:
        dict: Extracted schema data, or None if the extractor returned no
        schema data (a troubleshooting guide is printed in that case)

    Raises:
        RuntimeError: If ``client.driver`` is None after ``connect()``
        Exception: Any error raised while connecting, extracting, saving or
            printing is re-raised after troubleshooting guidance is printed
    """
    # Initialize Neo4j client using configuration from setup
    client = Neo4jClient(
        uri=CONFIG['NEO4J_URI'],
        user=CONFIG['NEO4J_USER'],
        password=CONFIG['NEO4J_PASSWORD'],
        database=CONFIG['NEO4J_DATABASE']
    )

    print("Connecting to Neo4j Database")
    print("=" * 60)
    print(f"URI: {CONFIG['NEO4J_URI']}")
    print(f"Database: {CONFIG['NEO4J_DATABASE']}")
    print(f"Project: {CONFIG['NEO4J_PROJECT_NAME']}")

    try:
        # Connect to database
        client.connect()
        print("Successfully connected to Neo4j database")

        # Verify driver is available
        if client.driver is None:
            raise RuntimeError("Neo4j driver is None after connection attempt")

        # Initialize schema extractor
        extractor = SchemaExtractor(client.driver, CONFIG['NEO4J_PROJECT_NAME'])

        print(f"\nExtracting schema for project: {CONFIG['NEO4J_PROJECT_NAME']}")
        print("-" * 60)

        # Extract schema using APOC procedures
        schema = extractor.extract_schema()

        # Nothing usable came back: explain the likely causes and report None
        if not (schema and schema.get("schema")):
            _print_empty_schema_help()
            return None

        # Save schema to file
        extractor.save_schema(schema, str(CONFIG['SCHEMA_PATH']))

        print("Schema successfully extracted and saved!")
        print(f"File location: {CONFIG['SCHEMA_PATH']}")
        _print_schema_report(schema)

        return schema

    except Exception as e:
        _print_failure_help(e)
        raise

    finally:
        # Always attempt to close the connection. A failure here is reported
        # but not raised, so it cannot replace the original extraction error
        # or discard a schema that was already saved.
        try:
            client.close()
        except Exception as close_error:
            print(f"\nWARNING: Failed to close database connection: {close_error}")
        else:
            print("\nDatabase connection closed successfully")

# Run the extraction once, synchronously, and report the outcome between banner lines
print("Starting Neo4j Schema Extraction")
print("=" * 80)

extracted_schema = extract_and_save_schema()

# A falsy result means the extractor returned no schema data
if not extracted_schema:
    print(f"\nSchema extraction failed. Please check the troubleshooting guide above.")
else:
    print(f"\nSchema extraction completed successfully!")
    print(f"Schema saved to: {CONFIG['SCHEMA_PATH']}")

print("=" * 80)