In [1]:
# AI Technology Categories Import Notebook
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
import logging
from typing import List, Dict, Tuple, Optional
import re
from datetime import datetime
import uuid

# Configuration
NEO4J_URI = "neo4j://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "kuxFc8HN"  # Adjust as needed
CSV_PATH = r"D:\Docker\neo4j\import\AI-Technology-Categories-v1.3.csv"  # Updated to v1.3

# Enhanced logging setup
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def verify_neo4j_connection() -> bool:
    """Verify Neo4j connection with enhanced error reporting"""
    try:
        with GraphDatabase.driver(
            NEO4J_URI, 
            auth=(NEO4J_USER, NEO4J_PASSWORD),
            connection_timeout=5
        ) as driver:
            driver.verify_connectivity()
            logger.info("Neo4j connection verified successfully")
            return True
    except Exception as e:
        logger.error(f"Neo4j connection failed: {str(e)}")
        return False

In [2]:
class AITechDataPrep:
    """Handles data preparation and validation for AI Technology Categories"""
    
    def __init__(self, csv_path: str):
        self.csv_path = csv_path
        self.df: Optional[pd.DataFrame] = None
        self.logger = logging.getLogger(__name__)

    def load_and_prepare(self) -> pd.DataFrame:
        """Primary data loading and preparation pipeline"""
        try:
            # Read CSV file
            self.df = pd.read_csv(self.csv_path, encoding='utf-8')
            
            # Print actual columns for debugging
            self.logger.info("Actual columns in file:")
            self.logger.info(self.df.columns.tolist())
            
            self._sanitize_data()
            self._validate_schema()
            self._prepare_relationships()
            return self.df
        except Exception as e:
            self.logger.error(f"Data preparation failed: {str(e)}")
            raise

    def _sanitize_data(self) -> None:
        """Sanitize data for Neo4j import"""
        if self.df is None:
            raise ValueError("DataFrame not initialized")
            
        self.df = self.df.replace({
            r'\n': ' ',
            r'\r': ' ',
            r'\t': ' ',
            r'\s+': ' '  # Collapse multiple spaces
        }, regex=True)
        
        # Strip whitespace and handle null values
        for column in self.df.columns:
            if self.df[column].dtype == object:
                self.df[column] = self.df[column].str.strip()
                
        self.logger.info("Data sanitization completed")

    def _validate_schema(self) -> None:
        """Validate required columns and data types"""
        required_columns = {
            'ai_category': str,
            'category_definition': str,
            'zone': str,
            'keywords': str,
            'capabilities': str,
            'business_language': str,
            'input_data_types': str,
            'generated_output': str,
            'operational_metrics': str,
            'model_artifacts': str,
            'vendor_ecosystem_tier': str,
            'dependent_technologies': str,
            'maturity_level': str,
            'integration_patterns': str
        }
        
        missing_columns = [col for col in required_columns if col not in self.df.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")
            
        # Validate zone values
        valid_zones = {
            'Analytical Intelligence',
            'Domain Specific',
            'Enterprise Enablement',
            'Core Infrastructure'
        }
        if self.df is not None and 'zone' in self.df.columns:
            invalid_zones = set(self.df['zone'].unique()) - valid_zones
            if invalid_zones:
                raise ValueError(f"Invalid zone values found: {invalid_zones}")

        self.logger.info("Schema validation completed")

    def _prepare_relationships(self) -> None:
        """Prepare relationship data for Neo4j import"""
        if self.df is None:
            raise ValueError("DataFrame not initialized")
            
        # Split delimited fields and create relationship mappings
        relationship_columns = [
            'keywords', 'capabilities', 'integration_patterns',
            'vendor_ecosystem_tier', 'dependent_technologies'
        ]
        
        for column in relationship_columns:
            if column in self.df.columns:
                # Create relationship lists
                self.df[f'{column}_list'] = self.df[column].str.split(';')
                
                # Clean individual items
                self.df[f'{column}_list'] = self.df[f'{column}_list'].apply(
                    lambda x: [item.strip() for item in x] if isinstance(x, list) else []
                )

        self.logger.info("Relationship data preparation completed")

    def get_unique_values(self, column: str) -> List[str]:
        """Extract unique values from a semicolon-delimited column"""
        if self.df is None:
            raise ValueError("DataFrame not initialized")
            
        if column not in self.df.columns:
            raise ValueError(f"Column {column} not found in DataFrame")
            
        values = set()
        for item in self.df[column].str.split(';'):
            if isinstance(item, list):
                values.update([x.strip() for x in item])
        return sorted(list(values))

In [3]:
class Neo4jTransactionManager:
    """Manages Neo4j transactions with retry logic and error handling"""
    
    def __init__(self, uri: str, user: str, password: str):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        self.logger = logging.getLogger(__name__)
        
    def close(self):
        """Clean up driver resources"""
        self.driver.close()

    def execute_write(self, query: str, params: Dict = None, retries: int = 3) -> None:
        """Execute write operations with retry logic"""
        last_error = None
        for attempt in range(retries):
            try:
                with self.driver.session() as session:
                    session.execute_write(
                        lambda tx: tx.run(query, params or {})
                    )
                return
            except Exception as e:
                last_error = e
                self.logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
                if attempt < retries - 1:
                    continue
                raise last_error

    def execute_read(self, query: str, params: Dict = None) -> List[Dict]:
        """Execute read operations with error handling"""
        try:
            with self.driver.session() as session:
                result = session.execute_read(
                    lambda tx: list(tx.run(query, params or {}))
                )
                return [record.data() for record in result]
        except Exception as e:
            self.logger.error(f"Read operation failed: {str(e)}")
            raise

    def cleanup_database(self) -> None:
        """Emergency cleanup for failed imports"""
        try:
            self.execute_write("MATCH (n) DETACH DELETE n")
            self.logger.info("Database cleanup completed")
        except Exception as e:
            self.logger.error(f"Cleanup failed: {str(e)}")
            raise

    def create_constraints(self) -> None:
        """Set up required constraints and indices"""
        constraints = [
            "CREATE CONSTRAINT ai_category_id IF NOT EXISTS FOR (c:AICategory) REQUIRE c.id IS UNIQUE",
            "CREATE CONSTRAINT keyword_name IF NOT EXISTS FOR (k:Keyword) REQUIRE k.name IS UNIQUE",
            "CREATE CONSTRAINT capability_name IF NOT EXISTS FOR (c:Capability) REQUIRE c.name IS UNIQUE",
            "CREATE CONSTRAINT zone_name IF NOT EXISTS FOR (z:Zone) REQUIRE z.name IS UNIQUE",
            "CREATE INDEX ai_category_name IF NOT EXISTS FOR (c:AICategory) ON (c.name)",
            "CREATE INDEX ai_category_zone IF NOT EXISTS FOR (c:AICategory) ON (c.zone)",
            "CREATE INDEX keyword_index IF NOT EXISTS FOR (k:Keyword) ON (k.name)",
            "CREATE INDEX capability_index IF NOT EXISTS FOR (c:Capability) ON (c.name)"
        ]
        
        for constraint in constraints:
            try:
                self.execute_write(constraint)
            except Exception as e:
                self.logger.error(f"Constraint creation failed: {str(e)}")
                raise

In [4]:
class AITechImporter:
    """Orchestrates the import of AI Technology Categories into Neo4j"""
    
    def __init__(self, tx_manager: Neo4jTransactionManager):
        self.tx_manager = tx_manager
        self.logger = logging.getLogger(__name__)

    def import_categories(self, df: pd.DataFrame) -> None:
        """Stage 1: Create core category nodes"""
        # First create Zone nodes
        zone_query = """
        UNWIND $zones as z
        MERGE (zone:Zone {name: z})
        """
        
        unique_zones = df['zone'].unique().tolist()
        self.tx_manager.execute_write(zone_query, {'zones': unique_zones})
        self.logger.info(f"Created zones: {', '.join(unique_zones)}")

        # Then create categories and link to zones
        category_query = """
        MATCH (z:Zone {name: $zone})
        MERGE (c:AICategory {id: $id})
        SET c.name = $name,
            c.category_definition = $category_definition,
            c.created_at = datetime(),
            c.maturity_level = $maturity_level,
            c.zone = $zone
        MERGE (c)-[:BELONGS_TO]->(z)
        """
        
        for _, row in df.iterrows():
            params = {
                'id': str(uuid.uuid4()),
                'name': row['ai_category'],
                'category_definition': row['category_definition'],
                'maturity_level': row['maturity_level'],
                'zone': row['zone']
            }
            try:
                self.tx_manager.execute_write(category_query, params)
                self.logger.info(f"Created category: {row['ai_category']} in zone: {row['zone']}")
            except Exception as e:
                self.logger.error(f"Failed to create category {row['ai_category']}: {str(e)}")
                raise

    def import_keywords_and_capabilities(self, df: pd.DataFrame) -> None:
        """Stage 2: Create keyword and capability relationships"""
        keyword_query = """
        MATCH (c:AICategory {name: $category})
        MERGE (k:Keyword {name: $keyword})
        MERGE (c)-[r:TAGGED_WITH]->(k)
        ON CREATE SET r.weight = 1
        ON MATCH SET r.weight = r.weight + 1
        """
        
        capability_query = """
        MATCH (c:AICategory {name: $category})
        MERGE (cap:Capability {name: $capability})
        MERGE (c)-[:HAS_CAPABILITY]->(cap)
        """
        
        for _, row in df.iterrows():
            category = row['ai_category']
            
            # Process keywords
            keywords = row['keywords'].split(';')
            for kw in keywords:
                self.tx_manager.execute_write(keyword_query, {
                    'category': category,
                    'keyword': kw.strip()
                })
            
            # Process capabilities
            capabilities = row['capabilities'].split(';')
            for cap in capabilities:
                self.tx_manager.execute_write(capability_query, {
                    'category': category,
                    'capability': cap.strip()
                })

    def import_dependencies(self, df: pd.DataFrame) -> None:
        """Import dependencies with case-insensitive matching and zone tracking"""
        query = """
        MATCH (c1:AICategory)-[:BELONGS_TO]->(z1:Zone)
        WHERE toLower(c1.name) = toLower($source)
        MATCH (c2:AICategory)-[:BELONGS_TO]->(z2:Zone)
        WHERE toLower(c2.name) = toLower($target)
        MERGE (c1)-[r:DEPENDS_ON]->(c2)
        SET r.cross_zone = CASE WHEN z1.name = z2.name THEN false ELSE true END,
            r.created_at = datetime()
        """
    
        for _, row in df.iterrows():
            source = row['ai_category']
            dependencies = row['dependent_technologies'].split(';')
        
            for dep in dependencies:
                dep = dep.strip()
                if dep:  # Skip empty strings
                    try:
                        self.tx_manager.execute_write(query, {
                            'source': source,
                            'target': dep
                        })
                        self.logger.info(f"Created dependency: {source} -> {dep}")
                    except Exception as e:
                        self.logger.error(f"Failed to create dependency {source} -> {dep}: {str(e)}")

    def import_integration_patterns(self, df: pd.DataFrame) -> None:
        """Stage 4: Create integration pattern relationships"""
        query = """
        MATCH (c:AICategory {name: $category})
        MERGE (i:IntegrationPattern {name: $pattern})
        MERGE (c)-[:INTEGRATES_VIA]->(i)
        """
        
        for _, row in df.iterrows():
            category = row['ai_category']
            patterns = row['integration_patterns'].split(';')
            
            for pattern in patterns:
                pattern = pattern.strip()
                if pattern:
                    self.tx_manager.execute_write(query, {
                        'category': category,
                        'pattern': pattern
                    })

In [5]:
from typing import Any

class ImportVerification:
    """Verifies data import integrity and relationships"""
    
    def __init__(self, tx_manager: Neo4jTransactionManager):
        self.tx_manager = tx_manager
        self.logger = logging.getLogger(__name__)

    def verify_categories(self) -> Dict[str, Any]:
        """Verify core category import"""
        query = """
        MATCH (c:AICategory)
        RETURN count(c) as category_count,
               count(DISTINCT c.id) as unique_ids,
               count(c.category_definition) as definitions,
               collect(DISTINCT c.maturity_level) as maturity_levels,
               collect(DISTINCT c.zone) as zones
        """
        
        results = self.tx_manager.execute_read(query)[0]
        return {
            'status': 'PASS' if results['category_count'] == results['unique_ids'] else 'FAIL',
            'counts': results
        }

    def verify_relationships(self) -> Dict[str, Any]:
        """Verify relationship integrity"""
        queries = {
            'keywords': """
                MATCH (c:AICategory)-[r:TAGGED_WITH]->(k:Keyword)
                RETURN count(DISTINCT c) as categories,
                       count(DISTINCT k) as keywords,
                       count(r) as relationships
            """,
            'capabilities': """
                MATCH (c:AICategory)-[r:HAS_CAPABILITY]->(cap:Capability)
                RETURN count(DISTINCT c) as categories,
                       count(DISTINCT cap) as capabilities,
                       count(r) as relationships
            """,
            'dependencies': """
                MATCH (c1:AICategory)-[r:DEPENDS_ON]->(c2:AICategory)
                RETURN count(DISTINCT c1) as source_categories,
                       count(DISTINCT c2) as target_categories,
                       count(r) as relationships
            """,
            'zones': """
                MATCH (c:AICategory)-[:BELONGS_TO]->(z:Zone)
                RETURN count(DISTINCT z) as zone_count,
                       count(c) as categorized_count,
                       collect(DISTINCT z.name) as zone_names
            """
        }
        
        results = {}
        for name, query in queries.items():
            results[name] = self.tx_manager.execute_read(query)[0]
            
        return {
            'status': 'PASS' if self._check_relationship_integrity(results) else 'FAIL',
            'counts': results
        }

    def _check_relationship_integrity(self, results: Dict) -> bool:
        return (
            results['keywords']['categories'] > 0 and
            results['keywords']['keywords'] > 0 and
            results['capabilities']['categories'] > 0 and
            results['zones']['zone_count'] == 4  # We expect exactly 4 zones
        )

    def verify_data_consistency(self) -> Dict[str, Any]:
        """Verify data consistency and referential integrity"""
        query = """
        MATCH (c:AICategory)
        OPTIONAL MATCH (c)-[:TAGGED_WITH]->(k:Keyword)
        OPTIONAL MATCH (c)-[:HAS_CAPABILITY]->(cap:Capability)
        OPTIONAL MATCH (c)-[:DEPENDS_ON]->(d:AICategory)
        OPTIONAL MATCH (c)-[:BELONGS_TO]->(z:Zone)
        RETURN c.name as category,
               z.name as zone,
               count(DISTINCT k) as keyword_count,
               count(DISTINCT cap) as capability_count,
               count(DISTINCT d) as dependency_count
        """
        
        results = self.tx_manager.execute_read(query)
        
        return {
            'status': 'PASS' if self._validate_consistency(results) else 'FAIL',
            'details': results
        }

    def _validate_consistency(self, results: List[Dict]) -> bool:
        """Check for minimum relationship requirements"""
        return all(
            r['keyword_count'] > 0 and 
            r['capability_count'] > 0 and
            r['zone'] is not None
            for r in results
        )

    def run_full_verification(self) -> Dict[str, Any]:
        """Execute all verification checks"""
        verifications = {
            'categories': self.verify_categories(),
            'relationships': self.verify_relationships(),
            'consistency': self.verify_data_consistency()
        }
        
        overall_status = all(v['status'] == 'PASS' for v in verifications.values())
        
        return {
            'status': 'PASS' if overall_status else 'FAIL',
            'details': verifications
        }

In [6]:
def main():
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    logger = logging.getLogger(__name__)

    try:
        # Verify connection and initialize components
        if not verify_neo4j_connection():
            raise ConnectionError("Failed to connect to Neo4j")

        tx_manager = Neo4jTransactionManager(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
        data_prep = AITechDataPrep(CSV_PATH)
        importer = AITechImporter(tx_manager)
        verifier = ImportVerification(tx_manager)

        # Import process
        try:
            # Stage 1: Preparation
            df = data_prep.load_and_prepare()
            tx_manager.create_constraints()

            # Stage 2: Core Import
            for stage in [
                ('categories', importer.import_categories),
                ('keywords and capabilities', importer.import_keywords_and_capabilities),
                ('dependencies', importer.import_dependencies),
                ('integration patterns', importer.import_integration_patterns)
            ]:
                logger.info(f"Importing {stage[0]}...")
                stage[1](df)

            # Stage 3: Verification
            logger.info("Running verification...")
            verification_results = verifier.run_full_verification()
            
            if verification_results['status'] == 'FAIL':
                # Log details before potential cleanup
                logger.warning("Verification results:")
                for key, value in verification_results['details'].items():
                    logger.warning(f"{key}: {value}")
                
                # Optional: Add manual override for dependency check
                if (verification_results['details']['relationships']['status'] == 'FAIL' and
                    all(r['keyword_count'] > 0 and r['capability_count'] > 0 and r['zone'] is not None
                        for r in verification_results['details']['consistency']['details'])):
                    logger.info("Override: Accepting import despite dependency verification failure")
                    return verification_results
                
                logger.error("Import verification failed")
                raise ValueError("Critical verification checks failed")

            logger.info("Import completed successfully!")
            return verification_results

        except Exception as e:
            logger.error(f"Import failed: {str(e)}")
            logger.info("Attempting cleanup...")
            tx_manager.cleanup_database()
            raise

    except Exception as e:
        logger.error(f"Process failed: {str(e)}")
        raise
    finally:
        tx_manager.close()

if __name__ == "__main__":
    main()

2025-02-02 19:04:19,592 - INFO - Neo4j connection verified successfully
2025-02-02 19:04:19,599 - INFO - Actual columns in file:
2025-02-02 19:04:19,600 - INFO - ['ai_category', 'category_definition', 'zone', 'keywords', 'capabilities', 'business_language', 'input_data_types', 'generated_output', 'operational_metrics', 'model_artifacts', 'vendor_ecosystem_tier', 'dependent_technologies', 'maturity_level', 'integration_patterns']
2025-02-02 19:04:19,608 - INFO - Data sanitization completed
2025-02-02 19:04:19,609 - INFO - Schema validation completed
2025-02-02 19:04:19,613 - INFO - Relationship data preparation completed
2025-02-02 19:04:20,143 - INFO - Received notification from DBMS server: {severity: INFORMATION} {code: Neo.ClientNotification.Schema.IndexOrConstraintAlreadyExists} {category: SCHEMA} {title: `CREATE RANGE INDEX keyword_index IF NOT EXISTS FOR (e:Keyword) ON (e.name)` has no effect.} {description: `RANGE INDEX keyword_name FOR (e:Keyword) ON (e.name)` already exists.} 

In [12]:
class AITechQueries:
    def __init__(self, tx_manager: Neo4jTransactionManager):
        self.tx_manager = tx_manager

    def find_by_keyword(self, keyword: str) -> List[Dict]:
        query = """
        MATCH (k:Keyword {name: $keyword})<-[r:TAGGED_WITH]-(c:AICategory)
        RETURN c.name as category, 
               c.category_definition as definition,
               c.zone as zone,
               r.weight as relevance
        ORDER BY r.weight DESC
        """
        return self.tx_manager.execute_read(query, {'keyword': keyword})

    def get_category_capabilities(self, category: str) -> List[Dict]:
        query = """
        MATCH (c:AICategory {name: $category})-[:HAS_CAPABILITY]->(cap:Capability)
        RETURN cap.name as capability
        ORDER BY capability
        """
        return self.tx_manager.execute_read(query, {'category': category})

    def get_dependency_chain(self, category: str) -> List[Dict]:
        query = """
        MATCH path = (c:AICategory {name: $category})-[:DEPENDS_ON*]->(dep:AICategory)
        RETURN [node IN nodes(path) | node.name] as dependency_chain,
               [node IN nodes(path) | node.zone] as zone_chain
        """
        return self.tx_manager.execute_read(query, {'category': category})

    def find_related_categories(self, category: str) -> List[Dict]:
        query = """
        MATCH (c:AICategory {name: $category})-[:TAGGED_WITH]->(k:Keyword)
        MATCH (k)<-[:TAGGED_WITH]-(related:AICategory)
        WHERE related <> c
        WITH related, count(k) as shared_keywords
        RETURN related.name as category, 
               related.zone as zone,
               shared_keywords,
               related.maturity_level as maturity
        ORDER BY shared_keywords DESC
        LIMIT 5
        """
        return self.tx_manager.execute_read(query, {'category': category})

    def get_maturity_distribution(self) -> List[Dict]:
        query = """
        MATCH (c:AICategory)
        RETURN c.maturity_level as level, 
               count(*) as count,
               collect({name: c.name, zone: c.zone}) as categories
        ORDER BY level
        """
        return self.tx_manager.execute_read(query, {})

    def get_categories_by_zone(self) -> List[Dict]:
        query = """
        MATCH (c:AICategory)-[:BELONGS_TO]->(z:Zone)
        RETURN z.name as zone,
               collect({
                   name: c.name,
                   definition: c.category_definition,
                   maturity: c.maturity_level
               }) as categories
        ORDER BY z.name
        """
        return self.tx_manager.execute_read(query, {})

    def get_category_details(self, category: str) -> Dict:
        query = """
        MATCH (c:AICategory {name: $category})-[:BELONGS_TO]->(z:Zone)
        OPTIONAL MATCH (c)-[:TAGGED_WITH]->(k:Keyword)
        OPTIONAL MATCH (c)-[:HAS_CAPABILITY]->(cap:Capability)
        OPTIONAL MATCH (c)-[:DEPENDS_ON]->(d:AICategory)
        RETURN c.name as name,
               c.category_definition as definition,
               c.maturity_level as maturity,
               z.name as zone,
               collect(DISTINCT k.name) as keywords,
               collect(DISTINCT cap.name) as capabilities,
               collect(DISTINCT d.name) as dependencies
        """
        results = self.tx_manager.execute_read(query, {'category': category})
        return results[0] if results else None

In [13]:
# First create the transaction manager
tx_manager = Neo4jTransactionManager(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# Then create queries object and execute
queries = AITechQueries(tx_manager)

# Example queries
print("\nKeyword search results:")
results = queries.find_by_keyword("machine learning")
print(results)

print("\nRelated categories for NLP:")
related = queries.find_related_categories("Natural Language Processing (NLP)")
print(related)

print("\nCategories by Zone:")
zone_results = queries.get_categories_by_zone()
print(zone_results)

print("\nDetailed category information:")
category_details = queries.get_category_details("Data Integration & Management")
print(category_details)

# Close connection when done
tx_manager.close()


Keyword search results:
[{'category': 'Predictive & Pattern Analytics', 'definition': 'Systems that identify patterns in historical data to predict future outcomes and trends, supporting proactive decision-making and planning', 'zone': 'Analytical Intelligence', 'relevance': 1}]

Related categories for NLP:
[]

Categories by Zone:
[{'zone': 'Analytical Intelligence', 'categories': [{'maturity': 'mature', 'definition': 'Systems that identify patterns in historical data to predict future outcomes and trends, supporting proactive decision-making and planning', 'name': 'Predictive & Pattern Analytics'}, {'maturity': 'mature', 'definition': 'Systems that understand, analyze, and generate human language, enabling automated document processing, translation, and human-AI interaction', 'name': 'Natural Language Processing (NLP)'}, {'maturity': 'emerging', 'definition': 'Advanced AI systems that combine multiple types of input data (text, images, audio) to provide more comprehensive analysis an

In [14]:
# First create the transaction manager
tx_manager = Neo4jTransactionManager(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# Create verification queries
verification_queries = {
    'maturity': """
        MATCH (c:AICategory)
        RETURN c.maturity_level as level, count(*) as count,
               collect(DISTINCT c.zone) as zones;
    """,
    
    'keywords': """
        MATCH (k:Keyword)<-[r:TAGGED_WITH]-(c:AICategory)
        RETURN k.name as keyword, count(r) as usage_count,
               collect(DISTINCT c.zone) as zones
        ORDER BY usage_count DESC
        LIMIT 10;
    """,
    
    'categories': """
        MATCH (c:AICategory)-[:BELONGS_TO]->(z:Zone)
        OPTIONAL MATCH (c)-[:TAGGED_WITH]->(k:Keyword)
        OPTIONAL MATCH (c)-[:HAS_CAPABILITY]->(cap:Capability)
        RETURN c.name as category,
               z.name as zone,
               count(DISTINCT k) as keyword_count,
               count(DISTINCT cap) as capability_count
        ORDER BY c.name;
    """
}

# Execute and display results
with tx_manager.driver.session() as session:
    for name, query in verification_queries.items():
        print(f"\n{name.upper()} ANALYSIS:")
        results = session.run(query).data()
        for result in results:
            print(result)

# Close connection when done
tx_manager.close()


MATURITY ANALYSIS:
{'level': 'emerging', 'count': 6, 'zones': ['Core Infrastructure', 'Domain Specific', 'Enterprise Enablement', 'Analytical Intelligence']}
{'level': 'established', 'count': 5, 'zones': ['Analytical Intelligence', 'Core Infrastructure', 'Enterprise Enablement', 'Domain Specific']}
{'level': 'mature', 'count': 3, 'zones': ['Core Infrastructure', 'Analytical Intelligence']}

KEYWORDS ANALYSIS:
{'keyword': 'task automation', 'usage_count': 2, 'zones': ['Enterprise Enablement', 'Domain Specific']}
{'keyword': 'feature extraction', 'usage_count': 2, 'zones': ['Analytical Intelligence']}
{'keyword': 'sensor fusion', 'usage_count': 2, 'zones': ['Core Infrastructure', 'Analytical Intelligence']}
{'keyword': 'workflow automation', 'usage_count': 2, 'zones': ['Enterprise Enablement', 'Domain Specific']}
{'keyword': 'anomaly detection', 'usage_count': 2, 'zones': ['Core Infrastructure', 'Analytical Intelligence']}
{'keyword': 'model governance', 'usage_count': 1, 'zones': ['Cor

In [16]:
# First create the transaction manager
tx_manager = Neo4jTransactionManager(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# Query for categories, definitions, and zones
categories_query = """
MATCH (c:AICategory)-[:BELONGS_TO]->(z:Zone)
RETURN c.name as category, 
       c.category_definition as definition,
       z.name as zone
ORDER BY z.name, c.name;
"""

# Execute and display results
with tx_manager.driver.session() as session:
    print("AI TECHNOLOGY CATEGORIES:\n")
    results = session.run(categories_query).data()
    current_zone = None
    for result in results:
        # Print zone header when zone changes
        if current_zone != result['zone']:
            current_zone = result['zone']
            print(f"\n=== {current_zone} ===\n")
        
        print(f"Category: {result['category']}")
        print(f"Definition: {result['definition']}")
        print("-" * 80 + "\n")

AI TECHNOLOGY CATEGORIES:


=== Analytical Intelligence ===

Category: Computer Vision & Media Analysis
Definition: Systems that process and analyze visual information from images and videos, enabling automated identification, classification, and understanding of visual content
--------------------------------------------------------------------------------

Category: Multimodal AI Systems
Definition: Advanced AI systems that combine multiple types of input data (text, images, audio) to provide more comprehensive analysis and interaction capabilities
--------------------------------------------------------------------------------

Category: Natural Language Processing (NLP)
Definition: Systems that understand, analyze, and generate human language, enabling automated document processing, translation, and human-AI interaction
--------------------------------------------------------------------------------

Category: Predictive & Pattern Analytics
Definition: Systems that identify pattern

In [18]:
# First create the transaction manager
tx_manager = Neo4jTransactionManager(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# Comprehensive category details query
detailed_query = """
MATCH (c:AICategory)
OPTIONAL MATCH (c)-[:TAGGED_WITH]->(k:Keyword)
OPTIONAL MATCH (c)-[:HAS_CAPABILITY]->(cap:Capability)
RETURN 
   c.name as category,
   c.category_definition as definition,
   c.maturity_level as maturity,
   collect(DISTINCT k.name) as keywords,
   collect(DISTINCT cap.name) as capabilities
ORDER BY c.name;
"""

with tx_manager.driver.session() as session:
   results = session.run(detailed_query).data()
   for result in results:
       print(f"\nCATEGORY: {result['category']}")
       print(f"MATURITY: {result['maturity']}")
       print(f"\nDEFINITION:")
       print(result['definition'])
       print("\nKEYWORDS:")
       print(", ".join(result['keywords']))
       print("\nCAPABILITIES:")
       print(", ".join(result['capabilities']))
       print("=" * 100)


CATEGORY: AI Development & Operations (AIOps)
MATURITY: emerging

DEFINITION:
Platforms and tools for developing, deploying, and managing AI systems reliably at scale, with emphasis on automation, monitoring, and governance

KEYWORDS:
mlops, devops for ai, model management, feature engineering, model monitoring, ai pipeline management, model governance, continuous integration, continuous deployment, feature store, model registry, experiment tracking, model serving, model versioning, reproducibility, containerization, orchestration, model lineage, compliance monitoring, drift detection, a/b testing, canary deployment, model observability, automated ml, pipeline automation

CAPABILITIES:
model development, model deployment, model monitoring, feature engineering, pipeline management, version control, experiment tracking, automated testing, deployment automation, model registry management, feature store operations, compliance validation, drift detection, performance optimization, resource

In [19]:
# First create the transaction manager
tx_manager = Neo4jTransactionManager(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

dependency_check = """
MATCH (c:AICategory)
OPTIONAL MATCH (c)-[:DEPENDS_ON]->(d:AICategory)
RETURN c.name as category, collect(d.name) as dependencies;
"""

with tx_manager.driver.session() as session:
    print("\nDependency Check:")
    for result in session.run(dependency_check).data():
        print(f"{result['category']}: {result['dependencies']}")


Dependency Check:
AI Development & Operations (AIOps): ['Data Integration & Management', 'Responsible AI Systems']
Computer Vision & Media Analysis: ['Multimodal AI Systems', 'Data Integration & Management', 'Edge AI & IoT']
Cybersecurity & Threat Detection: ['Data Integration & Management', 'Predictive & Pattern Analytics']
Data Integration & Management: ['Responsible AI Systems']
Decision Support & Optimization: ['Data Integration & Management', 'Predictive & Pattern Analytics']
Edge AI & IoT: ['Data Integration & Management']
Environmental & Geospatial AI: ['Edge AI & IoT', 'Data Integration & Management', 'Computer Vision & Media Analysis']
Healthcare & Biotech AI: ['Responsible AI Systems', 'Computer Vision & Media Analysis']
Intelligent End-User Computing: ['Process Automation & Robotics']
Multimodal AI Systems: ['Data Integration & Management', 'Computer Vision & Media Analysis']
Natural Language Processing (NLP): ['Responsible AI Systems', 'Data Integration & Management']
Pred

In [None]:
# NOTE - the below will clear the whole database.  Leaving as commented out so it will not run

# First create the transaction manager
# tx_manager = Neo4jTransactionManager(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# Clear database
# tx_manager.cleanup_database()