# Requirements Judging Workflow
**LLM-based requirements traceability evaluation with actor-judge pattern, batch processing, Redis vector search, and Neo4j result storage.**

In [None]:
# Cell [0] - Setup and Imports
# Purpose: Import all required libraries and configure environment settings for Requirements Judging
# Dependencies: os, sys, logging, asyncio, dotenv, pathlib, datetime, redis, numpy, json, src modules
# Breadcrumbs: Setup -> Imports -> Environment Configuration -> Logging Setup

import os
import sys
import logging
import asyncio
import json
import numpy as np
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
from typing import Dict, List
from redis.asyncio import Redis
from redisvl.index import AsyncSearchIndex
from redisvl.query import VectorQuery

def setup_environment():
    """
    Configure Python path, log locations, and load environment variables for Requirements Judging

    Side effects: appends the project root (the parent of the current working
    directory) to ``sys.path``, calls ``load_dotenv()``, creates
    ``<project_root>/logs/<model_dir>/`` and prints a configuration summary.

    Returns:
        dict: Configuration parameters including model settings and paths

    Raises:
        ValueError: If MAX_CONCURRENT_JOBS, BATCH_SIZE or REDIS_PORT is set to
            a value that cannot be parsed as an integer.
    """
    # Get the absolute path to the project root directory (parent of notebooks)
    notebook_dir = Path(os.getcwd())
    project_root = notebook_dir.parent
    src_path = project_root / 'src'

    # Add the project root to Python path if not already there
    if str(project_root) not in sys.path:
        sys.path.append(str(project_root))

    # Load environment variables
    load_dotenv()

    # Create logs directory structure if it doesn't exist
    logs_dir = project_root / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)

    # CURRENT_MODEL holds the *name* of the environment variable that carries the model id
    current_model = os.getenv('CURRENT_MODEL', 'CLAUDE_3_5_MODEL_ID')
    model_name = os.getenv(current_model, 'claude-2.1')

    # Model ids may contain path separators (e.g. "org/model"). Left in place they
    # would make the log file point into a sub-directory that is never created,
    # so flatten them before building any filesystem name.
    model_file_stem = model_name.replace('/', '_').replace('\\', '_')

    # Create a safe directory name from model name
    model_dir_name = model_file_stem.lower().replace('-', '_').replace('.', '_')
    model_dir = logs_dir / model_dir_name
    model_dir.mkdir(parents=True, exist_ok=True)

    # Create log filename with timestamp
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_filename = model_dir / f"{model_file_stem}_{timestamp}.log"

    # Configuration from environment variables
    config = {
        'PROJECT_ROOT': project_root,
        'SRC_PATH': src_path,
        'LOGS_DIR': logs_dir,
        'LOG_FILENAME': log_filename,
        'MODEL_NAME': model_name,
        'CURRENT_MODEL': current_model,
        'TEST_MODE': os.getenv('TEST_MODE', 'False').lower() == 'true',
        'LOG_LEVEL': os.getenv('LOG_LEVEL', 'INFO'),
        'MAX_CONCURRENT_JOBS': int(os.getenv('MAX_CONCURRENT_JOBS', 5)),
        'BATCH_SIZE': int(os.getenv('BATCH_SIZE', 5)),
        'NEO4J_URI': os.getenv('NEO4J_URI'),
        'NEO4J_USER': os.getenv('NEO4J_USER'),
        'NEO4J_PASSWORD': os.getenv('NEO4J_PASSWORD'),
        'NEO4J_DATABASE': os.getenv('NEO4J_DATABASE', 'neo4j'),
        'NEO4J_PROJECT_NAME': os.getenv('NEO4J_PROJECT_NAME', 'eANCI'),
        'REDIS_HOST': os.getenv('REDIS_HOST', 'localhost'),
        # Parsed like the other numeric settings so the value has one type
        # whether it comes from the environment (str) or the default (int)
        'REDIS_PORT': int(os.getenv('REDIS_PORT', 6379)),
        'REDIS_PASSWORD': os.getenv('REDIS_PASSWORD')
    }

    print(f"Project root added to path: {project_root}")
    print(f"Model: {config['MODEL_NAME']}")
    print(f"Test Mode: {config['TEST_MODE']}")
    print(f"Log Level: {config['LOG_LEVEL']}")
    print(f"Logs will be written to: {config['LOG_FILENAME']}")

    return config

# Run environment setup immediately so CONFIG is available to every later cell
CONFIG = setup_environment()

# Import project modules after path setup
# NOTE(review): setup_environment() appends the project root (not SRC_PATH) to sys.path;
# presumably praxis_requirements_analyzer is installed or lives at the project root - confirm.
from praxis_requirements_analyzer.utils.logger import ColoredFormatter, setup_logger
from praxis_requirements_analyzer.neo4j.neo4j_client import Neo4jClient
from praxis_requirements_analyzer.redis.redis_client import RedisClient
from praxis_requirements_analyzer.llm.manager import LLMManager
from praxis_requirements_analyzer.requirements_analyzer.requirements_workflow import RequirementsWorkflow
from praxis_requirements_analyzer.requirements_analyzer.requirements_prompt_manager import RequirementsPromptManager
from praxis_requirements_analyzer.llm.models.huggingface.hf_embeddings_client import HuggingFaceEmbeddingsClient
from praxis_requirements_analyzer.neo4j.requirements_client import RequirementsClient
from praxis_requirements_analyzer.models.requirement import Requirement

# Reached only if setup and every import above succeeded
print("Setup completed successfully!")


In [None]:
# Cell [1] - Configure Logging System
# Purpose: Set up comprehensive logging with file and console handlers for requirements judging
# Dependencies: ColoredFormatter, setup_logger from Cell 0
# Breadcrumbs: Setup -> Logging Configuration -> Handler Setup

def get_logging_level(level_str: str) -> int:
    """
    Translate a textual log level into the matching ``logging`` constant

    Parameters:
        level_str: Level name, matched case-insensitively against
            DEBUG, INFO, WARNING, ERROR and CRITICAL

    Returns:
        int: The numeric logging level; ``logging.INFO`` for any other name
    """
    recognised = ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
    normalised = level_str.upper()

    # Only the five standard names are honoured; aliases such as WARN fall back to INFO
    if normalised in recognised:
        return getattr(logging, normalised)
    return logging.INFO

def _replace_handlers(target_logger, new_handlers):
    """Detach and close every handler on ``target_logger``, then attach ``new_handlers`` in order."""
    for stale in list(target_logger.handlers):
        target_logger.removeHandler(stale)
        # Closing releases the underlying log file; handlers being re-attached are left open
        if stale not in new_handlers:
            stale.close()
    for handler in new_handlers:
        target_logger.addHandler(handler)


def setup_logging():
    """
    Configure comprehensive logging system for requirements judging workflow

    A file handler (DEBUG) and a console handler (DEBUG in test mode, INFO
    otherwise) are attached to the root logger and to each named logger listed
    below. Handlers already attached are removed *and closed*, so re-running
    the cell does not leave the previous run's log file open.

    NOTE: the root logger level is set to CONFIG['LOG_LEVEL'], so records below
    that level are filtered out before they can reach the DEBUG file handler.

    Returns:
        tuple: (file_handler, console_handler) for additional logger configuration

    Raises:
        Exception: Any error raised while creating handlers or configuring
            loggers is reported on stdout and re-raised unchanged.
    """
    # Get logging level from configuration
    logging_level = get_logging_level(CONFIG['LOG_LEVEL'])

    # Set root logger to environment-specified level
    logging.getLogger().setLevel(logging_level)

    print("Configuring Logging System")
    print("=" * 60)
    print(f"Log Level: {CONFIG['LOG_LEVEL']} ({logging_level})")
    print(f"Test Mode: {CONFIG['TEST_MODE']}")
    print(f"Model: {CONFIG['MODEL_NAME']}")
    print(f"Log File: {CONFIG['LOG_FILENAME']}")

    try:
        # Create file handler with DEBUG level for complete logging
        file_handler = logging.FileHandler(CONFIG['LOG_FILENAME'])
        file_handler.setLevel(logging.DEBUG)  # Keep DEBUG level for file logging
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        ))

        # Create console handler with appropriate level based on test mode
        console_handler = logging.StreamHandler()
        # Set DEBUG level if in test mode, otherwise use INFO
        console_handler.setLevel(logging.DEBUG if CONFIG['TEST_MODE'] else logging.INFO)
        console_handler.setFormatter(ColoredFormatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        ))

        shared_handlers = [file_handler, console_handler]

        # Define all loggers with their levels
        loggers = {
            # Requirements analysis loggers
            "src.requirements_analyzer.requirements_workflow": logging_level,
            "src.requirements_analyzer.requirements_prompt_manager": logging_level,

            # LLM-related loggers
            "src.llm.manager": logging_level,
            "src.llm.manager.llm_manager": logging_level,
            "src.llm.models.anthropic.claude_client": logging_level,
            "src.llm.models.openai.openai_client": logging_level,
            "src.llm.models.huggingface.hf_client": logging_level,
            "src.llm.models.huggingface.hf_embeddings_client": logging_level,

            # Database loggers
            "src.redis.redis_client": logging_level,
            "redisvl": logging_level,

            # Other loggers
            "initialization": logging_level,
            "redis": logging_level
        }

        # Swap the root logger's handlers for ours, closing whatever was there before
        root_logger = logging.getLogger()
        _replace_handlers(root_logger, shared_handlers)

        # Configure all loggers
        for logger_name, level in loggers.items():
            logger = setup_logger(logger_name, level)
            logger.propagate = False  # Prevent duplicate logging

            # Clear existing handlers and add our configured handlers
            _replace_handlers(logger, shared_handlers)

        # Log initial setup information
        root_logger.info("Logging system initialized")
        root_logger.info(f"Log file: {CONFIG['LOG_FILENAME']}")
        root_logger.info(f"Model: {CONFIG['MODEL_NAME']}")
        root_logger.info(f"Log level: {CONFIG['LOG_LEVEL']}")

        if CONFIG['TEST_MODE']:
            root_logger.info("Running in TEST MODE - Console shows DEBUG messages")

        if logging_level <= logging.DEBUG:
            root_logger.debug("Debug logging is enabled")

        # Test logger setup with all levels
        test_logger = logging.getLogger("test_logger")
        test_logger.setLevel(logging_level)
        test_logger.debug("Test debug message")
        test_logger.info("Test info message")
        test_logger.warning("Test warning message")
        test_logger.error("Test error message")

        print("Logging system configured successfully!")
        return file_handler, console_handler

    except Exception as e:
        print(f"ERROR setting up logging: {str(e)}")
        print(f"Attempted to create log file at: {CONFIG['LOG_FILENAME']}")
        print(f"Current working directory: {os.getcwd()}")
        print(f"Project root: {CONFIG['PROJECT_ROOT']}")
        raise

# Execute logging setup; the two handlers are kept as cell-level names
file_handler, console_handler = setup_logging()

In [None]:
# Cell [2] - Initialize Clients and Workflow
# Purpose: Initialize Neo4j, Redis, LLM components and Requirements Workflow
# Dependencies: Client classes from Cell 0, logging configuration from Cell 1
# Breadcrumbs: Setup -> Logging -> Client Initialization -> Workflow Setup

def _build_redis_url(host, port, password=None):
    """Return a redis:// URL for host/port, percent-encoding the password when one is given."""
    from urllib.parse import quote

    if password:
        # The password is a raw secret, not URL text: reserved characters such as
        # '@', ':', '/' or '#' would otherwise corrupt the URL when it is parsed.
        return f"redis://:{quote(password, safe='')}@{host}:{port}"
    return f"redis://{host}:{port}"


async def _close_partial_clients(neo4j_client, redis_base, logger):
    """Best-effort close of the connections that were opened before initialization failed."""
    if neo4j_client is not None:
        try:
            await neo4j_client.close()
        except Exception as close_error:
            logger.debug(f"Neo4j cleanup after failed initialization also failed: {close_error}")
    if redis_base is not None:
        try:
            await redis_base.aclose()
        except Exception as close_error:
            logger.debug(f"Redis cleanup after failed initialization also failed: {close_error}")


async def initialize_clients():
    """
    Initialize all clients and workflow components for requirements judging

    Steps, in order: connect Neo4j, connect and ping Redis, initialize the
    Redis wrapper's indices, initialize the LLM manager, build the prompt
    manager / embedding client / requirements client, construct the workflow
    and create its RedisVL indices.

    If any step fails, the Neo4j and Redis connections opened so far are closed
    (best effort) before the exception is re-raised, because the caller never
    receives the client objects and could not close them itself.

    Returns:
        tuple: (neo4j_client, redis_client, workflow, requirements_client)

    Raises:
        Exception: Whatever the failing step raised, after it has been logged.
    """
    init_logger = logging.getLogger("initialization")

    # Tracked outside the try block so the failure path knows what to close
    neo4j_client = None
    redis_base = None

    try:
        init_logger.info("Starting client initialization")
        print("\nInitializing Clients and Workflow")
        print("=" * 80)

        # Initialize Neo4j client with configuration
        init_logger.debug("Initializing Neo4j client")
        print("Connecting to Neo4j database...")
        neo4j_client = Neo4jClient(
            uri=CONFIG['NEO4J_URI'],
            user=CONFIG['NEO4J_USER'],
            password=CONFIG['NEO4J_PASSWORD'],
            database=CONFIG['NEO4J_DATABASE']
        )
        await neo4j_client.connect()
        init_logger.info("Neo4j client connected successfully")
        print(f"Neo4j connected to database: {CONFIG['NEO4J_DATABASE']}")

        # Initialize Redis components
        init_logger.debug("Initializing Redis client")
        print("Setting up Redis connection...")

        # Build Redis URL
        redis_url = _build_redis_url(
            CONFIG['REDIS_HOST'], CONFIG['REDIS_PORT'], CONFIG['REDIS_PASSWORD']
        )

        # Create Redis base client
        redis_base = Redis.from_url(redis_url)

        # Verify Redis connection
        try:
            await redis_base.ping()
            init_logger.info("Redis connection verified")
            print(f"Redis connected at {CONFIG['REDIS_HOST']}:{CONFIG['REDIS_PORT']}")
        except Exception as e:
            init_logger.error(f"Redis connection failed: {str(e)}", exc_info=True)
            print(f"Redis connection failed: {str(e)}")
            raise

        # Initialize our Redis client wrapper
        redis_client = RedisClient(redis_base)

        # Initialize indices in Redis client
        try:
            print("Initializing Redis indices...")
            await redis_client.initialize_indices()
            init_logger.info("Successfully created/updated Redis indices")
            print("Redis indices initialized successfully")
        except Exception as e:
            init_logger.error(f"Failed to initialize Redis indices: {str(e)}", exc_info=True)
            print(f"Failed to initialize Redis indices: {str(e)}")
            raise

        # Initialize LLM components
        init_logger.debug("Initializing LLM components")
        print("Setting up LLM Manager...")
        llm_manager = LLMManager()

        # Initialize the models
        try:
            await llm_manager.initialize_models()
            init_logger.info(f"LLM manager initialized with model: {CONFIG['MODEL_NAME']}")
            print(f"LLM Manager initialized with model: {CONFIG['MODEL_NAME']}")
        except Exception as e:
            init_logger.error(f"Failed to initialize LLM models: {str(e)}", exc_info=True)
            print(f"Failed to initialize LLM models: {str(e)}")
            raise

        # Initialize additional components
        init_logger.debug("Initializing prompt manager and embedding client")
        print("Initializing supporting components...")
        prompt_manager = RequirementsPromptManager()
        embedding_client = HuggingFaceEmbeddingsClient()

        # Initialize requirements client with project name
        init_logger.debug("Initializing requirements client")
        requirements_client = RequirementsClient(
            neo4j_client=neo4j_client,
            project_name=CONFIG['NEO4J_PROJECT_NAME']
        )
        print(f"Requirements client initialized for project: {CONFIG['NEO4J_PROJECT_NAME']}")

        # Initialize workflow with proper components
        init_logger.debug("Initializing requirements workflow")
        print("Setting up Requirements Workflow...")
        workflow = RequirementsWorkflow(
            llm_manager=llm_manager,
            prompt_manager=prompt_manager,
            model_name=CONFIG['MODEL_NAME'],
            redis_client=redis_client,
            embedding_client=embedding_client
        )

        # Explicitly set workflow logger level
        workflow.logger.setLevel(logging.DEBUG)

        # Initialize RedisVL indices
        try:
            print("Initializing RedisVL indices...")
            await workflow.init_indices()
            init_logger.info("Successfully created RedisVL indices")
            print("RedisVL indices created successfully")
        except Exception as e:
            init_logger.error(f"Failed to create RedisVL indices: {str(e)}", exc_info=True)
            # Log the Redis client state for debugging
            init_logger.debug(f"Redis client state: {redis_client.client}")
            try:
                ping_result = await redis_client.client.ping()
                init_logger.debug(f"Redis client ping: {ping_result}")
            except Exception as ping_error:
                init_logger.debug(f"Redis ping failed: {ping_error}")
            print(f"Failed to create RedisVL indices: {str(e)}")
            raise

        init_logger.info("Initialization complete - all components ready")
        print("\nAll Components Initialized Successfully!")
        print("=" * 80)
        print(f"Model: {CONFIG['MODEL_NAME']}")
        print(f"Project: {CONFIG['NEO4J_PROJECT_NAME']}")
        print(f"Test Mode: {CONFIG['TEST_MODE']}")
        print(f"Max Concurrent Jobs: {CONFIG['MAX_CONCURRENT_JOBS']}")
        print(f"Batch Size: {CONFIG['BATCH_SIZE']}")

        return neo4j_client, redis_client, workflow, requirements_client

    except Exception as e:
        init_logger.critical(f"Critical error during initialization: {str(e)}", exc_info=True)
        print(f"Critical initialization error: {str(e)}")
        # Release whatever was opened; the caller never sees these objects on failure
        await _close_partial_clients(neo4j_client, redis_base, init_logger)
        raise

# Execute client initialization
init_logger = logging.getLogger("initialization")
init_logger.info("Starting client initialization process")

# Top-level await: relies on the notebook's running event loop (not valid in a plain script).
# The four names bound here are used by the later cells.
try:
    neo4j_client, redis_client, workflow, requirements_client = await initialize_clients()
    init_logger.info("Client initialization completed successfully")
except Exception as e:
    # initialize_clients() has already logged the details; record the failure and stop the cell
    init_logger.critical("Failed to initialize clients", exc_info=True)
    raise

In [None]:
# Cell [3] - Define Helper Functions
# Purpose: Define the main requirements processing function with batch handling
# Dependencies: Workflow and clients from Cell 2, CONFIG from Cell 0
# Breadcrumbs: Client Initialization -> Helper Functions -> Batch Processing Logic

async def process_requirements():
    """
    Process requirements using the initialized workflow, handling requirements concurrently in batches.

    All source requirements are evaluated against the target requirements,
    which are sliced into chunks of CONFIG['BATCH_SIZE']. A batch that raises
    is logged and skipped so the remaining batches still run; skipped batches
    are listed in the final summary so partial results are not mistaken for a
    complete run. In TEST_MODE only the first 2 source and first 12 target
    requirements are used.

    Returns:
        List[Dict]: List of requirement matches with scores and reasoning

    Raises:
        ValueError: If CONFIG['BATCH_SIZE'] is not a positive integer.
        Exception: Any error outside the per-batch handling (e.g. fetching
            requirements) is logged and re-raised.
    """
    requirements_logger = setup_logger("src.requirements_analyzer.requirements_workflow", logging.DEBUG)

    try:
        # Fail fast: a zero batch size would divide by zero below and a negative
        # one would make the batch loop silently process nothing.
        batch_size = CONFIG['BATCH_SIZE']
        if batch_size < 1:
            raise ValueError(f"BATCH_SIZE must be a positive integer, got {batch_size}")

        print("\nStarting Requirements Processing")
        print("=" * 80)

        # Fetch requirements from Neo4j
        print("Fetching requirements from Neo4j...")
        requirements = await requirements_client.get_requirements()

        # Apply test mode limitations if enabled
        if CONFIG['TEST_MODE']:
            original_source_count = len(requirements['source'])
            original_target_count = len(requirements['target'])
            requirements['source'] = requirements['source'][:2]
            requirements['target'] = requirements['target'][:12]
            print("TEST MODE: Limited requirements")
            print(f"   Source: {original_source_count} → {len(requirements['source'])}")
            print(f"   Target: {original_target_count} → {len(requirements['target'])}")

        total_source = len(requirements['source'])
        total_target = len(requirements['target'])

        print("Processing Configuration:")
        print(f"   Source requirements: {total_source}")
        print(f"   Target requirements: {total_target}")
        print(f"   Max concurrent jobs: {CONFIG['MAX_CONCURRENT_JOBS']}")
        print(f"   Batch size: {batch_size}")

        # Calculate batch information
        total_batches = (total_target + batch_size - 1) // batch_size  # Ceiling division
        all_matches = []
        failed_batches = []  # batch numbers that raised and were skipped

        requirements_logger.info(f"Starting processing of {total_source} source requirements against {total_target} target requirements")
        requirements_logger.info(f"Will process in {total_batches} batches of up to {batch_size} requirements each")

        print(f"\nProcessing {total_batches} batches...")
        print("-" * 60)

        # Process target requirements in batches
        for i in range(0, total_target, batch_size):
            target_batch = requirements['target'][i:i + batch_size]
            batch_num = i // batch_size + 1
            batch_size_actual = len(target_batch)

            print(f"\nBatch {batch_num}/{total_batches}: Processing requirements {i+1} to {i+batch_size_actual}")
            requirements_logger.info(f"Processing target requirements batch {batch_num}/{total_batches} "
                                f"(requirements {i+1} to {i+batch_size_actual})")
            requirements_logger.debug(f"Batch {batch_num} contains {batch_size_actual} requirements")

            try:
                # Process the batch
                batch_matches = await workflow.process_requirements_batch(
                    source_requirements=requirements['source'],
                    target_requirements=target_batch,
                    max_concurrent=CONFIG['MAX_CONCURRENT_JOBS']
                )

                if batch_matches:
                    all_matches.extend(batch_matches)
                    requirements_logger.info(f"Batch {batch_num}/{total_batches} completed: found {len(batch_matches)} matches")
                    print(f"   Found {len(batch_matches)} matches in this batch")
                else:
                    requirements_logger.warning(f"Batch {batch_num}/{total_batches} completed: found no matches")
                    print("   No matches found in this batch")

            except Exception as e:
                # Best effort: one bad batch must not discard the work of the others
                failed_batches.append(batch_num)
                requirements_logger.error(f"Error processing batch {batch_num}/{total_batches}: {str(e)}", exc_info=True)
                print(f"   Error processing batch {batch_num}: {str(e)}")

        # Final summary
        print("\nProcessing Complete!")
        print("=" * 80)
        if failed_batches:
            failed_list = ", ".join(str(num) for num in failed_batches)
            requirements_logger.warning(
                f"{len(failed_batches)}/{total_batches} batches failed and were skipped: {failed_list}"
            )
            print(f"WARNING: {len(failed_batches)} of {total_batches} batches failed and were skipped "
                  f"(batch numbers: {failed_list})")

        if all_matches:
            print(f"Found total of {len(all_matches)} requirement matches across all batches")

            # Display some statistics
            unique_sources = len({match['source_id'] for match in all_matches})
            unique_targets = len({match['target_id'] for match in all_matches})
            print("Statistics:")
            print(f"   Unique source requirements with matches: {unique_sources}")
            print(f"   Unique target requirements with matches: {unique_targets}")
            print(f"   Average matches per source: {len(all_matches) / unique_sources:.2f}")
        else:
            print("WARNING: No requirement matches found in any batch")

        return all_matches

    except Exception as e:
        requirements_logger.error(f"Error in process_requirements: {str(e)}", exc_info=True)
        print(f"Error in requirements processing: {str(e)}")
        raise

# Confirmation printed once process_requirements() has been defined by this cell
print("Helper functions defined successfully!")

In [None]:
# Cell [4] - Execute Requirements Processing
# Purpose: Run the main requirements processing workflow and display detailed results
# Dependencies: process_requirements function from Cell 3, logging configuration
# Breadcrumbs: Helper Functions -> Main Processing -> Results Display

def configure_logging_for_processing():
    """
    Configure logging levels for the processing phase

    Outside TEST_MODE every *console* handler on the root logger is set to
    INFO and the workflow logger is set to INFO; in TEST_MODE the console
    handlers are left untouched and the workflow logger is set to DEBUG.

    File handlers are deliberately skipped: ``logging.FileHandler`` is a
    subclass of ``logging.StreamHandler``, so a plain ``isinstance`` check
    would also lower the log file's level.
    """
    test_mode = CONFIG['TEST_MODE']

    # Configure console handler based on TEST_MODE setting
    for handler in logging.getLogger().handlers:
        is_console = (
            isinstance(handler, logging.StreamHandler)
            and not isinstance(handler, logging.FileHandler)
        )
        if not is_console:
            continue

        if not test_mode:  # Only adjust if not in test mode
            handler.setLevel(logging.INFO)
            print("Console logging level set to INFO to show evaluation progress")
        else:
            print("TEST MODE: Console showing DEBUG level messages")

    # Ensure workflow logger respects the TEST_MODE setting
    requirements_logger = logging.getLogger("src.requirements_analyzer.requirements_workflow")
    requirements_logger.setLevel(logging.DEBUG if test_mode else logging.INFO)

def _reasoning_preview(text, fallback, limit=200):
    """Return ``text`` (or ``fallback`` when missing/empty) capped at ``limit`` chars, adding '...' only when cut."""
    shown = str(text) if text else fallback
    if len(shown) > limit:
        return f"{shown[:limit]}..."
    return shown


def display_detailed_results(results):
    """
    Display detailed results of the requirements processing

    Prints the first three matches in full (actor / judge evaluation, match
    quality metrics, final verdict) followed by summary statistics over all
    matches. Fields that are absent *or* present with a ``None`` value fall
    back to a default instead of raising, since match dictionaries may carry
    empty entries.

    Parameters:
        results: List of match dictionaries. ``source_id`` and ``target_id``
            are required for the matches that are displayed; every other key
            is optional.
    """
    print("\nDetailed Results Analysis")
    print("=" * 80)
    print(f"Total matches processed: {len(results)}")

    if not results:
        print("WARNING: No matches to display")
        return

    print("\nSample Results (showing first 3 matches):")
    print("-" * 80)

    for i, match in enumerate(results[:3], 1):
        print(f"\nMatch {i}:")
        print(f"   Source ID: {match['source_id']} → Target ID: {match['target_id']}")

        # Scores and reasoning
        actor_reasoning = _reasoning_preview(match.get('actor_reasoning'), 'No actor reasoning provided')
        print("\n   Actor Evaluation:")
        print(f"      Score: {match.get('actor_score', 0)}")
        print(f"      Reasoning: {actor_reasoning}")

        judge_reasoning = _reasoning_preview(match.get('judge_reasoning'), 'No judge reasoning provided')
        print("\n   Judge Evaluation:")
        print(f"      Score: {match.get('judge_score', 0)}")
        print(f"      Reasoning: {judge_reasoning}")

        # Match quality metrics ("or {}" also covers an explicit None value)
        match_quality = match.get('match_quality') or {}
        print("\n   Match Quality Metrics:")
        print(f"      Semantic Alignment: {match_quality.get('semantic_alignment', 0)}")
        print(f"      Functional Completeness: {match_quality.get('functional_completeness', 0)}")
        print(f"      Non-functional Coverage: {match_quality.get('non_functional_coverage', 0)}")

        # Final results
        meta_reasoning = _reasoning_preview(
            match.get('meta_judge_reasoning'), 'No meta-judge reasoning provided'
        )
        print("\n   Final Results:")
        print(f"      Final Score: {match.get('final_score', 0)}")
        print(f"      Is Traceable: {'YES' if match.get('is_traceable', False) else 'NO'}")
        print(f"      Meta-Judge Reasoning: {meta_reasoning}")

        print("-" * 60)

    if len(results) > 3:
        print(f"\n... and {len(results) - 3} more matches")

    # Summary statistics (results is non-empty here; a None score counts as 0)
    traceable_count = sum(1 for match in results if match.get('is_traceable', False))
    avg_final_score = sum((match.get('final_score') or 0) for match in results) / len(results)

    print("\nSummary Statistics:")
    print(f"   Total matches: {len(results)}")
    print(f"   Traceable matches: {traceable_count} ({traceable_count/len(results)*100:.1f}%)")
    print(f"   Average final score: {avg_final_score:.2f}")

# Configure logging for processing
configure_logging_for_processing()

# Execute the main requirements processing
print(f"\nStarting Main Requirements Processing")
print("=" * 80)

# Top-level await: relies on the notebook's running event loop.
# `results` is bound at cell level and is read again by the storage and summary cells.
try:
    results = await process_requirements()
    
    # Display detailed results
    display_detailed_results(results)
    
    print(f"\nRequirements processing completed successfully!")
    
except Exception as e:
    # Report on stdout and in the root logger, then re-raise so the cell is marked as failed
    print(f"Error during requirements processing: {str(e)}")
    logging.getLogger().error("Requirements processing failed", exc_info=True)
    raise

In [None]:
# Cell [5] - Store Results in Neo4j Database
# Purpose: Store the processed requirement matches and judgments in Neo4j for persistence
# Dependencies: results from Cell 4, neo4j_client from Cell 2, CONFIG from Cell 0
# Breadcrumbs: Results Processing -> Data Storage -> Neo4j Persistence

async def store_results_in_neo4j(results):
    """
    Store requirement matching results in Neo4j database

    Each match is written with ``neo4j_client.store_judge_results``. A match
    that fails to store (including one that lacks ``source_id`` or
    ``target_id``) is logged with its traceback and counted as failed; the
    remaining matches are still attempted.

    Parameters:
        results: List of requirement matches with scores and reasoning
    """
    if not results:
        print("WARNING: No results to store")
        return

    print("\nStoring Results in Neo4j Database")
    print("=" * 80)
    print(f"Model: {CONFIG['MODEL_NAME']}")
    print(f"Results to store: {len(results)}")

    storage_logger = logging.getLogger("neo4j_storage")
    successful_stores = 0
    failed_stores = 0

    for i, match in enumerate(results, 1):
        # Built with .get() so that a match missing an id cannot raise a second
        # KeyError inside the except handler and abort the whole loop.
        pair_label = f"{match.get('source_id')}->{match.get('target_id')}"

        try:
            await neo4j_client.store_judge_results(
                source_id=match['source_id'],
                target_id=match['target_id'],
                model_name=CONFIG['MODEL_NAME'],
                judgment_results=match
            )

            storage_logger.info(f"Stored match {pair_label}")
            successful_stores += 1

            # Progress indicator for large datasets
            if i % 10 == 0 or i == len(results):
                print(f"   Progress: {i}/{len(results)} matches stored")

        except Exception as e:
            storage_logger.error(f"Failed to store match {pair_label}: {str(e)}", exc_info=True)
            failed_stores += 1

    # Storage summary (every result was counted exactly once, so the total is len(results) > 0)
    print("\nStorage Summary:")
    print(f"   Successfully stored: {successful_stores}")
    print(f"   Failed to store: {failed_stores}")
    print(f"   Success rate: {successful_stores/(successful_stores+failed_stores)*100:.1f}%")

    if failed_stores > 0:
        print("WARNING: Some results failed to store. Check logs for details.")
    else:
        print("All results stored successfully!")

# Execute storage operation
# Requires `results` from the processing cell; top-level await relies on the notebook's event loop.
try:
    await store_results_in_neo4j(results)
except Exception as e:
    # Per-match failures are handled inside store_results_in_neo4j; this catches anything else
    print(f"Error during storage operation: {str(e)}")
    logging.getLogger().error("Failed to store results in Neo4j", exc_info=True)
    raise

In [None]:
# Cell [6] - Test Similarity Searches (Optional)
# Purpose: Test vector similarity searches to verify Redis indices and embeddings
# Dependencies: workflow indices from Cell 2, numpy and json from Cell 0
# Breadcrumbs: Data Storage -> Similarity Testing -> Index Verification

async def test_similarity_searches():
    """
    Smoke-test vector similarity search using one stored source requirement

    The first key with the ``src_req:`` prefix is loaded from Redis and its
    embedding is used as the query vector against both ``workflow.src_index``
    and ``workflow.tgt_index``. The top three hits of each query are printed
    with a score computed as ``1 - vector_distance``.

    Nothing is returned and no exception escapes: every failure is logged to
    the "similarity_testing" logger and echoed on stdout.
    """
    similarity_logger = logging.getLogger("similarity_testing")

    try:
        print("\nTesting Similarity Searches")
        print("=" * 80)

        # Fetch every key once; the per-prefix counts below are derived from this list
        redis_keys = await redis_client.client.keys("*")
        print(f"Total keys in Redis: {len(redis_keys)}")

        # Keys are compared as bytes, matching the prefixes used for stored requirements
        source_keys = [key for key in redis_keys if key.startswith(b'src_req:')]
        target_keys = [key for key in redis_keys if key.startswith(b'tgt_req:')]

        print("Data Overview:")
        print(f"   Source Requirements: {len(source_keys)}")
        print(f"   Target Requirements: {len(target_keys)}")

        # Guard: without a source requirement there is nothing to query with
        if not source_keys:
            print("WARNING: No source requirements found in Redis")
            return

        print("\nTesting with first source requirement...")
        print("-" * 60)

        # A single requirement is enough for this demonstration
        probe_key = source_keys[0]

        try:
            # Load the three hash fields of the probe requirement
            text_blob = await redis_client.client.hget(probe_key, "text")
            embedding_blob = await redis_client.client.hget(probe_key, "embedding")
            metadata_blob = await redis_client.client.hget(probe_key, "metadata")

            # Guard: all three fields must be present before querying
            if not (text_blob and embedding_blob and metadata_blob):
                print(f"ERROR: Missing data for source requirement: {probe_key.decode()}")
                return

            print(f"Analyzing: {probe_key.decode()}")
            print(f"Content preview: {text_blob.decode()[:150]}...")

            # The embedding is read back as a float32 vector
            query_vector = np.frombuffer(embedding_blob, dtype=np.float32)
            print(f"Embedding dimension: {len(query_vector)}")

            # ---- Source-to-source query ----
            print("\nTesting Source-to-Source Similarity:")
            print("-" * 40)

            source_probe = VectorQuery(
                vector=query_vector.tolist(),
                vector_field_name="embedding",
                return_fields=["text", "metadata", "vector_distance"],
                num_results=5  # top 5 is plenty for a demonstration
            )
            source_hits = await workflow.src_index.query(source_probe)

            if source_hits:
                print(f"Found {len(source_hits)} source matches:")
                for position, hit in enumerate(source_hits[:3], 1):  # only the top 3 are shown
                    try:
                        hit_meta = json.loads(hit.get("metadata", "{}"))
                        distance = float(hit.get("vector_distance", 0.0))
                        score = 1 - distance
                        print(f"   {position}. ID: {hit_meta.get('id')} | Score: {score:.4f}")
                    except Exception as e:
                        similarity_logger.error(f"Error processing source result: {str(e)}")
            else:
                print("ERROR: No source-to-source results found")
                source_index_details = await workflow.src_index.info()
                print(f"Source index info: {source_index_details}")

            # ---- Source-to-target query ----
            print("\nTesting Source-to-Target Similarity:")
            print("-" * 40)

            target_probe = VectorQuery(
                vector=query_vector.tolist(),
                vector_field_name="embedding",
                return_fields=["text", "metadata", "vector_distance"],
                num_results=5  # top 5 is plenty for a demonstration
            )
            target_hits = await workflow.tgt_index.query(target_probe)

            if target_hits:
                print(f"Found {len(target_hits)} target matches:")
                for position, hit in enumerate(target_hits[:3], 1):  # only the top 3 are shown
                    try:
                        hit_meta = json.loads(hit.get("metadata", "{}"))
                        distance = float(hit.get("vector_distance", 0.0))
                        score = 1 - distance
                        print(f"   {position}. ID: {hit_meta.get('id')} | Score: {score:.4f}")
                    except Exception as e:
                        similarity_logger.error(f"Error processing target result: {str(e)}")
            else:
                print("ERROR: No source-to-target results found")
                target_index_details = await workflow.tgt_index.info()
                print(f"Target index info: {target_index_details}")

            print("\nSimilarity testing completed successfully!")

        except Exception as e:
            similarity_logger.error(f"Error processing requirement {probe_key.decode()}: {str(e)}", exc_info=True)
            print(f"ERROR processing requirement: {str(e)}")

    except Exception as e:
        similarity_logger.error(f"Error in similarity testing: {str(e)}", exc_info=True)
        print(f"Error in similarity testing: {str(e)}")

# Optionally run similarity tests (useful for debugging)
# input() blocks until the user answers; any reply not starting with "y"/"Y" (including empty) skips the tests
run_similarity_tests = input("Run similarity tests for verification? (y/N): ").lower().startswith('y')

if run_similarity_tests:
    print("Starting similarity search tests...")
    # test_similarity_searches() handles and logs its own errors, so no try/except is needed here
    await test_similarity_searches()
else:
    print("Skipping similarity tests")

In [None]:
# Cell [7] - Cleanup Connections and Finalize
# Purpose: Properly close all database connections and finalize the workflow
# Dependencies: Clients from Cell 2
# Breadcrumbs: Processing Complete -> Connection Cleanup -> Finalization

async def _close_connection(label, close_call, logger):
    """
    Await a single connection's close call and report the outcome

    Args:
        label: Connection name used in the log and console messages (e.g. "Neo4j")
        close_call: Zero-argument callable returning the awaitable that closes the
            connection. It is invoked inside the try block so that lookup errors
            (missing client, missing attribute) are reported like any other failure.
        logger: Logger that receives the success or failure record

    Returns:
        None on success, otherwise the error message describing the failure
    """
    try:
        await close_call()
    except Exception as e:
        error_msg = f"{label} connection cleanup failed: {str(e)}"
        # exc_info keeps the traceback in the log, as the other error logs in this notebook do
        logger.error(error_msg, exc_info=True)
        print(f"Error closing {label} connection: {str(e)}")
        return error_msg

    logger.info(f"{label} connection closed successfully")
    print(f"{label} connection closed successfully")
    return None


async def cleanup_connections():
    """
    Properly close all database connections and clean up resources

    Closes the Neo4j client and then the Redis client (both read from the
    notebook's global namespace). A failure on one connection is recorded and
    does not prevent the attempt to close the other.

    Returns:
        bool: True when every connection closed without raising, False otherwise
    """
    cleanup_logger = logging.getLogger("cleanup")

    print("\nCleaning Up Connections")
    print("=" * 80)

    # Lambdas defer the name/attribute lookups until inside the helper's try
    # block, so an undefined or half-initialised client is reported as a
    # cleanup error instead of aborting the whole cleanup.
    closers = (
        ("Neo4j", lambda: neo4j_client.close()),
        ("Redis", lambda: redis_client.client.aclose()),
    )

    cleanup_errors = []
    for label, close_call in closers:
        error_msg = await _close_connection(label, close_call, cleanup_logger)
        if error_msg is not None:
            cleanup_errors.append(error_msg)

    # Final status
    if cleanup_errors:
        print(f"\nCleanup completed with {len(cleanup_errors)} errors")
        for error in cleanup_errors:
            print(f"   • {error}")
    else:
        print("\nAll connections cleaned up successfully")

    return not cleanup_errors

# Execute cleanup
cleanup_success = await cleanup_connections()

# Final summary
print(f"\nRequirements Judging Workflow Complete!")
print("=" * 80)
print(f"Model Used: {CONFIG['MODEL_NAME']}")
print(f"Project: {CONFIG['NEO4J_PROJECT_NAME']}")
print(f"Test Mode: {CONFIG['TEST_MODE']}")
print(f"Results Processed: {len(results) if 'results' in locals() else 'N/A'}")
print(f"Log File: {CONFIG['LOG_FILENAME']}")
print(f"Cleanup Status: {'Success' if cleanup_success else 'With Warnings'}")

# Log final completion
completion_logger = logging.getLogger("completion")
completion_logger.info(f"Requirements judging workflow completed successfully")
completion_logger.info(f"Model: {CONFIG['MODEL_NAME']}, Results: {len(results) if 'results' in locals() else 'N/A'}")

print(f"\nNext Steps:")
print(f"   • Check the log file for detailed processing information")
print(f"   • Review stored results in Neo4j database")
print(f"   • Analyze traceability metrics using the results")
print(f"\nWorkflow completed!")

In [None]:
# Cell [8] - Analyze Unique Source Requirements
# Purpose: Count unique source requirements that have matches
# Dependencies: results from Cell 4
# Breadcrumbs: Results Analysis -> Source Coverage Analysis

# Guard on `results` so the cell reports 0 instead of raising NameError when
# the processing cell has not been run (at cell top level, locals() is the
# notebook's global namespace).
if 'results' in locals():
    unique_sources = len({match['source_id'] for match in results})
else:
    unique_sources = 0
print(f"Unique source requirements with matches: {unique_sources}")
unique_sources

In [None]:
# Cell [9] - Analyze Unique Target Requirements
# Purpose: Count unique target requirements that have matches
# Dependencies: results from Cell 4
# Breadcrumbs: Results Analysis -> Target Coverage Analysis

# Guard on `results` so the cell reports 0 instead of raising NameError when
# the processing cell has not been run (at cell top level, locals() is the
# notebook's global namespace).
if 'results' in locals():
    unique_targets = len({match['target_id'] for match in results})
else:
    unique_targets = 0
print(f"Unique target requirements with matches: {unique_targets}")
unique_targets

In [None]:
# Cell [10] - Total Results Count
# Purpose: Display total number of requirement matches found
# Dependencies: results from Cell 4
# Breadcrumbs: Results Analysis -> Total Count

# Count every match in `results`; report 0 when the processing cell has not
# been run and the variable does not exist yet.
if 'results' in locals():
    total_results = len(results)
else:
    total_results = 0
print(f"Total requirement matches: {total_results}")
total_results

In [None]:
# Cell [11] - Display Complete Results
# Purpose: Show the complete results dataset for detailed examination
# Dependencies: results from Cell 4
# Breadcrumbs: Results Analysis -> Complete Dataset View

# Print a short header describing the dataset (or why it is unavailable)
if 'results' in locals():
    print(f"Complete Results Dataset ({len(results)} matches):")
    print("=" * 60)
    if results:
        print("Results available for examination")
        # Keys of the first match, shown as a sample of the record layout
        print(f"Sample keys: {list(results[0].keys())}")
    else:
        print("WARNING: Results list is empty")
else:
    print("ERROR: Results variable not found. Please run Cell 4 first.")

# Jupyter only auto-displays the value of a cell's last *top-level* expression.
# A bare `results` nested inside the if/else above is never rendered, so the
# display expression has to live here. It evaluates to None (nothing shown)
# when `results` has not been defined.
results if 'results' in locals() else None