# Multi-LLM Requirements Analysis Testing
**Tests multiple LLMs on requirements-analysis workflows, using requirement data from Neo4j and the LLM Manager framework.**

In [None]:
# Cell [0] - Setup and Imports
# Purpose: Import all required libraries and configure environment settings for Multi-LLM testing
# Dependencies: asyncio, sys, os, logging, pathlib, dotenv, transformers (optional, only when MODEL_TEST is enabled), praxis_requirements_analyzer modules
# Breadcrumbs: Setup -> Imports -> Environment Configuration

import asyncio
import sys
import os
import logging
from pathlib import Path
from dotenv import load_dotenv

def setup_environment():
    """
    Prepare the notebook runtime and collect its settings.

    The project root is taken to be the parent of the current working
    directory. It is inserted at the front of sys.path (unless already
    present), the .env file is loaded via load_dotenv(), and the settings
    are then read from the process environment.

    Returns:
        dict: PROJECT_ROOT and SRC_PATH (Path objects), the MODEL_TEST
        boolean, and the NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD /
        NEO4J_DATABASE / NEO4J_PROJECT_NAME values (None when unset)
    """
    root_dir = Path.cwd().parent
    root_entry = str(root_dir)

    # Put the project root first on the module search path, once only
    if root_entry not in sys.path:
        sys.path.insert(0, root_entry)

    # Must run before the os.getenv() calls below so .env values are visible
    load_dotenv()

    # MODEL_TEST counts as enabled only for 'true', '1' or 't' (any letter case)
    model_test_raw = os.getenv('MODEL_TEST', 'False')
    settings = {
        'PROJECT_ROOT': root_dir,
        'SRC_PATH': root_dir / 'src',
        'MODEL_TEST': model_test_raw.lower() in ('true', '1', 't'),
    }

    # Neo4j connection settings are passed through unchanged
    for env_key in (
        'NEO4J_URI',
        'NEO4J_USER',
        'NEO4J_PASSWORD',
        'NEO4J_DATABASE',
        'NEO4J_PROJECT_NAME',
    ):
        settings[env_key] = os.getenv(env_key)

    print(f"Project root added to path: {root_dir}")
    print(f"MODEL_TEST environment variable: {settings['MODEL_TEST']}")

    return settings

# Run setup as soon as this cell executes: it extends sys.path and builds the
# CONFIG dict before the project-module imports that follow.
CONFIG = setup_environment()

# Import project modules after path setup
from praxis_requirements_analyzer.utils.logger import setup_logger
from praxis_requirements_analyzer.llm.manager.llm_manager import LLMManager
from praxis_requirements_analyzer.neo4j import Neo4jClient, RequirementsClient, SchemaExtractor
from praxis_requirements_analyzer.requirements_analyzer.requirements_workflow import RequirementsWorkflow
from praxis_requirements_analyzer.requirements_analyzer.requirements_prompt_manager import RequirementsPromptManager

# Set up debug logging for requirements analysis
# Both loggers are requested at DEBUG level. The returned objects are stored
# but are not referenced again in the cells visible here.
requirements_logger = setup_logger("praxis_requirements_analyzer.requirements_analyzer.requirements_workflow", logging.DEBUG)
prompt_logger = setup_logger("praxis_requirements_analyzer.requirements_analyzer.requirements_prompt_manager", logging.DEBUG)

# Initialize LLM Manager
# Only the manager object is constructed here; initialize_models() is awaited
# in Cell [1].
llm_manager = LLMManager()

# Optional: Import transformers for token estimation if MODEL_TEST is enabled
# The import is best-effort: on ImportError a warning is printed and
# AutoTokenizer is left undefined (estimate_tokens in Cell [1] catches the
# resulting error and falls back to a word-count estimate).
if CONFIG['MODEL_TEST']:
    try:
        from transformers import AutoTokenizer
        print("Transformers library imported for token estimation")
    except ImportError:
        print("Warning: transformers library not available for token estimation")
        
print("Setup completed successfully!")


In [None]:
# Cell [1] - Initialize LLM Models and Run Basic Tests
# Purpose: Initialize the LLM Manager and run basic functionality tests on all models
# Dependencies: LLMManager, transformers (if MODEL_TEST=True)
# Breadcrumbs: Setup -> LLM Initialization -> Model Testing

# Initialize LLM models
# NOTE: top-level `await` only works where an event loop is already running
# (e.g. a Jupyter/IPython cell); in a plain Python script it is a syntax error.
await llm_manager.initialize_models()

if CONFIG['MODEL_TEST']:
    def estimate_tokens(text: str, model_name: str = "HuggingFaceH4/zephyr-7b-beta") -> int:
        """Estimate the number of tokens in a text string"""
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            return len(tokenizer.encode(text))
        except Exception as e:
            print(f"Error estimating tokens: {e}")
            return len(text.split()) * 2  # Rough fallback estimate

    # Test prompt and system message
    test_prompt = "What is 2 + 2? Please provide only the direct answer."
    system_message = "You are a calculator. Provide only the numerical answer."

    # Calculate total tokens needed
    total_tokens = estimate_tokens(test_prompt + system_message)
    print(f"Total tokens: {total_tokens}")
    max_tokens = min(512, total_tokens * 2)  # Set limit to 2x input tokens, max 512
    print(f"Max tokens: {max_tokens}")

    # Function to run tests
    async def test_all_models():
        tasks = []
        for model_name in llm_manager.models.keys():
            task = llm_manager.test_model(
                model_name=model_name,
                prompt=test_prompt,
                system_message=system_message
            )
            tasks.append(task)
        
        results = await asyncio.gather(*tasks)
        return results

    # Run the tests
    results = await test_all_models()

    # Display results
    for result in results:
        print(f"\nModel: {result['model']}")
        print("-" * 50)
        if "error" in result:
            print(f"Error: {result['error']}")
        else:
            print(f"Response: {result['response']}")
        print("\n")
else:
    print("MODEL_TEST is set to False - skipping LLM testing")
    print("Set MODEL_TEST=True in .env file to enable model testing")

In [None]:
# Cell [2] - Fetch Requirements from Neo4j Database
# Purpose: Connect to Neo4j database and retrieve source and target requirements
# Dependencies: Neo4jClient, RequirementsClient
# Breadcrumbs: Setup -> Database Connection -> Requirements Retrieval

def _print_requirement_samples(label, reqs, limit=3, preview_chars=150):
    """
    Print ID, type and a content preview for the first `limit` requirements.

    Parameters:
        label: Word placed in the heading ("Source" or "Target")
        reqs: List of requirement objects exposing `id`, `type` and `content`
        limit: Maximum number of requirements to show
        preview_chars: Number of leading content characters to show

    Prints nothing when `reqs` is empty.
    """
    if not reqs:
        return
    print(f"\nSample {label} Requirements (showing first {limit}):")
    print("-" * 50)
    for position, req in enumerate(reqs[:limit], 1):
        print(f"\n{position}. ID: {req.id}")
        print(f"   Type: {req.type}")
        print(f"   Content: {req.content[:preview_chars]}...")


async def fetch_requirements():
    """
    Fetch requirements from Neo4j database using configured connection parameters

    Connects with the NEO4J_* values in CONFIG, retrieves the requirements for
    CONFIG["NEO4J_PROJECT_NAME"], prints a summary with up to three samples of
    each kind, and closes the connection again.

    Returns:
        dict: Dictionary containing 'source' and 'target' requirement lists

    Raises:
        Exception: Any error is printed and then re-raised unchanged
    """
    try:
        # Initialize Neo4j client using configuration from setup
        neo4j_client = Neo4jClient(
            uri=CONFIG["NEO4J_URI"],
            user=CONFIG["NEO4J_USER"],
            password=CONFIG["NEO4J_PASSWORD"],
            database=CONFIG["NEO4J_DATABASE"]
        )

        # Connect to Neo4j
        neo4j_client.connect()
        print(f"Connected to Neo4j database: {CONFIG['NEO4J_DATABASE']}")

        # Everything after a successful connect() runs under the finally so
        # the connection is closed even if building the requirements client fails.
        try:
            # Initialize requirements client
            requirements_client = RequirementsClient(
                neo4j_client=neo4j_client,
                project_name=CONFIG["NEO4J_PROJECT_NAME"]
            )

            # Fetch requirements
            requirements = await requirements_client.get_requirements()

            # Display results summary
            print(f"\nRequirements Summary for Project: {CONFIG['NEO4J_PROJECT_NAME']}")
            print("=" * 80)
            print(f"Source requirements: {len(requirements['source'])}")
            print(f"Target requirements: {len(requirements['target'])}")

            # Show a few samples of each kind for a quick sanity check
            _print_requirement_samples("Source", requirements['source'])
            _print_requirement_samples("Target", requirements['target'])

            return requirements

        finally:
            # Close Neo4j connection when done
            neo4j_client.close()
            print("\nNeo4j connection closed successfully")

    except Exception as e:
        print(f"Error fetching requirements: {str(e)}")
        raise

# Execute the fetch operation
# The result is kept in the notebook-level `requirements` variable; the
# filtering cells read requirements['source'] and requirements['target'] from it.
requirements = await fetch_requirements()

In [None]:
# Cell [3] - Filter and Display Source Requirements
# Purpose: Filter requirements to get only SOURCE type requirements
# Dependencies: requirements data from Cell 2
# Breadcrumbs: Requirements Retrieval -> Source Filtering -> Display

# Keep only the fetched source entries whose type is exactly 'SOURCE'
source_requirements = {
    'source': [req for req in requirements['source'] if req.type == 'SOURCE']
}

# Header and total count
print(
    "Filtered Source Requirements:",
    "=" * 50,
    f"Total SOURCE requirements: {len(source_requirements['source'])}",
    sep="\n",
)

# Show up to three entries so the filter result can be checked by eye
if source_requirements['source']:
    print("\nFirst 3 Source Requirements:", "-" * 40, sep="\n")
    for i, req in enumerate(source_requirements['source'][:3]):
        print(f"\n{i+1}. ID: {req.id}")
        print(f"   Type: {req.type}")
        print(f"   Content: {req.content[:100]}...")

# Last expression of the cell: the notebook displays the filtered dict
source_requirements


In [None]:
# Cell [4] - Filter and Display Target Requirements
# Purpose: Filter requirements to get only TARGET type requirements
# Dependencies: requirements data from Cell 2
# Breadcrumbs: Requirements Retrieval -> Target Filtering -> Display

# Keep only the fetched target entries whose type is exactly 'TARGET'
target_requirements = {
    'target': [req for req in requirements['target'] if req.type == 'TARGET']
}

# Header and total count
print(
    "Filtered Target Requirements:",
    "=" * 50,
    f"Total TARGET requirements: {len(target_requirements['target'])}",
    sep="\n",
)

# Show up to three entries so the filter result can be checked by eye
if target_requirements['target']:
    print("\nFirst 3 Target Requirements:", "-" * 40, sep="\n")
    for i, req in enumerate(target_requirements['target'][:3]):
        print(f"\n{i+1}. ID: {req.id}")
        print(f"   Type: {req.type}")
        print(f"   Content: {req.content[:100]}...")

# Last expression of the cell: the notebook displays the filtered dict
target_requirements

In [None]:
# Cell [5] - Requirements Analysis Workflow Testing
# Purpose: Test the requirements analysis workflow using multiple LLM models
# Dependencies: RequirementsWorkflow, RequirementsPromptManager, filtered requirements
# Breadcrumbs: Requirements Filtering -> Workflow Initialization -> Multi-Model Analysis

# Initialize prompt manager with default settings
prompt_manager = RequirementsPromptManager()

# Create workflow instance using our existing llm_manager
# (the same manager object that was created in Cell [0] and initialized in Cell [1])
workflow = RequirementsWorkflow(llm_manager, prompt_manager)

# Get first 3 requirements of each type for testing
# Slicing never raises: a shorter (or empty) list simply yields fewer items.
# Presumably the sample is kept small to limit the number of LLM calls - confirm.
source_reqs = source_requirements['source'][:3]
target_reqs = target_requirements['target'][:3]

# Echo what is about to be analysed and which models are registered
print(f"Requirements Analysis Configuration:")
print("=" * 80)
print(f"Source requirements to test: {len(source_reqs)}")
print(f"Target requirements to test: {len(target_reqs)}")
print(f"Available LLM models: {list(llm_manager.models.keys())}")

async def process_requirements(reqs, req_type="Source"):
    """
    Run each requirement through workflow.test_models and print per-model results.

    Parameters:
        reqs: List of requirements to process; each exposes `id` and `content`
        req_type: Label used only in the printed output ("Source" or "Target")

    An exception raised while processing one requirement is printed and the
    loop continues with the next requirement. Nothing is returned.
    """
    # Analysis sections looked up in every model result, in display order
    section_names = ("verification", "validation", "quality", "structure", "matching")

    print(f"\nTesting {req_type} Requirements:")
    print("-" * 80)

    for position, req in enumerate(reqs, 1):
        print(f"\nProcessing {req_type} Requirement {position}/{len(reqs)} - ID: {req.id}")
        print(f"Content Preview: {req.content[:200]}...")
        print("-" * 60)

        try:
            per_model = await workflow.test_models(req)

            for model_name, result in per_model.items():
                print(f"\nModel: {model_name}")
                print("=" * 40)

                # A model-level error replaces all analysis output for that model
                if "error" in result:
                    print(f"ERROR: {result['error']}")
                    continue

                completed = 0
                for section in section_names:
                    section_result = result.get(section)
                    if not section_result:
                        # Missing or empty sections are not counted
                        continue

                    completed += 1
                    print(f"\n{section.title()} Analysis:")
                    print("-" * 30)
                    print(f"Model Used: {section_result['model']}")

                    # Long responses are cut to 150 characters and marked
                    response = section_result['response']
                    shown = f"{response[:150]}... [truncated]" if len(response) > 150 else response
                    print(f"Response: {shown}")

                print(f"\nAnalysis Summary: {completed}/{len(section_names)} completed")
                print("=" * 40)

        except Exception as e:
            print(f"ERROR processing requirement {req.id}: {str(e)}")

print("\nStarting Requirements Analysis Test")
print("=" * 80)

# Process both source and target requirements
if source_reqs:
    await process_requirements(source_reqs, "Source")
else:
    print("WARNING: No source requirements available for testing")

if target_reqs:
    await process_requirements(target_reqs, "Target")
else:
    print("WARNING: No target requirements available for testing")

print("\nRequirements Analysis Test Complete")
print("=" * 80)