# Requirements Analysis with Claude Enhancement
**Analyzes requirement similarities using sentence transformers and enhances results with Claude AI for improved requirement association detection.**


In [None]:
# Cell [0] - Setup and Imports
# Purpose: Import all required libraries and configure environment settings for Multi-LLM testing
# Dependencies: os, sys, pathlib, dotenv, datetime, logging, praxis_sentence_transformer
# Breadcrumbs: Setup -> Imports -> Environment Configuration

import os
import sys
from pathlib import Path
from dotenv import load_dotenv
from datetime import datetime
import logging

# Import from praxis_sentence_transformer package (installed via pip)
from praxis_sentence_transformer.neo4j_operations import Neo4jClient
from praxis_sentence_transformer.analysis.analyzer import RequirementsAnalyzer
from praxis_sentence_transformer.clients.claude import ClaudeRequirementAnalyzer, RequirementMatch, AnalysisResult
from praxis_sentence_transformer.logger import setup_logging, DebugTimer

# Set up logging
# Creates the notebook-wide `logger` used by every later cell. The package helper
# setup_logging receives the logger name and logging.DEBUG (presumably the level
# to log at -- confirm against praxis_sentence_transformer.logger).
logger = setup_logging("neo4j-notebook", logging.DEBUG)

# Load environment variables
# Reads a .env file, if one is found, into the process environment so the
# os.getenv lookups in the Claude cells can see those settings.
load_dotenv()

In [None]:
# Cell [1] - Neo4j Connection Setup
# Purpose: Create the shared Neo4j client and stop early if the database cannot be reached
# Dependencies: Neo4jClient, logger, sys
# Breadcrumbs: Setup -> Database Connection -> Neo4j Client Initialization

client = Neo4jClient()

# Test connection: a falsy result from connect() is treated as failure.
connection_ok = client.connect()
if not connection_ok:
    logger.error("Failed to connect to Neo4j database")
    # sys.exit raises SystemExit, so nothing below this line runs in this cell.
    sys.exit(1)

In [None]:
# Cell [2] - Configuration and Analyzer Setup
# Purpose: Define the model parameters, ensure the results directory exists, and build the requirements analyzer
# Dependencies: Path, logger, RequirementsAnalyzer, client
# Breadcrumbs: Setup -> Configuration -> Analyzer Initialization

# Model configuration (model_name, alpha and threshold are read again by later cells)
model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
alpha, threshold = 0.6, 0.4

# Record the active configuration in the log
logger.info(f"Using model: {model_name}")
logger.info(f"Alpha: {alpha}")
logger.info(f"Threshold: {threshold}")

# Create results directory (missing parents are created; an existing directory is not an error)
results_dir = Path("results/neo4j_analysis")
results_dir.mkdir(parents=True, exist_ok=True)

# Initialize analyzer on top of the already-connected Neo4j client
analyzer = RequirementsAnalyzer(
    neo4j_client=client,
    model_name=model_name,
    alpha=alpha,
    threshold=threshold,
)

In [None]:
# Cell [3] - Run Requirements Analysis
# Purpose: Run the requirements analysis inside a DebugTimer block and print the returned statistics
# Dependencies: DebugTimer, analyzer, results_dir
# Breadcrumbs: Setup -> Analysis Execution -> Requirements Processing

with DebugTimer("Running requirements analysis"):
    # The returned mapping is kept in `stats`; the next cell reads it too.
    stats = analyzer.analyze_requirements(results_dir)

# One "name: value" line per statistic, in the mapping's own order.
print("\nAnalysis Statistics:")
for stat_name, stat_value in stats.items():
    print(f"{stat_name}: {stat_value}")

In [None]:
# Cell [4] - Example Analysis Display
# Purpose: Display detailed analysis results for first requirement match as example
# Dependencies: analyzer, stats
# Breadcrumbs: Analysis Execution -> Results Display -> Example Match Visualization

# Bind example_data up front: the Claude cell that follows tests `if example_data:`,
# which would raise NameError if the name were only assigned inside the branch below
# and no requirement had any match.
example_data = None

# Show detailed analysis for first requirement with matches
matches = stats.get("requirements_with_matches", 0)
if matches > 0:
    print("\nDetailed Analysis Example:")
    example_data = analyzer.get_example_match()
    # get_example_match() may return something falsy; in that case only the
    # heading above is printed.
    if example_data:
        print(f"\nSource Requirement ID: {example_data['source']['id']}")
        print(f"Source Content: {example_data['source']['content']}")
        print(f"\nMatching Requirements ({len(example_data['targets'])}):")
        # Number the targets from 1 for display purposes.
        for i, target in enumerate(example_data['targets'], 1):
            print(f"\nMatch {i}:")
            print(f"Target ID: {target['id']}")
            print(f"Similarity Score: {target['similarity']:.3f}")
            print(f"Content: {target['content']}")

In [None]:
# Cell [5] - Claude Analyzer Initialization
# Purpose: Initialize the Claude analyzer, run it on the example requirement's matches, then save and summarize the results
# Dependencies: ClaudeRequirementAnalyzer, RequirementMatch, DebugTimer, datetime, logging, os, logger, threshold, results_dir, example_data
# Breadcrumbs: Analysis Execution -> AI Enhancement -> Claude Integration Setup

# Enable debug logging
logging.getLogger('praxis_sentence_transformer.clients.claude.requirements_analyzer').setLevel(logging.DEBUG)

logger.info("Initializing Claude analyzer...")

# Initialize analyzer with settings from environment variables
# NOTE(review): the fallback model id is a Claude 3 Sonnet id although the variable
# is named CLAUDE_3_5_MODEL, and the association-probability cutoff is read from
# MIN_COVERAGE_THRESHOLD -- confirm both are intended.
claude_analyzer = ClaudeRequirementAnalyzer(
    model=os.getenv('CLAUDE_3_5_MODEL', 'claude-3-sonnet-20240229'),
    min_association_probability=float(os.getenv('MIN_COVERAGE_THRESHOLD', '0.6'))
)

# example_data comes from the example-display cell. Look it up defensively so this
# cell still runs (with zero matches) instead of raising NameError when that cell
# did not bind the name.
example_data = globals().get("example_data")

# Build one RequirementMatch per target of the example requirement.
requirement_matches = []
if example_data:
    source_id = example_data['source']['id']
    source_content = example_data['source']['content']

    requirement_matches = [
        RequirementMatch(
            source_id=source_id,
            source_content=source_content,
            target_id=target['id'],
            target_content=target['content'],
            similarity_score=target['similarity'],
            association_probability=0.0,  # Will be set by Claude
            is_associated=False,  # Will be set by Claude
            explanation="",  # Will be set by Claude
            timestamp=datetime.now()
        )
        for target in example_data['targets']
        # Only analyze pairs above similarity threshold
        if target['similarity'] >= threshold
    ]

logger.info(f"Created {len(requirement_matches)} requirement matches for analysis")

# Analyze requirements using Claude
with DebugTimer("Running Claude analysis"):
    results = claude_analyzer.analyze_requirement_matches(requirement_matches)

# Save results
output_dir = results_dir / "claude_analysis"
claude_analyzer.save_results(results, output_dir)

# Print summary
associated_matches = [r for r in results if r.is_associated]
print("\nClaude Analysis Summary:")
print(f"Total matches analyzed: {len(requirement_matches)}")
print(f"Associated matches found: {len(associated_matches)}")
if results:
    # Guarded by `if results` so an empty result list cannot divide by zero.
    avg_prob = sum(r.association_probability for r in results)/len(results)
    print(f"Average association probability: {avg_prob:.3f}")
print(f"\nResults saved to: {output_dir}")

# Display example matches
if associated_matches:
    print("\nExample Associated Requirements:")
    for match in associated_matches[:5]:  # Show first 5 matches
        print(f"\nSource {match.source_id} -> Target {match.target_id}")
        print(f"Association Probability: {match.association_probability:.3f}")
        print(f"Explanation: {match.explanation}")

In [None]:
# Cell [6] - Batch Requirements Processing with Claude
# Purpose: Process all requirement matches from Neo4j using Claude for comprehensive analysis
# Dependencies: ClaudeRequirementAnalyzer, RequirementMatch, ast, os, datetime, client, logger, results_dir
# Breadcrumbs: AI Enhancement -> Batch Processing -> Claude Analysis Pipeline

import ast

# Initialize Claude analyzer
claude_analyzer = ClaudeRequirementAnalyzer()

# Get model configuration from environment; only the first entry of each list is used.
# NOTE(review): these values were previously parsed with eval(), which executes any
# code placed in the environment / .env file. ast.literal_eval accepts Python
# literals only (lists, tuples, numbers, strings, ...), which covers the list-style
# defaults below. A value written as an expression (for example a range() call)
# now raises instead of being executed -- confirm no deployment relies on that.
model_name = ast.literal_eval(os.getenv('MODEL_LIST', '["sentence-transformers/multi-qa-mpnet-base-dot-v1"]'))[0]
alpha = float(ast.literal_eval(os.getenv('ALPHA_VALUES', '[0.6]'))[0])
threshold = float(ast.literal_eval(os.getenv('THRESHOLD_RANGE', '[0.4]'))[0])

# Get requirement matches from Neo4j using existing client
raw_matches = client.get_requirement_matches(
    model_name=model_name,
    alpha=alpha,
    threshold=threshold
)

logger.info(f"Retrieved {len(raw_matches)} raw matches from Neo4j")

# Debug the structure of raw matches
if raw_matches:
    logger.debug(f"Sample raw match structure: {raw_matches[0]}")

# Convert dictionary matches to RequirementMatch objects
requirement_matches = []
for match in raw_matches:
    # Best-effort conversion: a malformed entry is logged and skipped so one bad
    # record does not abort the whole batch.
    try:
        # Extract source and target info from match structure
        source = match.get('source', {})
        target = match.get('target', {})

        if not source or not target:
            logger.warning(f"Skipping match due to missing source or target: {match}")
            continue

        # Create RequirementMatch object using the dataclass constructor
        req_match = RequirementMatch(
            source_id=source.get('id'),
            source_content=source.get('content'),
            target_id=target.get('id'),
            target_content=target.get('content'),
            similarity_score=match.get('similarity', 0.0),
            association_probability=0.0,  # Will be set by Claude
            is_associated=False,  # Will be set by Claude
            explanation="",  # Will be set by Claude
            timestamp=datetime.now()
        )
        requirement_matches.append(req_match)

    except Exception as e:
        logger.error(f"Error processing match: {e}")
        logger.debug(f"Problematic match structure: {match}")

logger.info(f"Converted {len(requirement_matches)} matches to RequirementMatch objects")

# Analyze matches with Claude
analyzed_matches = claude_analyzer.analyze_requirement_matches(requirement_matches)

# Save results
# NOTE(review): this is the same directory the example-analysis cell saves into;
# verify that save_results does not overwrite those earlier files.
output_dir = results_dir / "claude_analysis"
claude_analyzer.save_results(analyzed_matches, output_dir)

# Print summary statistics
associated_matches = [m for m in analyzed_matches if m.is_associated]
print("\nAnalysis Summary:")
print(f"Total matches analyzed: {len(analyzed_matches)}")
print(f"Associated matches found: {len(associated_matches)}")
if analyzed_matches:
    # Guarded so an empty result list cannot divide by zero.
    avg_prob = sum(m.association_probability for m in analyzed_matches)/len(analyzed_matches)
    print(f"Average association probability: {avg_prob:.3f}")

# Display example matches
if associated_matches:
    print("\nExample Associated Requirements:")
    for match in associated_matches[:5]:  # Show first 5 matches
        print(f"\nSource {match.source_id} -> Target {match.target_id}")
        print(f"Association Probability: {match.association_probability:.3f}")
        print(f"Explanation: {match.explanation}")

In [None]:
# Cell [7] - Cleanup and Session Termination
# Purpose: Close database connections and finalize analysis session
# Dependencies: client, logger
# Breadcrumbs: Analysis Pipeline -> Session Management -> Resource Cleanup

# Close the Neo4j client created in Cell [1]. Run this cell last: the batch
# processing cell queries Neo4j through this same client object.
client.close()
# Final log line marking the end of the notebook run.
logger.info("Analysis complete")