# Multi-Modal Requirements Analysis Pipeline
**Combines sentence transformer similarity scores with machine learning classifiers and Claude AI analysis for advanced requirement association detection.**


In [None]:
# Cell [0] - Setup and Imports
# Purpose: Import all required libraries and configure environment settings for Multi-LLM testing
# Dependencies: os, sys, json, logging, pathlib, dotenv, datetime, pandas, matplotlib, sklearn, praxis_sentence_transformer
# Breadcrumbs: Setup -> Imports -> Environment Configuration

import os
import sys
import json
from pathlib import Path
from dotenv import load_dotenv
from datetime import datetime
import pandas as pd
import logging
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, fbeta_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA

# Import from praxis_sentence_transformer package (installed via pip)
from praxis_sentence_transformer.neo4j_operations import Neo4jClient
from praxis_sentence_transformer.analysis.analyzer import RequirementsAnalyzer
from praxis_sentence_transformer.clients.claude import ClaudeRequirementAnalyzer
from praxis_sentence_transformer.logger import setup_logging, DebugTimer
from praxis_sentence_transformer.visualization import RequirementsVisualizer

# Set up logging
# NOTE(review): the level passed here is logging.ERROR; presumably that
# suppresses the logger.info()/logger.debug() calls in the cells below until a
# later cell re-creates the logger at INFO — confirm against setup_logging.
logger = setup_logging("neo4j-notebook", logging.ERROR)

# Load environment variables (read further down through os.getenv)
load_dotenv()

In [None]:
# Cell [1] - Environment Variables Validation and Neo4j Client Initialization
# Purpose: Validate environment variables and establish Neo4j database connection
# Dependencies: os, logger, Neo4jClient, sys
# Breadcrumbs: Environment Configuration -> Validation -> Database Connection

# Report presence only (a tick or a cross) so secret values never reach the log
checked_variables = ('NEO4J_URI', 'NEO4J_USER', 'NEO4J_PASSWORD', 'ANTHROPIC_API_KEY', 'HF_TOKEN')
env_vars = {
    variable: ('✓' if os.getenv(variable) else '✗')
    for variable in checked_variables
}

logger.info("Environment variables loaded:")
for variable, marker in env_vars.items():
    logger.info(f"{variable}: {marker}")

# Neo4j credentials come straight from the environment
neo4j_uri = os.getenv('NEO4J_URI')
neo4j_user = os.getenv('NEO4J_USER')
neo4j_password = os.getenv('NEO4J_PASSWORD')

# Stop early when any credential is missing or empty
if not (neo4j_uri and neo4j_user and neo4j_password):
    logger.error("Missing required Neo4j credentials in environment variables")
    sys.exit(1)

# Build the client from the validated credentials
neo4j_client = Neo4jClient(uri=neo4j_uri, username=neo4j_user, password=neo4j_password)

# connect() reports success through its return value; a falsy result aborts
connected = neo4j_client.connect()
if not connected:
    logger.error("Failed to connect to Neo4j database")
    sys.exit(1)

In [None]:
# Cell [2] - Model Configuration and Results Directory Setup
# Purpose: Configure model parameters and create output directories for analysis results
# Dependencies: Path, logger, os
# Breadcrumbs: Database Connection -> Configuration -> Parameter Setup

# Analysis parameters; the project name is supplied through the environment
model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
alpha, threshold = 0.6, 0.4
project_name = os.getenv('PROJECT_NAME')

# Output directory, created together with any missing parents
results_dir = Path("..") / "results" / "neo4j_analysis"
results_dir.mkdir(parents=True, exist_ok=True)

# Echo the configuration that the following cells will use
for setting_label, setting_value in (
    ("Using model", model_name),
    ("Alpha", alpha),
    ("Threshold", threshold),
    ("Project Name", project_name),
):
    logger.info(f"{setting_label}: {setting_value}")

In [None]:
# Cell [3] - Similarity Data Retrieval and Analysis
# Purpose: Retrieve and analyze similarity data from Neo4j database for requirement pairs
# Dependencies: neo4j_client, logger, pandas, json
# Breadcrumbs: Parameter Setup -> Data Retrieval -> Similarity Analysis

# Fetch the similarity data for the configured project
similarity_results = neo4j_client.get_requirement_similarity_data(project_name=project_name)

metadata = similarity_results['metadata']
model_names = metadata['models']

# Summary statistics taken from the metadata section
logger.info(f"Project: {metadata['project_name']}")
logger.info(f"Total pairs analyzed: {metadata['total_pairs']}")
logger.info(f"Ground truth pairs: {metadata['ground_truth_pairs']}")
logger.info(f"Models found: {model_names}")

# One row per requirement pair
df = similarity_results['data']

# Walk the first three pairs once: log each one and collect the matching
# entry of the sample structure in the same pass.
sample_pairs = []
for _, pair in df.head(3).iterrows():
    logger.info(f"\nSource {pair['source_id']}:")
    logger.info(f"Content: {pair['source_content'][:100]}...")
    logger.info(f"Target {pair['target_id']}:")
    logger.info(f"Content: {pair['target_content'][:100]}...")
    logger.info(f"Ground Truth: {pair['is_ground_truth']}")

    model_scores = {}
    for model in model_names:
        if model not in pair:  # this model has no column in the data
            continue
        logger.info(f"{model} similarity: {pair[model]:.3f}")
        # Missing scores are logged above but left out of the sample
        if not pd.isna(pair[model]):
            model_scores[model] = pair[model]

    sample_pairs.append({
        "source": {"id": pair['source_id'], "content": pair['source_content']},
        "target": {"id": pair['target_id'], "content": pair['target_content']},
        "is_ground_truth": pair['is_ground_truth'],
        "model_scores": model_scores,
    })

# Sample structure: the full metadata plus the first three pairs
sample_data = {"metadata": metadata, "pairs": sample_pairs}

# Log sample structure
logger.debug("Sample data structure:")
logger.debug(json.dumps(sample_data, indent=2))

In [None]:
# Cell [4] - Similarity Results Display
# Purpose: Display the retrieved similarity results for inspection and validation
# Dependencies: similarity_results
# Breadcrumbs: Similarity Analysis -> Data Inspection -> Results Validation

# Bare expression: as the last statement of the cell, its value is what the
# notebook renders. The object carries a 'metadata' entry and the pair table
# under 'data'.
similarity_results

In [None]:
# Cell [5] - DataFrame Head Inspection
# Purpose: Inspect the first few rows of the DataFrame for data structure validation
# Dependencies: df (DataFrame from similarity_results)
# Breadcrumbs: Results Validation -> Data Structure -> DataFrame Inspection

# Bare expression so the notebook renders the leading rows of the pair table
df.head()

In [None]:
# Cell [6] - DataFrame Size Check
# Purpose: Check the total number of records in the DataFrame for dataset size validation
# Dependencies: df
# Breadcrumbs: DataFrame Inspection -> Size Validation -> Data Quality Check

# Number of rows in df, i.e. the number of requirement pairs loaded
len(df)

In [None]:
# Cell [7] - Correlation Analysis
# Purpose: Analyze correlations between model scores and ground truth labels for feature evaluation
# Dependencies: pandas, df
# Breadcrumbs: Data Quality Check -> Statistical Analysis -> Correlation Assessment

# Identifier and free-text columns are left out of the correlation
non_numeric_columns = ['source_id', 'target_id', 'source_content', 'target_content']
numeric_df = df.drop(columns=non_numeric_columns)

# Pairwise correlations between the remaining columns
correlation_matrix = numeric_df.corr()

# Keep only the label's column and remove its self-correlation entry
ground_truth_correlation = (
    correlation_matrix
    .loc[:, 'is_ground_truth']
    .drop(labels='is_ground_truth')
)
print(ground_truth_correlation)

In [None]:
# Cell [8] - Logistic Regression Model Training and Evaluation
# Purpose: Train and evaluate logistic regression model for requirement similarity classification
# Dependencies: sklearn, matplotlib, pandas, logger
# Breadcrumbs: Correlation Assessment -> Model Training -> Logistic Regression Analysis

# Features are every column except the label, the two IDs and the two text
# columns; the label is cast to int for scikit-learn.
X = df.drop(columns=['is_ground_truth', 'source_id', 'target_id', 'source_content', 'target_content'])
y = df['is_ground_truth'].astype(int)

# Calculate and log the correlation matrix between the feature columns
correlation_matrix = X.corr()
logger.info("Correlation matrix:")
logger.debug(correlation_matrix.to_string())

# Split the data; the fixed seed keeps the split reproducible
# NOTE(review): the split is not stratified, so a heavily imbalanced label can
# leave few or no positives in the test rows — consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the sizes of the training and testing sets
logger.info(f"Training set size: {X_train.shape[0]}, Testing set size: {X_test.shape[0]}")

# Train the model with class weights to compensate for label imbalance
model = LogisticRegression(class_weight='balanced')
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))

# Calculate F2 score (beta=2 weights recall higher than precision)
f2_score = fbeta_score(y_test, y_pred, beta=2)
print(f"F2 Score: {f2_score:.4f}")

# Confusion Matrix
# labels=[0, 1] pins the matrix to 2x2 even when the test rows and the
# predictions contain a single class; without it the four-way unpacking below
# raises ValueError in that case.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
logger.info("Confusion Matrix:")
logger.debug(conf_matrix)

# Extract TP, FN, FP, TN from the confusion matrix (row-major: TN, FP, FN, TP)
TN, FP, FN, TP = conf_matrix.ravel()
logger.info(f"True Positives (TP): {TP}")
logger.info(f"False Negatives (FN): {FN}")
logger.info(f"False Positives (FP): {FP}")
logger.info(f"True Negatives (TN): {TN}")

# Verify the total count in the confusion matrix against the test set size
total_count = conf_matrix.sum()
logger.info(f"Total count in confusion matrix: {total_count}, Expected: {y_test.shape[0]}")

# Feature importance analysis: the fitted coefficients, one per feature column
coefficients = model.coef_[0]
feature_importance = pd.Series(coefficients, index=X.columns).sort_values(ascending=False)

# Display the feature importance
print("Feature Importance (Sentence Transformers):")
print(feature_importance)

# Optionally, plot the feature importance
plt.figure(figsize=(10, 6))
feature_importance.plot(kind='bar')
plt.title('Feature Importance of Sentence Transformers')
plt.xlabel('Sentence Transformers')
plt.ylabel('Coefficient Value')
plt.show()

In [None]:
# Cell [9] - Random Forest Model Training and Feature Importance
# Purpose: Train Random Forest classifier and analyze feature importance for model interpretation
# Dependencies: sklearn, pandas, matplotlib
# Breadcrumbs: Logistic Regression Analysis -> Ensemble Methods -> Random Forest Analysis

# Feature matrix (everything except label, IDs and text) and integer label
excluded_columns = ['is_ground_truth', 'source_id', 'target_id', 'source_content', 'target_content']
X = df.drop(columns=excluded_columns)
y = df['is_ground_truth'].astype(int)

# 80/20 split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a class-weighted Random Forest on the training rows
rf_model = RandomForestClassifier(class_weight='balanced', random_state=42)
rf_model.fit(X_train, y_train)

# Score the held-out rows
y_pred = rf_model.predict(X_test)
print(classification_report(y_test, y_pred))

f2_score = fbeta_score(y_test, y_pred, beta=2)
print(f"F2 Score: {f2_score:.4f}")

# Importances from the fitted forest, ordered from most to least important
importances = rf_model.feature_importances_
feature_importance = pd.Series(importances, index=X.columns).sort_values(ascending=False)

print("Feature Importance (Sentence Transformers):")
print(feature_importance)

# Bar chart of the same ranking
importance_fig, importance_ax = plt.subplots(figsize=(10, 6))
feature_importance.plot(kind='bar', ax=importance_ax)
importance_ax.set_title('Feature Importance of Sentence Transformers')
importance_ax.set_xlabel('Sentence Transformers')
importance_ax.set_ylabel('Importance Score')
plt.show()

In [None]:
# Cell [10] - Feature Selection and Model Evaluation with Top Features
# Purpose: Evaluate model performance using only top-ranked features for optimization
# Dependencies: sklearn, pandas, matplotlib
# Breadcrumbs: Random Forest Analysis -> Feature Selection -> Top Features Evaluation

# Prepare the data
X = df.drop(columns=['is_ground_truth', 'source_id', 'target_id', 'source_content', 'target_content'])
y = df['is_ground_truth'].astype(int)

# Split BEFORE ranking features so the held-out rows never influence which
# features are selected. Ranking on all rows and splitting afterwards leaks
# test information into the selection and inflates the reported scores.
# The row partition depends only on the row count and the seed, so these are
# the same train/test rows a split of the reduced matrix would produce.
X_train_all, X_test_all, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the initial Random Forest model on the training rows to get feature importances
rf_model = RandomForestClassifier(class_weight='balanced', random_state=42)
rf_model.fit(X_train_all, y_train)

# Feature importance analysis
importances = rf_model.feature_importances_
feature_importance = pd.Series(importances, index=X.columns).sort_values(ascending=False)

# Select top N features (head() simply returns fewer when fewer exist)
top_n = 5
top_features = feature_importance.head(top_n).index.tolist()

# Restrict the full matrix and both partitions to the selected features
X_top = X[top_features]
X_train, X_test = X_train_all[top_features], X_test_all[top_features]

# Train the Random Forest model with top features
rf_model_top = RandomForestClassifier(class_weight='balanced', random_state=42)
rf_model_top.fit(X_train, y_train)

# Predictions
y_pred_top = rf_model_top.predict(X_test)

# Evaluation
print("Classification report for model with top features:")
print(classification_report(y_test, y_pred_top))

# Calculate F2 score
f2_score_top = fbeta_score(y_test, y_pred_top, beta=2)
print(f"F2 Score with top features: {f2_score_top:.4f}")

# Display feature importance for the top features
print("Feature Importance (Top Features):")
print(feature_importance[top_features])

# Optionally, plot the feature importance for the top features
plt.figure(figsize=(10, 6))
feature_importance[top_features].plot(kind='bar')
plt.title('Feature Importance of Top Sentence Transformers')
plt.xlabel('Sentence Transformers')
plt.ylabel('Importance Score')
plt.show()

In [None]:
# Cell [11] - F1 and F2 Score Evaluation for Top N Features
# Purpose: Systematically evaluate F1 and F2 scores across different numbers of top features
# Dependencies: sklearn, pandas, matplotlib
# Breadcrumbs: Top Features Evaluation -> Performance Metrics -> Score Optimization

# Prepare the data
X = df.drop(columns=['is_ground_truth', 'source_id', 'target_id', 'source_content', 'target_content'])
y = df['is_ground_truth'].astype(int)

# Split once, up front. The partition depends only on the row count and the
# seed, so repeating the split inside the loop produced the same rows every
# time; doing it here also lets the ranking below see training rows only.
X_train_all, X_test_all, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an initial Random Forest model on the training rows to get feature
# importances. Ranking on all rows would leak the test rows into the selection.
rf_model = RandomForestClassifier(class_weight='balanced', random_state=42)
rf_model.fit(X_train_all, y_train)

# Feature importance analysis
importances = rf_model.feature_importances_
feature_importance = pd.Series(importances, index=X.columns).sort_values(ascending=False)

# Store results
results = []

# Evaluate models for top N features, N = 1 .. number of features
for n in range(1, len(feature_importance) + 1):
    # Select top N features
    top_features = feature_importance.head(n).index.tolist()
    X_top = X[top_features]
    X_train, X_test = X_train_all[top_features], X_test_all[top_features]

    # Train the Random Forest model with top features
    rf_model_top = RandomForestClassifier(class_weight='balanced', random_state=42)
    rf_model_top.fit(X_train, y_train)

    # Predictions
    y_pred_top = rf_model_top.predict(X_test)

    # Calculate F1 and F2 scores (F-beta with beta=1 and beta=2)
    f1_score = fbeta_score(y_test, y_pred_top, beta=1)
    f2_score = fbeta_score(y_test, y_pred_top, beta=2)

    # Store the results
    results.append({'N': n, 'F1 Score': f1_score, 'F2 Score': f2_score})

# Convert results to DataFrame for easier analysis
results_df = pd.DataFrame(results)

# Display results
print(results_df)

# Plot F1 and F2 scores against the number of features used
plt.figure(figsize=(12, 6))
plt.plot(results_df['N'], results_df['F1 Score'], marker='o', label='F1 Score', color='blue')
plt.plot(results_df['N'], results_df['F2 Score'], marker='o', label='F2 Score', color='orange')
plt.title('F1 and F2 Scores for Top N Features')
plt.xlabel('Number of Top Features (N)')
plt.ylabel('Score')
plt.xticks(results_df['N'])
plt.legend()
plt.grid()
plt.show()

In [None]:
# Cell [12] - Principal Component Analysis Visualization
# Purpose: Apply PCA for dimensionality reduction and visualize data distribution patterns
# Dependencies: sklearn.decomposition, matplotlib
# Breadcrumbs: Score Optimization -> Dimensionality Reduction -> PCA Visualization

# Feature matrix (label, IDs and text removed) and integer label for colouring
excluded_columns = ['is_ground_truth', 'source_id', 'target_id', 'source_content', 'target_content']
X = df.drop(columns=excluded_columns)
y = df['is_ground_truth'].astype(int)

# Project the features onto their first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
first_component, second_component = X_pca[:, 0], X_pca[:, 1]

# Scatter of the projection, coloured by the ground-truth label
pca_fig, pca_ax = plt.subplots(figsize=(8, 6))
points = pca_ax.scatter(first_component, second_component, c=y, cmap='viridis', edgecolor='k', s=50)
pca_ax.set_title('PCA of Sentence Transformers')
pca_ax.set_xlabel('Principal Component 1')
pca_ax.set_ylabel('Principal Component 2')
pca_fig.colorbar(points, ax=pca_ax, label='is_ground_truth')
plt.show()

In [None]:
# Cell [13] - Claude AI Integration and Mapping Processing
# Purpose: Initialize Claude AI analyzer and process requirement mappings for enhanced analysis
# Dependencies: os, ClaudeRequirementAnalyzer, DebugTimer, logger, pandas
# Breadcrumbs: PCA Visualization -> AI Enhancement -> Claude Integration

# Get configuration from environment
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
claude_model = os.getenv("CLAUDE_3_5_MODEL")
min_association_prob = float(os.getenv("MIN_ASSOCIATION_PROBABILITY", 0.6))

# Validate environment variables
if not anthropic_api_key:
    raise ValueError("ANTHROPIC_API_KEY not found in environment variables")
if not claude_model:
    raise ValueError("CLAUDE_3_5_MODEL not found in environment variables")

logger.debug("Retrieved configuration from environment variables")

# Initialize Claude analyzer
try:
    claude_analyzer = ClaudeRequirementAnalyzer(
        api_key=anthropic_api_key,
        model_name=claude_model,
        min_association_probability=min_association_prob
    )
    logger.info(f"Successfully initialized Claude analyzer with model: {claude_model}")

    # Log the system prompt being used
    logger.debug(f"Using system prompt:\n{claude_analyzer.system_prompt}")

    # Process mappings with Claude
    logger.info("Starting mapping processing with Claude...")
    timer = DebugTimer("Processing mappings with Claude")
    timer.start()

    # What gets sent to Claude is held under its own name. The notebook-wide
    # similarity_results is deliberately NOT overwritten: replacing it with
    # the test subset would silently truncate the data seen by every later
    # cell and by re-runs of this one after TEST_MODE is switched off.
    claude_mappings = similarity_results

    test_mode = os.getenv("TEST_MODE", "false").strip().lower() in ["true", "1", "t", "yes"]
    if test_mode:
        logger.info("Test mode is enabled. Processing first 2 source requirements with their first 12 targets each.")
        all_pairs = similarity_results['data']

        # First 2 distinct source IDs, in order of first appearance
        test_sources = all_pairs['source_id'].unique()[:2]

        # For each of those sources keep its first 12 rows
        filtered_data = [
            all_pairs[all_pairs['source_id'] == source].head(12)
            for source in test_sources
        ]

        # pd.concat raises on an empty list, so an empty data set falls back
        # to an empty slice that keeps the original columns.
        test_data = pd.concat(filtered_data) if filtered_data else all_pairs.iloc[0:0]

        # Same metadata, reduced pair table
        claude_mappings = {
            'metadata': similarity_results['metadata'],
            'data': test_data
        }
        logger.info(f"Filtered to {len(test_data)} pairs for testing")
    else:
        logger.info("Test mode is disabled. Processing all source requirements.")

    # Pass Neo4j client to process_mappings
    claude_requirements_results_set = claude_analyzer.process_mappings(
        mappings=claude_mappings,  # This contains the model names in metadata
        neo4j_client=neo4j_client
    )

    timer.end()
    logger.info(f"Processing completed in {timer.duration:.2f} seconds")

    # Log summary statistics
    logger.info(f"Processed {claude_requirements_results_set.total_target_matches} requirement matches")
    logger.info(f"Found {claude_requirements_results_set.total_associated_matches} associated matches")

    # Show detailed debug info about some matches
    if claude_requirements_results_set.processed_matches:
        logger.info("Sample of processed matches:")
        for i, match in enumerate(claude_requirements_results_set.processed_matches[:2]):
            # The similarity score may be absent; show a placeholder then
            similarity_score = f"{match.similarity_score:.3f}" if match.similarity_score is not None else "N/A"
            logger.debug(f"""
            Match {i+1}:
            Source ID: {match.source_id}
            Source Content: {match.source_content[:100]}...
            Target ID: {match.target_id}
            Target Content: {match.target_content[:100]}...
            Similarity Score: {similarity_score}
            Association Probability: {match.association_probability:.3f}
            Is Associated: {match.is_associated}
            Explanation: {match.explanation}
            """)
        # Print summary of results
        logger.info("\nClaude Analysis Summary:")
        logger.info(f"Total source requirements processed: {claude_requirements_results_set.total_source_requirements}")
        logger.info(f"Total target matches analyzed: {claude_requirements_results_set.total_target_matches}")
        logger.info(f"Total associated matches found: {claude_requirements_results_set.total_associated_matches}")

        # Print breakdown by source requirement
        logger.info("\nBreakdown by source requirement:")
        source_counts = {}
        associated_counts = {}

        # Tally all matches, and the associated ones, per source requirement
        for match in claude_requirements_results_set.processed_matches:
            source_counts[match.source_id] = source_counts.get(match.source_id, 0) + 1
            if match.is_associated:
                associated_counts[match.source_id] = associated_counts.get(match.source_id, 0) + 1

        for source_id, count in source_counts.items():
            associated = associated_counts.get(source_id, 0)
            logger.info(f"Source {source_id}:")
            logger.info(f"  - Total target matches: {count}")
            logger.info(f"  - Associated matches: {associated}")

except Exception as e:
    # Log for the notebook record, then let the original exception propagate
    logger.error(f"Error in Claude analysis: {str(e)}")
    raise

In [None]:
# [NEW CELL] - Select Top Transformers and Process with Claude
# Purpose: Rank the sentence-transformer score columns with a Random Forest,
#          keep the five most important, and run the Claude analysis with them.

# Re-create the logger at INFO level for this and the following cells
logger = setup_logging(__name__, logging.INFO)

# First, identify top performing transformers using Random Forest
logger.info("Analyzing transformer performance with Random Forest...")

# Prepare the data - exclude non-transformer columns
X = df.drop(columns=['is_ground_truth', 'source_id', 'target_id', 'source_content', 'target_content'])
y = df['is_ground_truth'].astype(int)

# Train Random Forest model on all labelled pairs
rf_model = RandomForestClassifier(class_weight='balanced', random_state=42)
rf_model.fit(X, y)

# Get feature importance and select top 5 transformers
feature_importance = pd.Series(rf_model.feature_importances_, index=X.columns).sort_values(ascending=False)
top_5_transformers = feature_importance.head(5).index.tolist()

logger.info("Selected top 5 transformers based on importance:")
for i, transformer in enumerate(top_5_transformers, 1):
    logger.info(f"{i}. {transformer} (importance: {feature_importance[transformer]:.4f})")

# Get configuration from environment
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
claude_model = os.getenv("CLAUDE_3_5_MODEL")
min_association_prob = float(os.getenv("MIN_ASSOCIATION_PROBABILITY", 0.6))

# Validate environment variables
if not anthropic_api_key:
    raise ValueError("ANTHROPIC_API_KEY not found in environment variables")
if not claude_model:
    raise ValueError("CLAUDE_3_5_MODEL not found in environment variables")

logger.debug("Retrieved configuration from environment variables")

# Initialize Claude analyzer with top transformers
try:
    claude_analyzer = ClaudeRequirementAnalyzer(
        api_key=anthropic_api_key,
        model_name=claude_model,
        min_association_probability=min_association_prob,
        transformer_names=top_5_transformers  # Pass the top transformers
    )
    logger.info(f"Successfully initialized Claude analyzer with model: {claude_model}")

    # Log the system prompt being used
    logger.debug(f"Using system prompt:\n{claude_analyzer.system_prompt}")

    # Process mappings with Claude using selected transformers
    logger.info("Starting mapping processing with selected transformers...")
    timer = DebugTimer("Processing mappings with selected transformers")
    timer.start()

    # What gets sent to Claude is held under its own name. The notebook-wide
    # similarity_results is deliberately NOT overwritten: replacing it with
    # the test subset would silently truncate the data seen by every later
    # cell and by re-runs of this one after TEST_MODE is switched off.
    claude_mappings = similarity_results

    test_mode = os.getenv("TEST_MODE", "false").strip().lower() in ["true", "1", "t", "yes"]
    if test_mode:
        logger.info("Test mode is enabled. Processing first 2 source requirements with their first 12 targets each.")
        all_pairs = similarity_results['data']

        # First 2 distinct source IDs, in order of first appearance
        test_sources = all_pairs['source_id'].unique()[:2]

        # For each of those sources keep its first 12 rows
        filtered_data = [
            all_pairs[all_pairs['source_id'] == source].head(12)
            for source in test_sources
        ]

        # pd.concat raises on an empty list, so an empty data set falls back
        # to an empty slice that keeps the original columns.
        test_data = pd.concat(filtered_data) if filtered_data else all_pairs.iloc[0:0]

        # Same metadata, reduced pair table
        claude_mappings = {
            'metadata': similarity_results['metadata'],
            'data': test_data
        }
        logger.info(f"Filtered to {len(test_data)} pairs for testing")
    else:
        logger.info("Test mode is disabled. Processing all source requirements.")

    # Use the new process_mappings_with_sentence_transformers function
    claude_requirements_results_set = claude_analyzer.process_mappings_with_sentence_transformers(
        mappings=claude_mappings,
        sentence_transformers=top_5_transformers,
        neo4j_client=neo4j_client
    )

    timer.end()
    logger.info(f"Processing completed in {timer.duration:.2f} seconds")

    # Log summary statistics
    logger.info(f"Processed {claude_requirements_results_set.total_target_matches} requirement matches")
    logger.info(f"Found {claude_requirements_results_set.total_associated_matches} associated matches")

    # Show detailed debug info about some matches
    if claude_requirements_results_set.processed_matches:
        logger.info("Sample of processed matches:")
        for i, match in enumerate(claude_requirements_results_set.processed_matches[:2]):
            logger.debug(f"""
            Match {i+1}:
            Source ID: {match.source_id}
            Source Content: {match.source_content[:100]}...
            Target ID: {match.target_id}
            Target Content: {match.target_content[:100]}...
            Association Probability: {match.association_probability:.3f}
            Is Associated: {match.is_associated}
            Explanation: {match.explanation}
            """)

        # Print summary of results
        logger.info("\nClaude Analysis Summary:")
        logger.info(f"Total source requirements processed: {claude_requirements_results_set.total_source_requirements}")
        logger.info(f"Total target matches analyzed: {claude_requirements_results_set.total_target_matches}")
        logger.info(f"Total associated matches found: {claude_requirements_results_set.total_associated_matches}")

        # Print breakdown by source requirement
        logger.info("\nBreakdown by source requirement:")
        source_counts = {}
        associated_counts = {}

        # Tally all matches, and the associated ones, per source requirement
        for match in claude_requirements_results_set.processed_matches:
            source_counts[match.source_id] = source_counts.get(match.source_id, 0) + 1
            if match.is_associated:
                associated_counts[match.source_id] = associated_counts.get(match.source_id, 0) + 1

        for source_id, count in source_counts.items():
            associated = associated_counts.get(source_id, 0)
            logger.info(f"Source {source_id}:")
            logger.info(f"  - Total target matches: {count}")
            logger.info(f"  - Associated matches: {associated}")

except Exception as e:
    # Log for the notebook record, then let the original exception propagate
    logger.error(f"Error in Claude analysis: {str(e)}")
    raise

In [None]:
# Ad-hoc check: evaluates the same TEST_MODE expression the Claude cells use,
# so the notebook shows whether test mode is currently switched on.
os.getenv("TEST_MODE", "false").strip().lower() in ["true", "1", "t", "yes"]

In [None]:
# [CELL 6] - Create LLM Requirement Traces (disabled: every statement below is commented out)
#if claude_requirements_results_set.processed_matches:
    # Configuration
#    sentence_transformer_model = model_name
#    llm_model = claude_model  # Using the Claude model name from environment

#    logger.info("Creating LLM requirement traces...")
#    logger.info(f"Sentence Transformer Model: {sentence_transformer_model}")
#    logger.info(f"LLM Model: {llm_model}")

    # Create traces
#    success_count, failure_count = neo4j_client.create_llm_traces_from_results(
#        results_set=claude_requirements_results_set,
#        model_name=sentence_transformer_model,
#        llm_model_name=llm_model
#    )

    # Log results
 #   logger.info(f"Successfully created {success_count} LLM requirement traces")
 #   if failure_count > 0:
 #       logger.warning(f"Failed to create {failure_count} traces")

    # Verify creation
 #   verification_query = f"""
 #   MATCH ()-[r:LLM_REQUIREMENT_TRACE]->()
 #   WHERE r.llm_model_name = '{llm_model}'
 #   RETURN count(r) as count
#    """

 #   with neo4j_client.driver.session(database=neo4j_client.database) as session:
 #       result = session.run(verification_query)
#        trace_count = result.single()["count"]
#        logger.info(f"Total LLM requirement traces in database for {llm_model}: {trace_count}")

In [None]:
# [CELL 7] - Statistical Analysis
# Purpose: Compute classification metrics for the LLM traces, log a sample of
#          each outcome class, and save everything to a timestamped JSON file.
logger.info("Performing statistical analysis of results")

# NOTE(review): this rebinds claude_model to CLAUDE_2_MODEL, whereas the Claude
# processing cells read CLAUDE_3_5_MODEL — confirm the metrics are meant to be
# computed for a different model than the one used for processing.
claude_model = os.getenv("CLAUDE_2_MODEL")

# Validate it the same way the processing cells validate their model name. An
# unset variable would otherwise travel on as None and only fail later, when
# the visualisation cell calls claude_model.replace(...).
if not claude_model:
    raise ValueError("CLAUDE_2_MODEL not found in environment variables")

# Initialize analyzer if not already initialized
analyzer = RequirementsAnalyzer(
    neo4j_client=neo4j_client,
    sentence_transformer_model_name=model_name,  # Updated variable name
    llm_model_name=claude_model
)

# Calculate basic metrics
metrics = analyzer.calculate_metrics(
    llm_model_name=claude_model,
    sentence_transformer_model_name=model_name
)

# Print detailed results
logger.info("\nDetailed Metrics:")
logger.info(f"True Positives: {metrics['true_positives']}")
logger.info(f"False Positives: {metrics['false_positives']}")
logger.info(f"True Negatives: {metrics['true_negatives']}")
logger.info(f"False Negatives: {metrics['false_negatives']}")
logger.info(f"\nPrecision: {metrics['precision']:.4f}")
logger.info(f"Recall: {metrics['recall']:.4f}")
logger.info(f"Accuracy: {metrics['accuracy']:.4f}")
logger.info(f"Specificity: {metrics['specificity']:.4f}")
logger.info(f"Balanced Accuracy: {metrics['balanced_accuracy']:.4f}")
logger.info(f"F1 Score: {metrics['f1_score']:.4f}")

# Get confusion matrix (the layout legend is logged just before the values)
conf_matrix = analyzer.get_confusion_matrix()
logger.info("\nConfusion Matrix:")
logger.info("[[TN, FP]")
logger.info(" [FN, TP]]")
logger.info(f"\n{conf_matrix}")

# Get detailed classification information
logger.info("\nDetailed Classification Analysis:")
classification_details = analyzer.get_all_classification_details()

# True Positives Analysis
tp_details = pd.DataFrame(classification_details['true_positives'])
if not tp_details.empty:
    logger.info("\nTrue Positives Details:")
    logger.info(f"Total Count: {len(tp_details)}")
    logger.info("\nSample of True Positive matches:")
    logger.info(tp_details.head().to_string())
    logger.info(f"\nAverage LLM Confidence: {tp_details['llm_confidence'].mean():.4f}")
    logger.info(f"Average Ground Truth Confidence: {tp_details['ground_truth_confidence'].mean():.4f}")

# False Positives Analysis (only the LLM confidence is reported here)
fp_details = pd.DataFrame(classification_details['false_positives'])
if not fp_details.empty:
    logger.info("\nFalse Positives Details:")
    logger.info(f"Total Count: {len(fp_details)}")
    logger.info("\nSample of False Positive matches:")
    logger.info(fp_details.head().to_string())
    logger.info(f"\nAverage LLM Confidence: {fp_details['llm_confidence'].mean():.4f}")

# False Negatives Analysis (only the ground-truth confidence is reported here)
fn_details = pd.DataFrame(classification_details['false_negatives'])
if not fn_details.empty:
    logger.info("\nFalse Negatives Details:")
    logger.info(f"Total Count: {len(fn_details)}")
    logger.info("\nSample of False Negative matches:")
    logger.info(fn_details.head().to_string())
    logger.info(f"\nAverage Ground Truth Confidence: {fn_details['ground_truth_confidence'].mean():.4f}")

# True Negatives Analysis
tn_details = pd.DataFrame(classification_details['true_negatives'])
if not tn_details.empty:
    logger.info("\nTrue Negatives Details:")
    logger.info(f"Total Count: {len(tn_details)}")
    logger.info("\nSample of True Negative pairs:")
    logger.info(tn_details.head().to_string())

# Save detailed results to file
# NOTE(review): this rebinds results_dir to "results/analysis", a different
# location from the "../results/neo4j_analysis" directory created in Cell [2].
results_dir = Path("results/analysis")
results_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
results_file = results_dir / f"detailed_analysis_{timestamp}.json"

# tolist() turns the matrix into nested lists so json can serialise it
detailed_results = {
    'metrics': metrics,
    'confusion_matrix': conf_matrix.tolist(),
    'classification_details': classification_details
}

with open(results_file, 'w', encoding='utf-8') as f:
    json.dump(detailed_results, f, indent=2)

logger.info(f"\nDetailed analysis results saved to {results_file}")

In [None]:
# [CELL 8] - Visualize Analysis Results
# Purpose: Plot the confusion matrix, save it under a model-specific folder
#          and show a summary table of the headline metrics.

# Visualizer labelled with both model names
visualizer = RequirementsVisualizer(figsize=(8, 6), st_model=model_name, llm_model=claude_model)

# Timestamp used in the output filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Folder names derived from the model names: '/' and '-' become '_'
st_name = model_name.translate(str.maketrans('/-', '__'))
llm_name = claude_model.translate(str.maketrans('-', '_'))
results_dir = Path(f"results/analysis_plots/{llm_name}/{st_name}")
results_dir.mkdir(parents=True, exist_ok=True)

# Metrics as computed by the analyzer with its default arguments
metrics = analyzer.calculate_metrics()

# Confusion matrix: plotted, saved to disk and shown inline
plt.figure(figsize=(8, 6))
conf_matrix = analyzer.get_confusion_matrix()
confusion_png = results_dir / f"confusion_{timestamp}.png"
visualizer.plot_confusion_matrix(
    conf_matrix,
    title="Model Confusion Matrix",
    save_path=str(confusion_png),
    dpi=100,
)
plt.show()

# Two-column summary table, values formatted to four decimals
metrics_to_display = ['precision', 'recall', 'f1_score', 'accuracy', 'balanced_accuracy']
metrics_df = pd.DataFrame(
    [(metric, f"{metrics[metric]:.4f}") for metric in metrics_to_display],
    columns=['Metric', 'Value'],
)

# Lift the pandas display limits so nothing in the table is truncated
for display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(display_option, None)

display(metrics_df)

# Record where the plot was written
logger.info("\nPlots saved to:")
logger.info(f"- Confusion Matrix: {results_dir}/confusion_{timestamp}.png")

# Release any figures that are still open
plt.close('all')

In [None]:
# [CELL 9] - Cleanup
# Close the Neo4j client created in Cell [1]; no cell after this one uses it.
neo4j_client.close()
logger.info("Analysis complete")