# In [None]:
"""
Attending Supervision Efficacy
Question: Does mandatory attending supervision change state or just stabilize?
"""
import json
import pandas as pd
import numpy as np
from pathlib import Path

# Load data
# JSON text is UTF-8 by specification; the encoding is passed explicitly so the
# read does not depend on the platform's locale default (e.g. cp1252 on Windows).
holdout_path = Path('outputs/Medical-Escalation-EFF_20260127_184136/holdout_validation/holdout_hospitals.json')
with open(holdout_path, 'r', encoding='utf-8') as f:
    holdout_data = json.load(f)

# presumably a list of per-hospital records (one row each) — confirm against the file
holdout = pd.DataFrame(holdout_data)

# Report banner.
print("=" * 80, "ANALYSIS 2: ATTENDING SUPERVISION EFFICACY", "=" * 80, sep="\n")

# Axioms requiring attending supervision
# Axiom #8: MORT_30_HF > 12 AND MORT_30_AMI > 12 AND PSI_09 > 2.5
# Axiom #9: MORT_30_PN > 15 AND PSI_09 > 2.3 AND PSI_11 > 9
# Axiom #21: (MORT_30_PN > 15 OR MORT_30_HF > 12) AND PSI_09 > 2.0
#   (Axiom #21 is listed for reference only; no mask is built for it here.)

# State the two competing hypotheses before showing any numbers.
print("\n".join([
    "\nüéØ HYPOTHESIS:",
    "   If attending supervision is State-CHANGER:",
    "      ‚Üí Lower mortality than comparable non-supervised patterns",
    "      ‚Üí Fewer complications",
    "      ‚Üí Lower transfer rates",
    "\n   If attending supervision is State-STABILIZER:",
    "      ‚Üí Similar mortality to non-supervised",
    "      ‚Üí Similar complications",
    "      ‚Üí Just prevents further deterioration",
]))

# Boolean row masks for the supervision-required axioms.
supervised_axiom8 = (
    holdout['MORT_30_HF'].gt(12)
    & holdout['MORT_30_AMI'].gt(12)
    & holdout['PSI_09'].gt(2.5)
)

supervised_axiom9 = (
    holdout['MORT_30_PN'].gt(15)
    & holdout['PSI_09'].gt(2.3)
    & holdout['PSI_11'].gt(9)
)

# Comparable non-supervised axioms.
# NOTE(review): both are plain copies of the supervised masks, so each
# "non-supervised" group contains exactly the same rows as its supervised twin.
# Axiom #4: same conditions as #8
nonsupervised_axiom4 = supervised_axiom8.copy()

# Axiom #5: same conditions as #9 but without the supervision requirement
nonsupervised_axiom5 = supervised_axiom9.copy()

# How many rows each pattern selects.
print("\nüìä Pattern Prevalence:")
print("\n".join(
    f"   {label}: {selector.sum()} hospitals"
    for label, selector in (
        ("Supervised Axiom #8", supervised_axiom8),
        ("Supervised Axiom #9", supervised_axiom9),
        ("Non-supervised Axiom #5", nonsupervised_axiom5),
        ("Non-supervised Axiom #4", nonsupervised_axiom4),
    )
))

# Calculate mortality for each group
mortality_cols = ['MORT_30_PN', 'MORT_30_HF', 'MORT_30_AMI', 'MORT_30_COPD']
available_mort = [c for c in mortality_cols if c in holdout.columns]

# Label -> boolean row mask for every pattern under comparison.
# Defined unconditionally because the complication and transfer sections
# further down iterate over `patterns` as well; leaving it inside the
# `if available_mort:` branch would raise NameError there whenever no
# mortality column is present.
patterns = {
    'Supervised #8 (HF+AMI+Bleed)': supervised_axiom8,
    'Non-supervised #4 (same)': nonsupervised_axiom4,
    'Supervised #9 (PN+Bleed+Resp)': supervised_axiom9,
    'Non-supervised #5 (same)': nonsupervised_axiom5,
}

if available_mort:
    # Row-wise mean over whichever mortality measures exist in the data.
    holdout['Avg_Mortality'] = holdout[available_mort].mean(axis=1)

    print(f"\nüî¨ MORTALITY COMPARISON:")
    print(f"   {'Pattern':<25s} {'Avg Mortality':>15s} {'N':>8s}")
    print(f"   " + "-"*50)

    # Mean of Avg_Mortality per pattern; patterns matching no rows are skipped.
    results = {}
    for name, mask in patterns.items():
        n_matching = int(mask.sum())
        if n_matching > 0:
            avg_mort = holdout.loc[mask, 'Avg_Mortality'].mean()
            results[name] = avg_mort
            print(f"   {name:<25s} {avg_mort:15.2f} {n_matching:8d}")

    # Test if supervision makes a difference (only when every pattern had rows)
    if len(results) == len(patterns):
        supervised_avg = (results['Supervised #8 (HF+AMI+Bleed)'] +
                          results['Supervised #9 (PN+Bleed+Resp)']) / 2
        nonsupervised_avg = (results['Non-supervised #4 (same)'] +
                             results['Non-supervised #5 (same)']) / 2

        # Negative diff => supervised patterns have lower average mortality.
        diff = supervised_avg - nonsupervised_avg

        print(f"\nüéØ VERDICT:")
        if abs(diff) < 0.5:
            print(f"   ‚ùå NO MEANINGFUL DIFFERENCE (Œî = {diff:+.2f}%)")
            print(f"      Interpretation: Supervision is STATE-STABILIZER")
            print(f"      - Doesn't return patients to State 1/2")
            print(f"      - Just maintains State 3 safely")
        elif diff < 0:
            # Reached only when diff <= -0.5. The previous `diff < -1.0` test
            # let differences in [-1.0, -0.5] fall through to the
            # "INCREASES MORTALITY" branch despite being reductions.
            print(f"   ‚úÖ SUPERVISION REDUCES MORTALITY (Œî = {diff:+.2f}%)")
            print(f"      Interpretation: Supervision is STATE-CHANGER")
            print(f"      - Actually improves outcomes")
            print(f"      - May facilitate State 3 ‚Üí State 1 transition")
        else:
            print(f"   ‚ö†Ô∏è  SUPERVISION INCREASES MORTALITY (Œî = {diff:+.2f}%)")
            print(f"      Interpretation: Selection bias")
            print(f"      - Sicker patients get supervision")

# Check complication burden
# Every column whose name starts with 'PSI_' is treated as a complication measure.
complication_cols = [c for c in holdout.columns if c.startswith('PSI_')]
if complication_cols:
    # Row-wise sum across all PSI_ columns.
    holdout['Total_Complications'] = holdout[complication_cols].sum(axis=1)

    print(f"\nüî¨ COMPLICATION BURDEN:")
    print(f"   {'Pattern':<25s} {'Avg Complications':>20s}")
    print(f"   " + "-"*50)

    # Mean complication total per pattern; patterns matching no rows are skipped.
    comp_results = {}
    for name, mask in patterns.items():
        if mask.sum() > 0:
            avg_comp = holdout.loc[mask, 'Total_Complications'].mean()
            comp_results[name] = avg_comp
            print(f"   {name:<25s} {avg_comp:20.2f}")

    if len(comp_results) >= 4:
        supervised_comp = (comp_results['Supervised #8 (HF+AMI+Bleed)'] +
                           comp_results['Supervised #9 (PN+Bleed+Resp)']) / 2
        nonsupervised_comp = (comp_results['Non-supervised #4 (same)'] +
                              comp_results['Non-supervised #5 (same)']) / 2

        # Negative value => fewer complications in the supervised patterns.
        comp_diff = supervised_comp - nonsupervised_comp

        print(f"\n   Difference: {comp_diff:+.2f} complications")
        if abs(comp_diff) < 1.0:
            print(f"   ‚Üí Similar complication burden (State-Stabilizer)")
        elif comp_diff <= -1.0:
            # `<=` so that a difference of exactly -1.0 is still interpreted.
            print(f"   ‚Üí Fewer complications with supervision (State-Changer)")
        elif comp_diff >= 1.0:
            # Previously this case printed nothing at all.
            print(f"   ‚Üí More complications with supervision (possible selection bias)")

# Transfer indicators: share of each pattern's rows whose Hybrid_HWM exceeds 4.5.
if 'Hybrid_HWM' in holdout.columns:
    print("\nüöë TRANSFER INDICATORS:")
    holdout['High_Resource_Use'] = holdout['Hybrid_HWM'].gt(4.5)

    print(f"   {'Pattern':<25s} {'% High Resource Use':>20s}")
    print("   " + "-" * 50)

    for name, mask in patterns.items():
        # Skip patterns that match no rows.
        if not mask.sum() > 0:
            continue
        flagged = holdout.loc[mask, 'High_Resource_Use']
        transfer_rate = flagged.mean() * 100
        print(f"   {name:<25s} {transfer_rate:19.1f}%")

# Closing banner followed by a reminder of how to read the results above.
print(
    "\n" + "=" * 80,
    "‚úÖ ANALYSIS 2 COMPLETE",
    "=" * 80,
    "\nKEY FINDING:",
    "If supervised patterns show similar mortality/complications to",
    "non-supervised, then supervision is a HOLDING PATTERN (State-Stabilizer).",
    "\nIf supervised patterns show LOWER mortality/complications,",
    "then supervision actively improves outcomes (State-Changer).",
    sep="\n",
)

# In [None]:
"""
Hospital Overlap Analysis: Supervised vs Non-Supervised Axioms
Fixed version with proper error handling and data structure detection
"""

import json
import pandas as pd
from pathlib import Path
from typing import Set, Dict
import os

def find_holdout_data():
    """Locate the holdout hospital JSON file and return its path as a string.

    Each search directory is tried in order and, within a directory, each
    candidate filename in order; the first existing path wins.

    Raises:
        FileNotFoundError: if no candidate exists. Before raising, the
            ``*.json`` files in the current directory are listed as a hint.
    """
    # Directories to probe, relative to the current working directory.
    search_dirs = (
        ".",  # Current directory
        "..",  # Parent directory
        "../..",  # Two levels up
        "outputs/Medical-Escalation-EFF_20260127_184136/holdout_validation",
        "../outputs/Medical-Escalation-EFF_20260127_184136/holdout_validation",
    )

    # Filenames the data may have been saved under.
    candidate_names = (
        "holdout_hospitals.json",
        "holdout_data.json",
        "validation_hospitals.json",
        "hospital_data.json",
    )

    # Lazily walk directory-major, filename-minor and stop at the first hit.
    candidates = (
        Path(directory) / filename
        for directory in search_dirs
        for filename in candidate_names
    )
    found = next((candidate for candidate in candidates if candidate.exists()), None)
    if found is not None:
        print(f"‚úì Found data file: {found}")
        return str(found)

    # Nothing matched: show what IS available to help the user.
    print("‚ùå Could not find holdout hospital data file.")
    print("\nFiles in current directory:")
    for json_file in Path(".").glob("*.json"):
        print(f"  - {json_file}")

    raise FileNotFoundError("Could not locate holdout hospital data")

def load_data() -> pd.DataFrame:
    """Load the holdout hospital data from JSON into a DataFrame.

    The file is located with ``find_holdout_data()``. Supported payloads:

    * a list -> passed to ``pd.DataFrame`` directly;
    * a dict with an ``'observations'``, ``'hospitals'`` or ``'data'`` key
      (checked in that order) -> the value under the first key present;
    * any other dict -> treated as a single record (one-row frame).

    Returns:
        The loaded DataFrame. A row count and a preview of up to 15 column
        names are printed as a side effect.

    Raises:
        FileNotFoundError: propagated from ``find_holdout_data()``.
        ValueError: if the top-level JSON value is neither a list nor a dict.
    """
    data_path = find_holdout_data()

    # JSON text is UTF-8 by specification; do not rely on the locale default
    # (which is e.g. cp1252 on Windows).
    with open(data_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Handle different JSON structures
    if isinstance(data, list):
        records = data
    elif isinstance(data, dict):
        for wrapper_key in ('observations', 'hospitals', 'data'):
            if wrapper_key in data:
                records = data[wrapper_key]
                break
        else:
            # No known wrapper key: try to use the dict itself as one record.
            records = [data]
    else:
        raise ValueError(f"Unexpected data type: {type(data)}")

    df = pd.DataFrame(records)

    # Column labels are not guaranteed to be strings (e.g. a list of lists
    # yields integer labels), so stringify them before joining.
    preview = ', '.join(map(str, df.columns[:15]))
    ellipsis = '...' if len(df.columns) > 15 else ''

    print(f"‚úì Loaded {len(df)} hospitals")
    print(f"‚úì Columns: {preview}{ellipsis}")

    return df

def apply_axiom_conditions(df: pd.DataFrame, axiom_num: int) -> pd.Series:
    """Return a boolean mask of the rows of *df* that satisfy an axiom.

    Axioms 4 and 8 share one rule (HF + AMI + bleeding); axioms 5 and 9 share
    another (PN + bleeding + respiratory). A row matches when every listed
    column is strictly greater than its threshold.

    Raises:
        ValueError: if *axiom_num* is not 4, 5, 8 or 9, or if *df* lacks any
            column the rule needs.
    """
    # column -> strict lower bound; insertion order fixes both the order in
    # which missing columns are reported and the order the tests are AND-ed.
    hf_ami_bleed = {'MORT_30_HF': 12, 'MORT_30_AMI': 12, 'PSI_09': 2.5}
    pn_bleed_resp = {'MORT_30_PN': 15, 'PSI_09': 2.3, 'PSI_11': 9}

    # The supervised and non-supervised versions of an axiom use the same rule.
    rules = {
        4: hf_ami_bleed,
        5: pn_bleed_resp,
        8: hf_ami_bleed,
        9: pn_bleed_resp,
    }

    if axiom_num not in rules:
        raise ValueError(f"Axiom {axiom_num} not defined")
    thresholds = rules[axiom_num]

    missing = [col for col in thresholds if col not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns for Axiom {axiom_num}: {missing}")

    # AND the per-column tests together, left to right.
    combined = None
    for column, lower_bound in thresholds.items():
        exceeds = df[column] > lower_bound
        combined = exceeds if combined is None else combined & exceeds
    return combined

def get_hospital_ids(df: pd.DataFrame, mask: pd.Series) -> Set[str]:
    """Return the identifiers, as strings, of the rows of *df* selected by *mask*.

    The first column of *df* whose name appears in a fixed list of common ID
    column names supplies the identifiers. When no such column exists, a
    warning is printed and the row index labels are used instead.
    """
    # Candidate ID column names, in priority order.
    candidate_columns = (
        'hospital_id', 'Hospital_ID', 'Provider_ID', 'Facility_ID',
        'provider_id', 'facility_id', 'ID', 'id',
    )
    id_column = next(
        (column for column in candidate_columns if column in df.columns),
        None,
    )

    if id_column is None:
        # Fallback: identify rows by their index labels.
        print("‚ö†Ô∏è  Warning: No hospital ID column found, using row index")
        selected_labels = df[mask].index
        return set(selected_labels.astype(str).tolist())

    selected_ids = df.loc[mask, id_column]
    return set(selected_ids.astype(str).tolist())

def calculate_overlap(set_a: Set, set_b: Set) -> Dict:
    """Summarise how two sets overlap.

    Returns a dict with the size of each set, of their intersection and of
    each one-sided difference; the Jaccard similarity (0 when both sets are
    empty); the share of *set_a* that is also in *set_b*, as a percentage
    (0 when *set_a* is empty); and up to five sorted sample members of the
    intersection and of each one-sided difference.
    """
    shared = set_a & set_b
    combined = set_a | set_b
    exclusive_a = set_a - set_b
    exclusive_b = set_b - set_a

    # Guard both ratios against division by zero.
    if combined:
        jaccard = len(shared) / len(combined)
    else:
        jaccard = 0

    if set_a:
        overlap_pct = len(shared) / len(set_a) * 100
    else:
        overlap_pct = 0

    def first_five(members):
        # Sorted so the sample is deterministic.
        return sorted(list(members))[:5]

    summary = {}
    summary['n_a'] = len(set_a)
    summary['n_b'] = len(set_b)
    summary['intersection'] = len(shared)
    summary['only_a'] = len(exclusive_a)
    summary['only_b'] = len(exclusive_b)
    summary['jaccard'] = jaccard
    summary['overlap_pct'] = overlap_pct
    summary['sample_shared'] = first_five(shared)
    summary['sample_only_a'] = first_five(exclusive_a)
    summary['sample_only_b'] = first_five(exclusive_b)
    return summary

def interpret(jaccard: float) -> str:
    """Map a Jaccard similarity to a one-line verdict string.

    Tiers are checked from the highest floor down; the first floor that
    *jaccard* reaches (``>=``) decides the verdict. Values below every floor
    get the "no overlap" verdict.
    """
    # (inclusive lower bound, verdict), in descending order of bound.
    tiers = (
        (0.95, "üéØ REDUNDANCY - Supervision implicit in severity"),
        (0.70, "üîÑ STRONG OVERLAP - Highly correlated"),
        (0.40, "‚öñÔ∏è PARTIAL OVERLAP - Mixed evidence"),
        (0.10, "üåä WEAK OVERLAP - Convergent outcomes"),
    )
    for floor, verdict in tiers:
        if jaccard >= floor:
            return verdict
    return "‚ùå NO OVERLAP - Different hospitals"

def main():
    """Run the supervised vs non-supervised hospital overlap analysis.

    Loads the hospital data, then for each supervised / non-supervised axiom
    pair computes the sets of matching hospital IDs, prints their overlap
    statistics and an interpretation of the Jaccard similarity, and finally
    prints a summary and writes it to ``hospital_overlap_results.json`` in the
    current working directory.

    Returns None. If the data cannot be loaded the error is printed and the
    function returns early; an error inside one comparison is printed with its
    traceback and the remaining comparisons still run.

    NOTE(review): ``apply_axiom_conditions`` applies the same conditions to the
    supervised and non-supervised axiom of each pair, so the two ID sets are
    expected to be identical — confirm this comparison is the intended one.
    """
    import traceback  # only needed for per-comparison error reporting

    print("="*80)
    print("HOSPITAL OVERLAP ANALYSIS: Supervised vs Non-Supervised Axioms")
    print("="*80)
    print()

    # Load data
    try:
        df = load_data()
        print()
    except Exception as e:
        # Top-level boundary: report the problem and stop instead of crashing.
        print(f"‚ùå Error loading data: {e}")
        return

    # Axiom pairs to compare
    comparisons = [
        {
            'name': 'HF+AMI+Bleeding',
            'supervised': 8,
            'non_supervised': 4
        },
        {
            'name': 'PN+Bleeding+Respiratory',
            'supervised': 9,
            'non_supervised': 5
        }
    ]

    results = []

    for comp in comparisons:
        print(f"\nCOMPARISON: {comp['name']}")
        print("-" * 80)

        try:
            # Get hospital sets
            supervised_mask = apply_axiom_conditions(df, comp['supervised'])
            nonsupervised_mask = apply_axiom_conditions(df, comp['non_supervised'])

            supervised_ids = get_hospital_ids(df, supervised_mask)
            nonsupervised_ids = get_hospital_ids(df, nonsupervised_mask)

            # Calculate overlap
            overlap = calculate_overlap(supervised_ids, nonsupervised_ids)

            print(f"Supervised Axiom #{comp['supervised']}: {overlap['n_a']} hospitals")
            print(f"Non-supervised Axiom #{comp['non_supervised']}: {overlap['n_b']} hospitals")
            print()
            print(f"Hospitals in BOTH: {overlap['intersection']} ({overlap['overlap_pct']:.1f}%)")
            print(f"Only in supervised: {overlap['only_a']}")
            print(f"Only in non-supervised: {overlap['only_b']}")
            print()
            print(f"üìà Jaccard Similarity: {overlap['jaccard']:.3f}")
            print(f"üí° {interpret(overlap['jaccard'])}")

            # Show sample IDs for every non-empty group. The one-sided samples
            # used to be nested under the "shared" check, so they were never
            # shown when the two sets had no hospital in common.
            samples = (
                ("Shared", overlap['sample_shared']),
                ("Only supervised", overlap['sample_only_a']),
                ("Only non-supervised", overlap['sample_only_b']),
            )
            if any(ids for _, ids in samples):
                print(f"\nüè• Sample IDs:")
                for label, ids in samples:
                    if ids:
                        print(f"   {label}: {', '.join(ids)}")

            results.append({
                'comparison': comp['name'],
                'jaccard': overlap['jaccard'],
                'interpretation': interpret(overlap['jaccard'])
            })

        except Exception as e:
            # Keep going with the next comparison, but leave a full trace.
            print(f"‚ùå Error: {e}")
            traceback.print_exc()
            continue

        print()

    # Summary
    if results:
        print("="*80)
        print("SUMMARY")
        print("="*80)

        avg_jaccard = sum(r['jaccard'] for r in results) / len(results)

        for r in results:
            print(f"{r['comparison']}: Jaccard = {r['jaccard']:.3f}")

        print(f"\nAverage: {avg_jaccard:.3f}")
        print()

        # Three-way printed conclusion. Note that the saved 'conclusion' field
        # below uses the finer-grained interpret() tiers instead.
        if avg_jaccard >= 0.95:
            print("üéØ CONCLUSION: REDUNDANCY CONFIRMED")
            print("   Supervised axioms select SAME hospitals.")
            print("   Supervision is implicit in State 3 severity.")
        elif avg_jaccard >= 0.40:
            print("‚öñÔ∏è CONCLUSION: MIXED EVIDENCE")
            print("   Partial overlap - supervision correlated but independent.")
        else:
            print("üåä CONCLUSION: CONVERGENCE CONFIRMED")
            print("   Different hospitals with identical outcomes.")
            print("   State 3 basin creates convergent mortality rates.")

        print()

        # Save
        output = {
            'comparisons': results,
            'average_jaccard': avg_jaccard,
            'conclusion': interpret(avg_jaccard)
        }

        output_file = "hospital_overlap_results.json"
        with open(output_file, 'w') as f:
            json.dump(output, f, indent=2)

        print(f"üíæ Results saved to: {output_file}")

if __name__ == "__main__":
    main()