# EpiRust Diagnostic Statistics Demo

This notebook demonstrates EpiRust's diagnostic statistics capabilities, including:

1. Sensitivity and Specificity Analysis
2. ROC Curve Generation
3. Predictive Values
4. Likelihood Ratios
5. Prevalence Effects on Predictive Values
6. Bootstrap Confidence Intervals

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from epirust.stats import DiagnosticStats

# Set random seed and plotting style
np.random.seed(42)
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and 3.8 removed the old names, so plain 'seaborn' fails on current installs.
# Prefer the new name and fall back to the old one on older Matplotlib.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette("husl")

## Generate Sample Diagnostic Test Data

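Let's simulate a diagnostic test applied to 1,000 subjects whose true disease status is known. By default, the simulated test has 85% sensitivity and 95% specificity, and the disease prevalence is 10%: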

In [None]:
def generate_test_data(n_samples=1000, sensitivity=0.85, specificity=0.95, prevalence=0.1):
    """Simulate true disease status and binary test results for a cohort.

    Parameters
    ----------
    n_samples : int
        Number of subjects to simulate.
    sensitivity : float
        Probability that a diseased subject tests positive.
    specificity : float
        Probability that a healthy subject tests negative.
    prevalence : float
        Probability that any given subject is diseased.

    Returns
    -------
    pandas.DataFrame
        One row per subject with 0/1 columns ``true_status`` and
        ``test_result``.

    Notes
    -----
    All draws use NumPy's global random state (``np.random``), so seed it
    beforehand for reproducible output.
    """
    # Who actually has the disease
    disease = np.random.binomial(1, prevalence, n_samples)
    is_diseased = disease == 1
    is_healthy = disease == 0

    # Draw order (diseased first, then healthy) is kept fixed so that a given
    # seed always yields the same data set.
    positives_among_diseased = np.random.binomial(1, sensitivity, is_diseased.sum())
    positives_among_healthy = np.random.binomial(1, 1 - specificity, is_healthy.sum())

    # Everyone starts as test-negative; fill in the positives per group
    outcome = np.zeros_like(disease)
    outcome[is_diseased] = positives_among_diseased
    outcome[is_healthy] = positives_among_healthy

    return pd.DataFrame({'true_status': disease, 'test_result': outcome})

# Simulate one cohort using the generator's default settings, then hand the
# true status and the observed test result to DiagnosticStats
df = generate_test_data()
stats = DiagnosticStats(
    df['true_status'],
    df['test_result'],
)

# Show the confusion matrix under its heading
print("Confusion Matrix:", stats.confusion_matrix(), sep="\n")

## Basic Diagnostic Measures

In [None]:
# Point estimates, keyed by the label shown in the summary table
measures = {
    'Sensitivity': stats.sensitivity(),
    'Specificity': stats.specificity(),
    'PPV': stats.positive_predictive_value(),
    'NPV': stats.negative_predictive_value(),
    'Accuracy': stats.accuracy(),
    'F1 Score': stats.f1_score()
}

# One table row per measure: label, point estimate, and its interval.
# NOTE(review): the key passed to confidence_interval() is the lower-cased
# label, so 'F1 Score' becomes 'f1 score' (with a space) — confirm that
# DiagnosticStats accepts these spellings.
results_df = pd.DataFrame(
    [
        {'Measure': label, 'Value': estimate, 'CI': stats.confidence_interval(label.lower())}
        for label, estimate in measures.items()
    ]
)

print(
    "Diagnostic Measures with 95% Confidence Intervals:",
    results_df.to_string(index=False),
    sep="\n",
)

## Likelihood Ratios and Diagnostic Odds Ratio

In [None]:
# Ask DiagnosticStats for both likelihood ratios and the diagnostic odds ratio
lr_positive = stats.positive_likelihood_ratio()
lr_negative = stats.negative_likelihood_ratio()
dor = stats.diagnostic_odds_ratio()

# Report all three, one per line, rounded to two decimals
print(
    f"Positive Likelihood Ratio: {lr_positive:.2f}",
    f"Negative Likelihood Ratio: {lr_negative:.2f}",
    f"Diagnostic Odds Ratio: {dor:.2f}",
    sep="\n",
)

## ROC Curve Analysis

Let's generate and analyze the ROC curve for our test:

In [None]:
# Fetch the ROC points and the area under the curve
roc_data = stats.roc_curve()
auc = stats.auc()

# Draw the ROC curve on an explicit Axes, with the chance diagonal for reference
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(roc_data['fpr'], roc_data['tpr'], label=f'ROC (AUC = {auc:.3f})')
ax.plot([0, 1], [0, 1], 'k--', label='Random')
ax.set(
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='Receiver Operating Characteristic (ROC) Curve',
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## Prevalence Effects

Let's examine how the positive and negative predictive values change as disease prevalence varies from 1% to 50%:

In [None]:
# Evaluate both predictive values on a grid of 50 prevalences from 1% to 50%.
# NOTE(review): presumably the positional argument overrides the sample
# prevalence inside DiagnosticStats — confirm against the library.
prevalences = np.linspace(0.01, 0.5, 50)
ppvs = [stats.positive_predictive_value(prev) for prev in prevalences]
npvs = [stats.negative_predictive_value(prev) for prev in prevalences]

# Both curves share one explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(prevalences, ppvs, label='PPV')
ax.plot(prevalences, npvs, label='NPV')
ax.set(
    xlabel='Disease Prevalence',
    ylabel='Predictive Value',
    title='Predictive Values vs Disease Prevalence',
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## Bootstrap Analysis

Let's perform bootstrap analysis to get robust confidence intervals:

In [None]:
# Perform bootstrap analysis
bootstrap_results = stats.bootstrap_analysis(n_iterations=1000)

# Keys into bootstrap_results mapped to their panel titles. This gets its own
# name so the `measures` dict of point estimates built in the basic-measures
# cell is not overwritten with a list, which would leave the kernel in a
# confusing state when cells are re-run out of order.
bootstrap_panels = {
    'sensitivity': 'Sensitivity',
    'specificity': 'Specificity',
    'ppv': 'PPV',
    'npv': 'NPV',
}

# Plot distributions: one histogram per measure on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for ax, (measure, title) in zip(axes.flat, bootstrap_panels.items()):
    sns.histplot(bootstrap_results[measure], ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Value')

plt.tight_layout()
plt.show()

# Display bootstrap confidence intervals (2.5th and 97.5th percentiles of the
# bootstrap samples)
print("\nBootstrap 95% Confidence Intervals:")
for measure in bootstrap_panels:
    ci = np.percentile(bootstrap_results[measure], [2.5, 97.5])
    print(f"{measure.upper()}: ({ci[0]:.3f}, {ci[1]:.3f})")

## Conclusion

This notebook demonstrated EpiRust's comprehensive diagnostic statistics capabilities:

1. Basic measures (sensitivity, specificity, predictive values)
2. Advanced metrics (likelihood ratios, diagnostic odds ratio)
3. ROC curve analysis
4. Prevalence effects on predictive values
5. Bootstrap analysis for confidence intervals

These tools provide a robust framework for evaluating diagnostic tests in epidemiological studies.