# Complete ML Workflow: Synthetic Data → Training → Analysis
## End-to-end pipeline for sinus segmentation and quantitative analysis

## Step 1: Generate Synthetic Training Data

In [None]:
# Generate 50 synthetic training samples with varied pathology.
# Shells out (IPython `!`) to ../src/synthetic_generator.py. Output goes under
# ../data/synthetic; the visualization cell that follows reads its `images/`
# and `masks/` subfolders.
# NOTE(review): `mixed` presumably samples across pathology types/severities and
# `--seed 42` presumably makes the run reproducible — confirm in the generator.
!python ../src/synthetic_generator.py \
    --output-dir ../data/synthetic \
    --num-samples 50 \
    --pathology mixed \
    --severity mixed \
    --seed 42

## Step 2: Visualize Synthetic Samples

In [None]:
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Preview up to six synthetic samples as (image slice, mask slice) pairs.
# Grid layout: 3 rows x 4 columns = 2 samples per row, each sample occupying two
# adjacent columns (image on the left, mask on the right).
MAX_PREVIEW = 6

# Glob once: the full list gives the sample count, its head gives the preview.
synthetic_dir = Path('../data/synthetic')
all_image_files = sorted((synthetic_dir / 'images').glob('*.nii.gz'))
image_files = all_image_files[:MAX_PREVIEW]

fig, axes = plt.subplots(3, 4, figsize=(16, 12))
# Row-major flattening places sample `idx` at flat positions 2*idx and 2*idx + 1,
# so all six samples fit (indexing axes[idx, 0] would run past the 3 rows).
flat_axes = axes.ravel()

for idx, img_path in enumerate(image_files):
    img_ax, mask_ax = flat_axes[2 * idx], flat_axes[2 * idx + 1]

    # Load the image and its mask (the mask shares the image's file name)
    volume = nib.load(str(img_path)).get_fdata()
    mask_path = synthetic_dir / 'masks' / img_path.name
    mask = nib.load(str(mask_path)).get_fdata()

    # Middle slice along the first array axis
    mid_slice = volume.shape[0] // 2

    # Display image with a fixed intensity window of [-1000, 400]
    img_ax.imshow(volume[mid_slice], cmap='gray', vmin=-1000, vmax=400)
    img_ax.set_title(f'{img_path.stem[:30]}')
    img_ax.axis('off')

    # Display mask
    mask_ax.imshow(mask[mid_slice], cmap='Reds')
    mask_ax.set_title('Mask')
    mask_ax.axis('off')

# Blank out any grid cells left over when fewer than MAX_PREVIEW samples exist
for unused_ax in flat_axes[2 * len(image_files):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

print(f"\nGenerated {len(all_image_files)} synthetic training samples")

## Step 3: Train MONAI 3D U-Net
**Note**: Training requires a GPU to finish in a reasonable time. Lower `max_epochs` in the config (`../configs/monai_unet_config.yaml`) for quick testing.

In [None]:
# Report whether PyTorch can see a CUDA device and, if so, name device 0
import torch

cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    device_name = torch.cuda.get_device_name(0)
    print(f"GPU: {device_name}")

In [None]:
# Train the model (uncomment to run - this will take time!)
# !python ../src/train_segmentation.py \
#     --config ../configs/monai_unet_config.yaml \
#     --data-dir ../data/synthetic \
#     --output-dir ../models/sinus_unet_v1 \
#     --train-split 0.8 \
#     --seed 42

## Step 4: Run Inference on Real Data

In [None]:
# Run the full pipeline with segmentation on one real DICOM series.
# Assumes a trained model is available at models/sinus_unet.pth.
# NOTE(review): no checkpoint path is passed below, and the training cell writes
# to ../models/sinus_unet_v1 — confirm where pipeline.py loads its weights from.
# Judging by the flags, this writes a NIfTI volume, a mask, and a metadata JSON.

!python ../src/pipeline.py \
    --dicom-dir ../data/raw/5301/5303 \
    --output-nifti ../data/processed/patient_5301_ct.nii.gz \
    --mask-output ../data/processed/patient_5301_mask.nii.gz \
    --metadata-json ../docs/patient_5301_meta.json

## Step 5: Quantitative Analysis

In [None]:
# Run comprehensive quantitative analysis. The JSON report written to --output
# is the file the next cell loads and summarizes.
# NOTE(review): Step 4 writes ../data/processed/patient_5301_ct.nii.gz, but this
# cell reads ../data/processed/sinus_ct.nii.gz — confirm this is the intended input.
# NOTE(review): patient ID and study date are hard-coded; confirm they are
# de-identified before sharing this notebook.
!python ../src/quantitative_analysis.py \
    --image ../data/processed/sinus_ct.nii.gz \
    --output ../docs/metrics/quantitative_report.json \
    --patient-id 19420531 \
    --study-date 20250418

In [None]:
# Load the quantitative report and print a human-readable summary.
# The volumetric section is required; texture and asymmetry are optional and
# are skipped when missing or null.
import json

with open('../docs/metrics/quantitative_report.json', encoding='utf-8') as f:
    report = json.load(f)

print("\n" + "="*60)
print("QUANTITATIVE ANALYSIS SUMMARY")
print("="*60)

vol = report['volumetric']
print(f"\nTotal Sinus Volume: {vol['total_sinus_volume_ml']:.2f} mL")
print(f"Air Volume: {vol['air_volume_ml']:.2f} mL")
print(f"Soft Tissue Volume: {vol['soft_tissue_volume_ml']:.2f} mL")
print(f"Air Fraction: {vol['air_fraction']:.1%}")

# .get() keeps a report without a texture section from raising KeyError;
# an empty or null section is skipped as before.
tex = report.get('texture')
if tex:
    print("\nTexture Features:")
    print(f"  Mean HU: {tex['mean_intensity']:.2f}")
    print(f"  Std HU: {tex['std_intensity']:.2f}")
    print(f"  Entropy: {tex['entropy']:.3f}")

# Compare against None rather than relying on truthiness: a score of exactly
# 0.0 is a valid result and must still be shown.
asymmetry_score = report.get('asymmetry_score')
if asymmetry_score is not None:
    print(f"\nAsymmetry Score: {asymmetry_score:.3f}")

## Step 6: Visualize Results in 3D

In [None]:
# Generate interactive 3D mesh; the HTML file is written to the --output path.
# NOTE(review): `--iso -350` is presumably the iso-surface threshold and
# `--downsample 2` a volume downsampling factor — confirm in visualize_3d.py.
# NOTE(review): this reads ../data/processed/sinus_ct.nii.gz, whereas Step 4
# writes ../data/processed/patient_5301_ct.nii.gz — confirm the intended input.
!python ../src/visualize_3d.py \
    --nifti ../data/processed/sinus_ct.nii.gz \
    --iso -350 \
    --downsample 2 \
    --output ../docs/sinus_visualization.html

# NOTE: printed unconditionally — an IPython `!` command does not raise on a
# non-zero exit status, so check the command output above before trusting this.
print("\n3D visualization saved! Open ../docs/sinus_visualization.html in a browser.")

## Step 7: Longitudinal Tracking Setup

In [None]:
# Example: compare multiple timepoints.
# Collects every '*_report.json' under ../docs/metrics into a DataFrame, then
# plots air volume and air fraction against study date, one line per patient.
from pathlib import Path
import json
import pandas as pd
import matplotlib.pyplot as plt

# Assume we have multiple reports (you would generate these from different scans)
metrics_dir = Path('../docs/metrics')
report_files = sorted(metrics_dir.glob('*_report.json'))

if report_files:
    # Compile metrics into DataFrame: one row per report
    data = []
    for report_file in report_files:
        with open(report_file) as f:
            r = json.load(f)
        # File is closed here; only the parsed dict is needed from now on
        volumetric = r['volumetric']
        data.append({
            'date': r['study_date'],
            'patient_id': r['patient_id'],
            'air_volume_ml': volumetric['air_volume_ml'],
            'tissue_volume_ml': volumetric['soft_tissue_volume_ml'],
            'air_fraction': volumetric['air_fraction'],
        })

    df = pd.DataFrame(data)

    # Study dates are presumably stored as given on the analysis command line
    # (YYYYMMDD). Parse them so the x-axis is a real time axis with spacing
    # proportional to elapsed time; if any value does not match that format,
    # keep the raw values and fall back to the original categorical axis.
    parsed_dates = pd.to_datetime(df['date'].astype(str), format='%Y%m%d', errors='coerce')
    dates_parsed = bool(parsed_dates.notna().all())
    if dates_parsed:
        df['date'] = parsed_dates
    df = df.sort_values('date')

    # Plot trend. Each patient gets its own line so that scans from different
    # patients are never joined into a single "trend".
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    multi_patient = df['patient_id'].nunique(dropna=False) > 1
    # With a single patient keep the original fixed colour; with several, let
    # matplotlib cycle colours so the lines are distinguishable.
    fraction_style = {} if multi_patient else {'color': 'coral'}

    for patient_id, patient_df in df.groupby('patient_id', dropna=False, sort=False):
        axes[0].plot(patient_df['date'], patient_df['air_volume_ml'], 'o-',
                     linewidth=2, markersize=8, label=str(patient_id))
        axes[1].plot(patient_df['date'], patient_df['air_fraction'] * 100, 'o-',
                     linewidth=2, markersize=8, label=str(patient_id), **fraction_style)

    axes[0].set_xlabel('Study Date')
    axes[0].set_ylabel('Air Volume (mL)')
    axes[0].set_title('Sinus Air Volume Over Time')
    axes[0].grid(True, alpha=0.3)

    axes[1].set_xlabel('Study Date')
    axes[1].set_ylabel('Air Fraction (%)')
    axes[1].set_title('Air Fraction Trend')
    axes[1].grid(True, alpha=0.3)

    if multi_patient:
        for ax in axes:
            ax.legend(title='Patient ID')
    if dates_parsed:
        # Slant date tick labels so they do not overlap
        fig.autofmt_xdate()

    plt.tight_layout()
    plt.show()

    print("\nLongitudinal tracking data:")
    print(df.to_string(index=False))
else:
    print("No multiple timepoint reports found yet. Run analysis on multiple scans to enable tracking.")

## Next Steps & Research Directions

### Immediate:
1. **Collect more real data**: Process all DICOM series under the `data/raw/5301/` directory
2. **Augment training**: Combine synthetic + real data for better generalization
3. **Validate segmentation**: Compare model predictions with radiologist annotations

### ML Improvements:
4. **Multi-class segmentation**: Separate labels for each sinus (maxillary, frontal, ethmoid, sphenoid)
5. **Transfer learning**: Fine-tune from pre-trained medical imaging models
6. **Ensemble methods**: Combine multiple models for robust predictions

### Clinical Analysis:
7. **Literature comparison**: Compare your metrics with published normal ranges
8. **Ostiomeatal complex analysis**: Quantify critical drainage pathways
9. **Correlation studies**: Link metrics to symptoms (congestion, headache, etc.)

### Automation:
10. **Dashboard creation**: Build Streamlit/Dash app for easy analysis
11. **Reporting templates**: Auto-generate medical reports with visualizations
12. **Integration**: Connect with PACS systems for clinical deployment