# Federated Imputation Platform - Integration Test
Based on the eLwazi pilot node testing pattern.

## Setup and Helper Functions

In [None]:
import requests
import json
import time
from typing import Dict, List, Any

def pretty_print_json(response):
    """Pretty print a response body in blue color.

    The body is rendered as 4-space indented JSON when it parses as JSON.
    If it does not (for example an empty body or an HTML error page), the
    raw ``response.text`` is printed instead of raising.

    Args:
        response: Object exposing ``json()`` and ``text``, such as a
            ``requests.Response``.
    """
    try:
        body = json.dumps(response.json(), indent=4)
    except ValueError:
        # The JSON decode errors raised by response.json() are ValueError
        # subclasses; fall back to the undecoded body.
        body = response.text
    print("\033[38;2;8;75;138m"+body+"\033[0m")
    
def print_head(text):
    """Write ``text`` to stdout wrapped in a green ANSI color escape."""
    green = "\033[38;2;8;138;75m"
    reset = "\033[0m"
    print("".join((green, text, reset)))

def print_error(text):
    """Write ``text`` to stdout wrapped in a red ANSI color escape."""
    red = "\033[38;2;255;0;0m"
    reset = "\033[0m"
    print("".join((red, text, reset)))

## 1. Configure Test Parameters

In [None]:
# Central orchestrator configuration
#
# Each setting can be overridden with an environment variable, so the notebook
# can target another deployment or use other credentials without being edited.
# The defaults below apply when a variable is unset.
import os

# Endpoint URLs are built as f"{CENTRAL_BASE_URL}/api/...", so a trailing
# slash is stripped to avoid producing "//api/...".
CENTRAL_BASE_URL = os.environ.get(
    "FIP_CENTRAL_BASE_URL", "http://154.114.10.123:8000"
).rstrip("/")

# Test user credentials (create a test user if needed)
TEST_USER = {
    "username": os.environ.get("FIP_TEST_USERNAME", "admin"),
    "email": os.environ.get("FIP_TEST_EMAIL", "admin@example.com"),
    "password": os.environ.get("FIP_TEST_PASSWORD", "admin123"),
}

# Test reference panel
TEST_REFERENCE_PANEL = os.environ.get("FIP_TEST_REFERENCE_PANEL", "1000G_phase3_v5")

# Authentication token (will be populated after login)
AUTH_TOKEN = None

## 2. Test Authentication

In [None]:
# Test login
login_url = f"{CENTRAL_BASE_URL}/api/auth/login/"
print_head(f"POST request to {login_url}")

# Clear any token left by an earlier run of this cell, so a failed login is
# never masked by a stale token.
AUTH_TOKEN = None

login_response = requests.post(
    login_url,
    json={
        "username": TEST_USER["username"],
        "password": TEST_USER["password"],
    },
)

if login_response.status_code == 200:
    # .get() instead of indexing: a 200 response without an "access" field is
    # reported as a failed login below rather than raising KeyError.
    AUTH_TOKEN = login_response.json().get("access")

if AUTH_TOKEN:
    print_head(f"✓ Login successful! Token: {AUTH_TOKEN[:30]}...")
else:
    print_error(f"✗ Login failed: {login_response.status_code}")
    try:
        pretty_print_json(login_response)
    except ValueError:
        # The error body was not JSON; show it undecoded instead of failing.
        print_error(login_response.text)

# Setup headers for authenticated requests.
# The Authorization header is only added when a token was obtained, so a
# failed login does not send the literal string "Bearer None".
HEADERS = {"Content-Type": "application/json"}
if AUTH_TOKEN:
    HEADERS["Authorization"] = f"Bearer {AUTH_TOKEN}"

## 3. Check Service Registry Endpoints

In [None]:
# Fetch every service known to the registry
services_url = f"{CENTRAL_BASE_URL}/api/services/"
print_head(f"GET request to {services_url}")

services_response = requests.get(services_url, headers=HEADERS)

if services_response.status_code != 200:
    print_error(f"✗ Failed to get services: {services_response.status_code}")
else:
    services = services_response.json()
    print_head(f"✓ Found {len(services)} services")

    # Keep only the entries whose 'is_healthy' flag is truthy
    healthy_services = list(
        filter(lambda entry: entry.get('is_healthy', False), services)
    )
    print_head(f"✓ {len(healthy_services)} healthy services available")

    # One line per healthy service, followed by the panels it lists
    for service in healthy_services:
        print(f"  - {service['name']} ({service['url']})")
        panel_names = [p['name'] for p in service.get('reference_panels', [])]
        print(f"    Panels: {', '.join(panel_names)}")

## 4. Check Reference Panels

In [None]:
# Fetch the reference panels known to the central orchestrator
panels_url = f"{CENTRAL_BASE_URL}/api/reference-panels/"
print_head(f"GET request to {panels_url}")

panels_response = requests.get(panels_url, headers=HEADERS)

if panels_response.status_code != 200:
    print_error(f"✗ Failed to get reference panels: {panels_response.status_code}")
else:
    panels = panels_response.json()
    print_head(f"✓ Found {len(panels)} reference panels")

    # Name and build on the first line, description on the second;
    # missing optional fields fall back to placeholder text.
    for panel in panels:
        build_label = panel.get('build', 'N/A')
        print(f"  - {panel['name']} ({build_label})")
        description = panel.get('description', 'No description')
        print(f"    {description}")

## 5. Upload Test Input File

In [None]:
# Verify that the VCF file used for the job submission exists on disk
import os

test_file_path = "/home/ubuntu/federated-imputation-central/sample_data/test_input.vcf.gz"

if os.path.exists(test_file_path):
    print_head(f"✓ Test file found: {test_file_path}")
    # Report the size in kilobytes (bytes / 1024)
    print(f"  Size: {os.path.getsize(test_file_path) / 1024:.2f} KB")
else:
    print_error(f"✗ Test file not found: {test_file_path}")
    # Tell the user how to provide the missing file
    for hint in (
        "You need to provide a test VCF file. For H3Africa data, use:",
        "  - Create sample_data directory",
        "  - Add a small test VCF file (e.g., chr22 subset)",
    ):
        print(hint)

## 6. Create and Submit Imputation Job

In [None]:
# Create job
jobs_url = f"{CENTRAL_BASE_URL}/api/jobs/"
print_head(f"POST request to {jobs_url}")

# Start from "no job" so that every path below leaves job_id defined. Without
# this, a rejected submission would leave job_id undefined (NameError in the
# monitoring cell) or still pointing at a job from an earlier run.
job_id = None

# Prepare multipart form data
if os.path.exists(test_file_path):
    data = {
        'reference_panel': TEST_REFERENCE_PANEL,
        'chromosome': '22',  # Test with chr22 for speed
        'population': 'AFR',  # African populations
    }

    # No Content-Type header here: requests sets the multipart/form-data
    # content type (including the boundary) itself when `files` is passed.
    headers_no_ct = {"Authorization": f"Bearer {AUTH_TOKEN}"}

    # Keep the file open only for the duration of the upload
    with open(test_file_path, 'rb') as f:
        files = {'input_file': f}
        job_response = requests.post(jobs_url, headers=headers_no_ct, files=files, data=data)

    if job_response.status_code == 201:
        job_data = job_response.json()
        job_id = job_data['id']
        print_head(f"✓ Job created successfully! Job ID: {job_id}")
        print(json.dumps(job_data, indent=2))
    else:
        print_error(f"✗ Failed to create job: {job_response.status_code}")
        print(job_response.text)
else:
    print_error("Skipping job creation - no test file available")

## 7. Monitor Job Progress (Poll for Status)

In [None]:
# Reset the last known status so later cells always find job_status defined
# and never read the status of a job from an earlier run of this cell.
job_status = {}

if job_id:
    job_status_url = f"{CENTRAL_BASE_URL}/api/jobs/{job_id}/"
    print_head(f"Monitoring job {job_id}...")

    max_attempts = 60  # 5 minutes with 5-second intervals
    poll_interval = 5  # seconds between polls

    for attempt in range(max_attempts):
        if attempt:
            # Wait between polls only: no delay before the first poll and none
            # after the last one.
            time.sleep(poll_interval)

        status_response = requests.get(job_status_url, headers=HEADERS)

        if status_response.status_code != 200:
            print_error(f"✗ Failed to get job status: {status_response.status_code}")
            break

        job_status = status_response.json()
        current_status = job_status['status']

        print(f"  [{time.strftime('%H:%M:%S')}] Status: {current_status}")

        # Any of these statuses ends the polling loop
        if current_status in ['completed', 'failed', 'cancelled']:
            print_head(f"\n✓ Job finished with status: {current_status}")

            if current_status == 'completed':
                print("\nJob Results:")
                print(json.dumps(job_status, indent=2))

                # Check for output file
                if job_status.get('output_file'):
                    print_head(f"\n✓ Output file available: {job_status['output_file']}")
            elif current_status == 'failed':
                print_error(f"\n✗ Job failed: {job_status.get('error_message', 'Unknown error')}")

            break
    else:
        # The loop ran out of attempts without hitting a break
        print_error("✗ Job monitoring timed out after 5 minutes")
else:
    print_error("Skipping job monitoring - no job was created")

## 8. Test Dashboard Statistics

In [None]:
# Fetch the aggregate counters exposed by the dashboard endpoint
stats_url = f"{CENTRAL_BASE_URL}/api/dashboard/stats/"
print_head(f"GET request to {stats_url}")

stats_response = requests.get(stats_url, headers=HEADERS)

if stats_response.status_code != 200:
    print_error(f"✗ Failed to get stats: {stats_response.status_code}")
else:
    stats = stats_response.json()
    print_head("✓ Dashboard Statistics:")
    # (display label, response key) pairs; a missing key is shown as 0
    stat_fields = (
        ("Total Jobs", "total_jobs"),
        ("Completed", "completed_jobs"),
        ("Failed", "failed_jobs"),
        ("Running", "running_jobs"),
        ("Healthy Services", "healthy_services"),
    )
    for label, key in stat_fields:
        print(f"  {label}: {stats.get(key, 0)}")

## 9. Test Scatter-Gather Pattern
### Simulate how the system distributes work across multiple services

In [None]:
# Re-fetch the service list and index the healthy services by reference panel
services_response = requests.get(f"{CENTRAL_BASE_URL}/api/services/", headers=HEADERS)

if services_response.status_code != 200:
    print_error("✗ Failed to group services")
else:
    services = services_response.json()

    # Map each panel name to the healthy services that list it
    services_by_panel = {}
    for service in services:
        if not service.get('is_healthy', False):
            continue
        for panel in service.get('reference_panels', []):
            services_by_panel.setdefault(panel['name'], []).append(service)

    print_head("\n✓ Services grouped by reference panel:")
    for panel_name in services_by_panel:
        print(f"\n  {panel_name}:")
        for svc in services_by_panel[panel_name]:
            print(f"    - {svc['name']} ({svc['url']})")
            last_check = svc.get('last_health_check', 'Never')
            print(f"      Last health check: {last_check}")

## 10. Summary Report

In [None]:
print("="*60)
print("FEDERATED IMPUTATION PLATFORM - TEST SUMMARY")
print("="*60)

# Test results
test_results = [
    ("Authentication", AUTH_TOKEN is not None),
    ("Service Discovery", services_response.status_code == 200),
    ("Reference Panels", panels_response.status_code == 200),
    ("Dashboard Stats", stats_response.status_code == 200),
]

# The job variables are assigned only on some paths of the job cells (for
# example, job_status is never set if the first status poll fails). Look them
# up through globals() so the summary still renders instead of raising
# NameError.
created_job_id = globals().get("job_id")
last_job_status = globals().get("job_status") or {}

if created_job_id:
    test_results.append(("Job Creation", True))
    test_results.append(("Job Execution", last_job_status.get('status') == 'completed'))

print("\nTest Results:")
for test_name, passed in test_results:
    status = "✓ PASS" if passed else "✗ FAIL"
    # Green for a pass, red for a failure
    color = "\033[38;2;8;138;75m" if passed else "\033[38;2;255;0;0m"
    print(f"  {color}{status}\033[0m - {test_name}")

total_tests = len(test_results)
passed_tests = sum(1 for _, passed in test_results if passed)

print(f"\n{passed_tests}/{total_tests} tests passed")
print("="*60)