# Dockerized ML API — API Testing Notebook

**Author:** Mike Ichikawa  
**Date:** January 2026

Interactive exploration of all API endpoints. Run the API first (`uvicorn app.main:app --reload`), then execute these cells.

The notebook also includes a simple load test: 500 sequential predictions with a latency distribution analysis.

In [None]:
import requests, json, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Notebook-wide settings: base URL of the API under test and matplotlib defaults.
BASE_URL = 'http://localhost:8000'
plt.rcParams.update({'figure.facecolor': 'white', 'axes.grid': True, 'grid.alpha': 0.3})

# Smoke-check GET /health before running the remaining cells. Problems are
# printed rather than raised so this cell always finishes with a readable hint.
try:
    r = requests.get(f'{BASE_URL}/health', timeout=3)
    health = r.json()  # parse the body once and reuse it below
except (requests.RequestException, ValueError) as e:
    # RequestException: connection refused, timeout, and similar transport errors.
    # ValueError: the response body could not be decoded as JSON.
    print('Cannot reach API. Start it with: uvicorn app.main:app --reload')
    print(f'Error: {e}')
else:
    try:
        print(f'API status: {health["status"]}')
        print(f'Model loaded: {health["model_loaded"]}')
    except (KeyError, TypeError) as e:
        # The server answered, so do not report this as a connectivity problem.
        print(f'API is reachable but /health returned an unexpected payload: {health!r}')
        print(f'Error: {e!r}')

In [None]:
# Health and model info
# Pretty-print the raw JSON returned by the two read-only endpoints.
# A timeout is set so a stalled server fails the cell instead of hanging the kernel.
for position, endpoint in enumerate(('/health', '/model/info')):
    # Blank line between endpoint dumps, but not before the first one.
    separator = '\n' if position else ''
    print(f'{separator}GET {endpoint}')
    endpoint_response = requests.get(f'{BASE_URL}{endpoint}', timeout=5)
    print(json.dumps(endpoint_response.json(), indent=2))

In [None]:
# Single prediction - normal
# POST one 8-value feature vector with small-magnitude values to /predict and
# pretty-print whatever JSON the API returns.
payload = {
    'features': [0.1, 0.2, -0.1, 0.3, 0.0, 0.15, -0.05, 0.25],
    'observation_id': 'test_normal_001',
}
# The timeout keeps the cell from blocking forever if the server stops responding.
r = requests.post(f'{BASE_URL}/predict', json=payload, timeout=5)
print('Normal observation:')
print(json.dumps(r.json(), indent=2))

In [None]:
# Single prediction - anomaly
# POST one 8-value feature vector with large-magnitude, alternating-sign values
# to /predict and pretty-print whatever JSON the API returns.
payload = {
    'features': [8.5, -7.2, 9.1, -8.8, 7.6, -9.3, 8.2, -7.5],
    'observation_id': 'test_anomaly_001',
}
# The timeout keeps the cell from blocking forever if the server stops responding.
r = requests.post(f'{BASE_URL}/predict', json=payload, timeout=5)
print('Anomalous observation:')
print(json.dumps(r.json(), indent=2))

In [None]:
# Batch prediction
# Send 20 random standard-normal feature vectors (8 values each) to
# /predict/batch in one request and summarise the response.
rng = np.random.default_rng(42)  # fixed seed -> identical batch on every run
batch = {'observations': [{'features': rng.standard_normal(8).tolist()} for _ in range(20)]}

batch_response = requests.post(f'{BASE_URL}/predict/batch', json=batch, timeout=10)
# Fail here with the HTTP status rather than with a KeyError on the fields below.
batch_response.raise_for_status()
data = batch_response.json()

# Report the batch size from the payload so the message cannot drift from the data.
print(f'Batch {len(batch["observations"])} obs: {data["anomaly_count"]} anomalies ({data["anomaly_rate"]:.1%}), {data["batch_inference_ms"]:.1f}ms total')

In [None]:
# Latency benchmark
# Sends n sequential POST /predict requests with random 8-value feature vectors,
# records the client-side wall-clock time of each call in milliseconds, then
# prints summary statistics and plots a histogram of the latencies.
n = 500
latencies = []
failures = 0  # number of responses with a 4xx/5xx status
rng = np.random.default_rng(0)  # fixed seed -> same payload sequence on every run
print(f'Running {n} predictions...')
for _ in range(n):
    # Build the payload before starting the clock so random-number generation
    # is not included in the measurement.
    payload = {'features': rng.standard_normal(8).tolist()}
    t0 = time.perf_counter()
    resp = requests.post(f'{BASE_URL}/predict', json=payload, timeout=10)
    latencies.append((time.perf_counter() - t0) * 1000)
    if not resp.ok:
        failures += 1

# Surface error responses instead of silently treating them as valid samples.
if failures:
    print(f'Warning: {failures} of {n} requests returned an error status; their latencies are included below.')

la = np.array(latencies)
# Compute each statistic once and reuse it for both the printout and the plot.
median_ms = np.median(la)
p95_ms = np.percentile(la, 95)
print(f'Mean: {la.mean():.2f}ms | Median: {median_ms:.2f}ms | P95: {p95_ms:.2f}ms')

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(la, bins=40, color='#2C7BB6', edgecolor='white', alpha=0.8)
ax.axvline(median_ms, color='#D7191C', linestyle='--', label=f'Median: {median_ms:.1f}ms')
ax.axvline(p95_ms, color='orange', linestyle='--', label=f'P95: {p95_ms:.1f}ms')
ax.set_xlabel('Latency (ms)')
ax.set_ylabel('Count')
ax.set_title(f'API Latency Distribution ({n} requests)')
ax.legend()
plt.tight_layout()
plt.show()