In [746]:
import numpy as np
import pandas as pd

from sklearn.ensemble import IsolationForest
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

In [747]:
# One seed for the whole notebook: it seeds NumPy's global generator here and
# is reused wherever a `random_state` is required.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Vital-sign columns passed to the model, in this order.
FEATURES = [
    "heart_rate",
    "spo2",
    "temperature_f",
    "stress",
]

### 1. Synthetic baseline generator (Gaussian)

In [748]:
# Draw one healthy baseline reading used to train the model
def simulate_data():
    """Return a single synthetic "healthy" reading as a dict of vital signs.

    Every value is one draw from a Gaussian (via NumPy's global random state)
    clipped to a fixed range. The draws happen in the order listed below, so
    the sequence of values for a given seed is fixed.
    """
    # (key, mean, std, lower clip, upper clip)
    baseline_specs = (
        ("heart_rate", 72, 10, 60, 100),   # bpm, kept within 60-100
        ("spo2", 97, 2, 95, 100),          # percent, kept within 95-100
        ("temperature_f", 98, 1, 97, 99),  # Fahrenheit, kept within 97-99
        ("stress", 3, 2, 1, 6),            # 1-10 scale; baseline kept within 1-6
    )

    reading = {}
    for key, mean, std, lower, upper in baseline_specs:
        reading[key] = np.clip(np.random.normal(mean, std), lower, upper)
    return reading

# Build the healthy baseline dataset used to train the model
def generate_baseline_data(n):
    """Return a DataFrame of ``n`` rows, one ``simulate_data()`` reading per row."""
    records = []
    for _ in range(n):
        records.append(simulate_data())
    return pd.DataFrame(records)

### 2. Synthetic anomaly generator (shifted Gaussians, later combined with baseline data)

In [749]:
def generate_anomalous_data(n):
    """Return a DataFrame of ``n`` synthetic anomalous vital-sign readings.

    Each column holds ``n`` Gaussian draws (NumPy's global random state)
    clipped to a fixed range. Columns are sampled in the order listed below,
    so the sequence of values for a given seed is fixed.
    """
    # (column, mean, std, lower clip, upper clip)
    anomaly_specs = (
        ("heart_rate", 95, 7, 80, 120),          # tachycardia but not extreme
        ("spo2", 93, 1.5, 88, 95),               # mild hypoxia
        ("temperature_f", 100.2, 0.8, 99, 102),  # low grade fever
        ("stress", 7, 1.5, 5, 10),               # high stress
    )

    columns = {}
    for name, mean, std, lower, upper in anomaly_specs:
        columns[name] = np.clip(np.random.normal(mean, std, n), lower, upper)

    return pd.DataFrame(columns)

### 3. Train Isolation Forest on healthy baseline only

In [750]:
def train_isolation_forest(X_train: pd.DataFrame) -> IsolationForest:
    """Fit an Isolation Forest on ``X_train`` and return the fitted estimator.

    The forest uses ``contamination=0.01`` and the notebook-wide
    ``RANDOM_STATE``; ``n_estimators`` and ``max_samples`` keep their
    library defaults.
    """
    forest = IsolationForest(contamination=0.01, random_state=RANDOM_STATE)

    # fit() hands back the estimator itself, so it can be returned directly.
    return forest.fit(X_train)


### 4. Predict with Isolation Forest on test set with 10% injected anomalies

In [751]:
def predict_labels(model: IsolationForest, X: pd.DataFrame) -> np.ndarray:
    """Return binary anomaly labels for ``X``: 0 = normal, 1 = anomaly.

    ``model.predict`` reports inliers as 1 and outliers as -1; this maps
    the outlier marker to 1 and everything else to 0.
    """
    raw_labels = model.predict(X)
    is_outlier = np.equal(raw_labels, -1)
    return is_outlier.astype(int)

### 5. Evaluation

In [752]:
def evaluate(y_true: np.ndarray, y_pred: np.ndarray) -> dict:
    """Compute binary anomaly-detection metrics (0 = normal, 1 = anomaly).

    Parameters
    ----------
    y_true : np.ndarray
        Ground-truth labels, 0 or 1.
    y_pred : np.ndarray
        Predicted labels, 0 or 1.

    Returns
    -------
    dict
        Keys ``precision``, ``recall``, ``f1``, ``fpr`` (false positive
        rate) and the raw confusion counts ``tn``, ``fp``, ``fn``, ``tp``.
        Precision, recall and F1 are 0 when undefined (``zero_division=0``);
        ``fpr`` is 0.0 when there are no true negatives or false positives.
    """
    # Metrics per report: Precision, Recall, F1, False Positive Rate (FPR)

    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    # Confusion matrix layout: [[TN, FP],[FN, TP]]
    # labels=[0, 1] pins the matrix to 2x2 even when only one class appears
    # in y_true/y_pred; without it the matrix collapses to 1x1 and the
    # four-way unpack below raises ValueError.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "fpr": fpr,
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "tp": tp,
    }

In [753]:
# Sample sizes for the synthetic experiment, kept in one place so the data
# and the labels below cannot drift apart.
N_TRAIN = 50_000
N_TEST_NORMAL = 9_000
N_TEST_ANOMALOUS = 1_000

# Training set: 50,000 healthy baseline readings
X_train = generate_baseline_data(N_TRAIN)

# Test set: 10,000 readings with 10% injected anomalies
normal_test = generate_baseline_data(N_TEST_NORMAL)
anomalous_test = generate_anomalous_data(N_TEST_ANOMALOUS)

# ignore_index=True gives X_test a unique 0..N-1 index; without it the row
# labels of the two frames (both starting at 0) would be duplicated.
X_test = pd.concat([normal_test, anomalous_test], ignore_index=True)

# Labels follow the concat order (normal rows first, then anomalies) and are
# sized from the frames themselves: 0 = normal, 1 = anomaly.
y_test = np.concatenate([
    np.zeros(len(normal_test), dtype=int),
    np.ones(len(anomalous_test), dtype=int),
])

# Train on the healthy baseline only
model = train_isolation_forest(X_train[FEATURES])

In [754]:
# Evaluate using the labels from the model's own predict() (no manual score threshold)
y_pred = predict_labels(model, X_test[FEATURES])
metrics = evaluate(y_test, y_pred)

# Assemble the report first, then emit it in one go.
report_lines = [
    "Isolation Forest evaluation (synthetic test with injected anomalies)",
    f"Precision: {metrics['precision']:.4f}",
    f"Recall:    {metrics['recall']:.4f}",
    f"F1:        {metrics['f1']:.4f}",
    f"FPR:       {metrics['fpr']:.4f}",
    f"Confusion: TN={metrics['tn']} FP={metrics['fp']} FN={metrics['fn']} TP={metrics['tp']}",
]
print("\n".join(report_lines))

Isolation Forest evaluation (synthetic test with injected anomalies)
Precision: 0.8969
Recall:    0.7920
F1:        0.8412
FPR:       0.0101
Confusion: TN=8909 FP=91 FN=208 TP=792


In [755]:
# Evaluate with a manual threshold on the anomaly score
# decision_function is negated so that a higher score means "more anomalous".
raw_decision = model.decision_function(X_test[FEATURES])
scores = -raw_decision

# Cut at the 90th percentile of the test scores, i.e. flag the highest-scoring
# 10% of rows (sample alternative: threshold = 0.0).
threshold = np.percentile(scores, 90)
y_pred = np.greater_equal(scores, threshold).astype(int)

metrics = evaluate(y_test, y_pred)

# Assemble the report first, then emit it in one go.
report_lines = [
    "Isolation Forest evaluation (synthetic test with injected anomalies)",
    f"Precision: {metrics['precision']:.4f}",
    f"Recall:    {metrics['recall']:.4f}",
    f"F1:        {metrics['f1']:.4f}",
    f"FPR:       {metrics['fpr']:.4f}",
    f"Confusion: TN={metrics['tn']} FP={metrics['fp']} FN={metrics['fn']} TP={metrics['tp']}",
]
print("\n".join(report_lines))

Isolation Forest evaluation (synthetic test with injected anomalies)
Precision: 0.8570
Recall:    0.8570
F1:        0.8570
FPR:       0.0159
Confusion: TN=8857 FP=143 FN=143 TP=857
