In [None]:
from typing import Tuple
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
def train_model(
    X, y,
    model_type: str = 'logistic'
):
    """
    Fit a classifier of the requested kind on the given data.

    Args:
        X: Feature matrix passed straight to the estimator's ``fit``.
        y: Target labels passed straight to the estimator's ``fit``.
        model_type: Which estimator to build.
            'logistic' -> ``LogisticRegression`` with the liblinear solver.
            'random_forest' -> a 3-fold ``GridSearchCV`` wrapped around a
            ``RandomForestClassifier(random_state=0)``, searching over
            ``n_estimators`` in [100, 200] and ``max_depth`` in [None, 5].
    Returns:
        The fitted estimator. For 'random_forest' this is the fitted
        ``GridSearchCV`` object itself, not the bare forest.
    Raises:
        ValueError: If ``model_type`` is neither of the supported names.
    """
    if model_type == 'logistic':
        estimator = LogisticRegression(solver='liblinear')
    elif model_type == 'random_forest':
        search_space = {'n_estimators': [100, 200], 'max_depth': [None, 5]}
        forest = RandomForestClassifier(random_state=0)
        estimator = GridSearchCV(estimator=forest, param_grid=search_space, cv=3)
    else:
        raise ValueError(f"Unknown model_type: {model_type}")

    # Both supported estimators expose the same fit(X, y) interface, so the
    # training step is shared once the estimator has been chosen.
    estimator.fit(X, y)
    return estimator

In [None]:
def apply_threshold(
    model,
    X,
    policy: str = 'fixed',
    threshold: float = 0.5
) -> Tuple[np.ndarray, float]:
    """
    Turn a classifier's probability scores into 0/1 predictions.

    The scores are taken from column 1 of ``model.predict_proba(X)``; a
    sample is labelled 1 when its score is greater than or equal to the
    cut-off, and 0 otherwise.

    Args:
        model: Fitted classifier exposing ``predict_proba``.
        X: Features to score.
        policy: How the cut-off is chosen.
            'fixed' -> use ``threshold`` as given.
            'optimized' -> use the median of the scores computed for ``X``
            (a simple heuristic; ``threshold`` is ignored).
        threshold: Cut-off used by the 'fixed' policy.
    Returns:
        y_pred: Integer array of binary predictions.
        threshold: The cut-off that was actually applied.
    Raises:
        ValueError: If ``policy`` is not one of the supported names.
    """
    scores = model.predict_proba(X)[:, 1]

    if policy == 'optimized':
        # Simple heuristic: split the scored samples at their median.
        cutoff = np.median(scores)
    elif policy == 'fixed':
        cutoff = threshold
    else:
        raise ValueError(f"Unknown policy: {policy}")

    # Inclusive comparison: a score exactly at the cut-off is labelled 1.
    at_or_above = scores >= cutoff
    return at_or_above.astype(int), cutoff