# 03 — Flexible Model Controller (Parametrizable)
*Updated: 2025-10-14 18:26*

This version lets you configure the `scaler`, `sampler`, and `clf` components in either of two ways:
1) Direct instances (`StandardScaler()`, `SMOTE(k_neighbors=7)`, `RandomForestClassifier(n_estimators=400)`) or
2) **Keys + params** (e.g., `clf='rf', clf_params={'n_estimators':300, 'max_depth':10}`).

It supports: LogisticRegression, DecisionTree, RandomForest, SVC, Perceptron (SLP), KNeighbors, and KMeans (with optional label mapping).

In [None]:
import numpy as np
import pandas as pd
from typing import Optional, Dict, Any, Union

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans

try:
    from imblearn.pipeline import Pipeline as ImbPipeline
    from imblearn.over_sampling import SMOTE, RandomOverSampler, ADASYN
    from imblearn.under_sampling import RandomUnderSampler, NearMiss
    from imblearn.combine import SMOTEENN, SMOTETomek
except Exception:
    ImbPipeline = None
    SMOTE = RandomOverSampler = ADASYN = None
    RandomUnderSampler = NearMiss = None
    SMOTEENN = SMOTETomek = None

# Default seed: _build_component injects it as ``random_state`` when a
# component is built from a string key, supports that parameter, and the
# caller did not supply a value of their own.
RANDOM_STATE = 42

# ---------------- Helpers ----------------
def _ensure_2d(X):
    """Return ``X`` in a two-dimensional, single-column-per-feature form.

    A ``pandas.Series`` is promoted to a one-column ``DataFrame`` and a 1-D
    ``numpy.ndarray`` is reshaped to ``(n_samples, 1)``, so that downstream
    code can rely on ``X.shape[1]`` existing. Any other input (including
    data that is already 2-D) is returned unchanged.
    """
    if isinstance(X, pd.Series):
        return X.to_frame()
    if isinstance(X, np.ndarray) and X.ndim == 1:
        # Treat a flat vector as one feature column, mirroring Series.to_frame().
        return X.reshape(-1, 1)
    return X

def _extract_numeric_features(X):
    """List the columns of ``X`` that should be treated as numeric.

    For a ``DataFrame`` the names of the columns whose dtype is a NumPy
    number are returned; for anything else every positional column index
    ``0 .. X.shape[1] - 1`` is returned.
    """
    if not isinstance(X, pd.DataFrame):
        n_columns = X.shape[1]
        return list(range(n_columns))
    numeric_part = X.select_dtypes(include=[np.number])
    return numeric_part.columns.tolist()

def _proba_from_estimator(clf, X):
    """Produce a 1-D score per sample from ``clf``, preferring probabilities.

    Resolution order:

    1. ``predict_proba``: the second column is returned when the output is
       2-D with at least two columns, otherwise the flattened output.
    2. ``decision_function``: the flattened scores are min-max rescaled to
       ``[0, 1]``; if all scores are (numerically) equal, ``0.5`` is
       returned for every sample.
    3. ``predict``: the raw predictions are returned as-is.
    """
    if hasattr(clf, 'predict_proba'):
        probabilities = clf.predict_proba(X)
        has_second_column = probabilities.ndim == 2 and probabilities.shape[1] >= 2
        if has_second_column:
            return probabilities[:, 1]
        return probabilities.ravel()

    if not hasattr(clf, 'decision_function'):
        return clf.predict(X)

    margins = np.asarray(clf.decision_function(X)).reshape(-1)
    lowest, highest = margins.min(), margins.max()
    spread = highest - lowest
    if spread < 1e-12:
        # Constant scores carry no ranking information; avoid dividing by ~0.
        return np.full_like(margins, 0.5, dtype=float)
    return (margins - lowest) / spread

# ---------------- Registries ----------------
# Each registry maps a lowercase string key to a *class* (not an instance);
# _build_component looks the key up and instantiates the class with the
# user-supplied params.

# Feature scalers selectable via ``scaler='<key>'``.
SCALERS = {
    'standard': StandardScaler,
    'minmax': MinMaxScaler,
    'robust': RobustScaler,
}

# Resamplers selectable via ``sampler='<key>'``. When the imblearn import at
# the top of the file failed, every value here is None, and
# _build_component reports the key as unavailable.
SAMPLERS = {
    'smote': SMOTE,
    'ros': RandomOverSampler,
    'adasyn': ADASYN,
    'rus': RandomUnderSampler,
    'nearmiss': NearMiss,
    'smoteenn': SMOTEENN,
    'smotetomek': SMOTETomek,
}

# Estimators selectable via ``clf='<key>'``. Several keys are aliases for the
# same class (e.g. 'lr'/'logreg', 'svm'/'svc'). 'kmeans' is a clusterer, not
# a classifier; FlexibleModel special-cases it.
CLFS = {
    'lr': LogisticRegression,
    'logreg': LogisticRegression,
    'decisiontree': DecisionTreeClassifier,
    'dt': DecisionTreeClassifier,
    'rf': RandomForestClassifier,
    'randomforest': RandomForestClassifier,
    'svm': SVC,
    'svc': SVC,
    'slp': Perceptron,
    'perceptron': Perceptron,
    'knn': KNeighborsClassifier,
    'kmeans': KMeans,
}

def _build_component(component: Union[str, Any], params: Optional[Dict[str, Any]], registry: Dict[str, Any]):
    """Resolve ``component`` into a ready-to-use estimator/transformer.

    Parameters
    ----------
    component:
        ``None`` (returned as ``None``), an already-constructed object, or a
        string key (case-insensitive) to look up in ``registry``.
    params:
        Optional keyword arguments. For an existing object they are applied
        through ``set_params`` when the object offers it (otherwise they are
        ignored); for a string key they are passed to the constructor. The
        caller's dict is never modified.
    registry:
        Mapping from lowercase key to class. A value of ``None`` marks a
        component whose backing library is not installed.

    Returns
    -------
    The component instance, or ``None`` when ``component`` is ``None``.

    Raises
    ------
    ValueError
        If the key is missing from ``registry`` or maps to ``None``.
    TypeError
        If the class rejects one of the supplied ``params``.
    """
    import inspect  # local import: keeps this helper self-contained

    if component is None:
        return None

    if not isinstance(component, str):
        # Already an instance: best-effort parameter update, then hand it back.
        if params and hasattr(component, 'set_params'):
            component.set_params(**params)
        return component

    key = component.lower()
    cls = registry.get(key)
    if cls is None:
        raise ValueError(f"Unknown or unavailable component key: {component}")

    # Work on a copy so the default seed below never leaks into the caller's dict.
    kwargs = dict(params) if params else {}

    # Inspect the constructor signature instead of building a throwaway
    # instance just to see whether ``random_state`` is supported.
    try:
        accepted = inspect.signature(cls).parameters
    except (TypeError, ValueError):
        # Some callables (e.g. certain builtins) expose no signature.
        accepted = {}
    if 'random_state' in accepted:
        kwargs.setdefault('random_state', RANDOM_STATE)

    return cls(**kwargs)

class FlexibleModel:
    """Configurable ``preprocess -> (resample) -> estimator`` wrapper.

    The scaler, sampler and estimator may each be given as an instance or
    as a string key resolved through ``SCALERS`` / ``SAMPLERS`` / ``CLFS``
    (with optional ``*_params`` dicts). ``KMeans`` is supported as a
    clusterer; when ``fit`` receives labels, each cluster is mapped to the
    majority label of its members so that ``predict`` returns labels.

    Attributes
    ----------
    scaler, sampler, clf:
        The resolved component instances (``scaler``/``sampler`` may be
        ``None``).
    is_clustering:
        ``True`` when ``clf`` is a ``KMeans`` instance.
    pipeline:
        The pipeline built by the most recent ``fit`` call, ``None`` before.
    cluster_label_map_:
        ``{cluster_id: majority_label}`` for clustering fits that were given
        ``y``; otherwise ``None``.
    """

    def __init__(self,
                 scaler: Union[str, Any, None] = 'standard',
                 sampler: Union[str, Any, None] = None,
                 clf: Union[str, Any, None] = 'lr',
                 scaler_params: Optional[Dict[str, Any]] = None,
                 sampler_params: Optional[Dict[str, Any]] = None,
                 clf_params: Optional[Dict[str, Any]] = None,
                 numeric_only: bool = True):
        """Resolve the three components; no fitting happens here.

        ``clf=None`` falls back to ``LogisticRegression(max_iter=1000)``.
        ``numeric_only`` restricts imputation/scaling to numeric DataFrame
        columns; other columns are passed through untouched.
        """
        self.numeric_only = numeric_only
        # _build_component returns None for a None component.
        self.scaler = _build_component(scaler, scaler_params, SCALERS)
        self.sampler = _build_component(sampler, sampler_params, SAMPLERS)
        if clf is None:
            self.clf = LogisticRegression(max_iter=1000)
        else:
            self.clf = _build_component(clf, clf_params, CLFS)
        self.is_clustering = isinstance(self.clf, KMeans)
        # SVC only exposes predict_proba when probability=True, so force it on.
        if isinstance(self.clf, SVC) and not getattr(self.clf, 'probability', False):
            self.clf.set_params(probability=True)
        self.pipeline = None
        self.cluster_label_map_ = None

    def _require_fitted(self):
        """Raise ``NotFittedError`` if ``fit`` has not built a pipeline yet."""
        if self.pipeline is None:
            # NotFittedError subclasses both ValueError and AttributeError, so
            # callers that caught the old AttributeError keep working.
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                'This FlexibleModel instance is not fitted yet; call fit() first.'
            )

    def _build_preprocessor(self, X):
        """Return a median-impute + scale ``ColumnTransformer``, or ``None``.

        ``None`` is returned when no scaler is configured. Columns that are
        not selected for scaling are passed through unchanged.
        """
        if self.scaler is None:
            return None
        X = _ensure_2d(X)
        if isinstance(X, pd.DataFrame):
            cols = _extract_numeric_features(X) if self.numeric_only else list(X.columns)
        else:
            cols = list(range(X.shape[1]))
        num_pipe = Pipeline([
            ('impute', SimpleImputer(strategy='median')),
            ('scale', self.scaler)
        ])
        return ColumnTransformer([('num', num_pipe, cols)], remainder='passthrough')

    def _make_pipeline(self, X):
        """Assemble ``self.pipeline`` as ``[pre] -> [sampler] -> clf``.

        Raises
        ------
        RuntimeError
            If a sampler is configured but imblearn could not be imported.
        """
        pre = self._build_preprocessor(X)
        steps = []
        if pre is not None:
            steps.append(('pre', pre))
        if self.sampler is not None:
            if ImbPipeline is None:
                raise RuntimeError('imblearn is required for sampler pipelines but is not available.')
            steps.append(('sampler', self.sampler))
        # The estimator is always the final step, whatever precedes it.
        steps.append(('clf', self.clf))
        pipeline_cls = Pipeline if self.sampler is None else ImbPipeline
        self.pipeline = pipeline_cls(steps)

    def _preprocess(self, X):
        """Apply the fitted ``'pre'`` step to ``X`` if the pipeline has one."""
        steps = self.pipeline.named_steps
        # Call the preprocessor directly: slicing the pipeline would leave a
        # sampler as the last step, and samplers do not implement transform.
        return steps['pre'].transform(X) if 'pre' in steps else X

    def fit(self, X, y=None):
        """Build and fit the pipeline; returns ``self``.

        For clustering, ``y`` is optional and is only used to derive
        ``cluster_label_map_`` (majority label per cluster). For every other
        estimator ``y`` is required.

        Raises
        ------
        ValueError
            If ``y`` is ``None`` for a non-clustering estimator.
        """
        X = _ensure_2d(X)
        self._make_pipeline(X)
        if self.is_clustering:
            self.pipeline.fit(X)
            if y is not None:
                clusters = self.pipeline.predict(X)
                y = np.asarray(y)
                self.cluster_label_map_ = {}
                for c in np.unique(clusters):
                    vals, counts = np.unique(y[clusters == c], return_counts=True)
                    self.cluster_label_map_[c] = vals[np.argmax(counts)]
            return self
        if y is None:
            raise ValueError('y must be provided for classification models.')
        self.pipeline.fit(X, y)
        return self

    def predict(self, X, y=None):
        """Predict labels (or raw cluster ids when no label map exists).

        ``y`` is accepted for signature compatibility and ignored.
        """
        self._require_fitted()
        X = _ensure_2d(X)
        if not self.is_clustering:
            return self.pipeline.predict(X)
        clusters = self.pipeline.predict(X)
        if self.cluster_label_map_ is None:
            return clusters
        # Clusters without a recorded majority label default to 0.
        return np.array([self.cluster_label_map_.get(c, 0) for c in clusters])

    def predict_proba(self, X):
        """Return a 1-D score per sample.

        Classifiers: delegated to ``_proba_from_estimator`` on the
        preprocessed data. Clustering with a label map: the mapped labels
        cast to float. Clustering without a label map: a score in ``[0, 1]``
        derived from the norm of the estimator's ``transform`` output
        (smaller norm -> higher score), or zeros if it has no ``transform``.
        """
        self._require_fitted()
        X = _ensure_2d(X)
        clf = self.pipeline.named_steps['clf']
        if not self.is_clustering:
            return _proba_from_estimator(clf, self._preprocess(X))

        if self.cluster_label_map_ is not None:
            clusters = self.pipeline.predict(X)
            labels = np.array([self.cluster_label_map_.get(c, 0) for c in clusters])
            return labels.astype(float)
        if hasattr(clf, 'transform'):
            # The clusterer was fitted on preprocessed data, so feed it the same.
            d = clf.transform(self._preprocess(X))
            d = np.linalg.norm(d, axis=1)
            return (d.max() - d) / (d.max() - d.min() + 1e-12)
        return np.zeros(len(X), dtype=float)

    def score(self, X, y, metric: str = 'accuracy'):
        """Evaluate on ``(X, y)`` with the named metric (case-insensitive).

        Accepted names: ``accuracy``/``acc``, ``f1``/``f1_score``,
        ``precision``/``prec``, ``recall``/``tpr``/``sensitivity``,
        ``roc_auc``/``auc`` (the last uses ``predict_proba`` scores).

        Raises
        ------
        ValueError
            If ``metric`` is not one of the names above.
        """
        metric = metric.lower()
        if metric in ('roc_auc', 'auc'):
            return roc_auc_score(y, self.predict_proba(X))
        label_metrics = (
            (('accuracy', 'acc'), accuracy_score),
            (('f1', 'f1_score'), f1_score),
            (('precision', 'prec'), precision_score),
            (('recall', 'tpr', 'sensitivity'), recall_score),
        )
        for names, scorer in label_metrics:
            if metric in names:
                return scorer(y, self.predict(X))
        raise ValueError(f'Unknown metric: {metric}')

    def set_params(self, **kwargs):
        """Update params of underlying clf/scaler/sampler if available.

        For each part, the dict is read from ``kwargs['<part>']`` or, failing
        that, ``kwargs['<part>_params']``. Returns the list of part names
        that were actually updated.
        """
        updated = []
        for part_name in ('clf', 'scaler', 'sampler'):
            part = getattr(self, part_name, None)
            part_kwargs = kwargs.get(part_name) or kwargs.get(part_name + '_params')
            if part is None or not part_kwargs:
                continue
            if hasattr(part, 'set_params'):
                part.set_params(**part_kwargs)
                updated.append(part_name)
        return updated

    def get_pipeline(self):
        """Return the pipeline built by the last ``fit`` (``None`` before)."""
        return self.pipeline


In [None]:
# Quick examples (requires your X_train, y_train)
# 1) Keys + params: every component is named by its registry key and
#    configured through the matching *_params dict. The 'smote' key needs
#    imblearn to be installed.
model1 = FlexibleModel(
    scaler='standard',
    sampler='smote',
    clf='rf',
    sampler_params={'k_neighbors': 7, 'random_state': 42},
    clf_params={'n_estimators': 300, 'max_depth': None, 'random_state': 42}
)
# model1.fit(X_train, y_train)
# y_pred = model1.predict(X_test)

# 2) SVM with kernel/params (FlexibleModel turns on probability=True for SVC)
model2 = FlexibleModel(clf='svm', clf_params={'kernel':'rbf', 'C':1.0, 'gamma':'scale'})

# 3) KMeans with label mapping (the cluster -> label map is only built when
#    y is passed to fit)
model3 = FlexibleModel(clf='kmeans', clf_params={'n_clusters':2, 'random_state':42})
print('Parametrizable FlexibleModel ready ✅')
