<a href="https://colab.research.google.com/github/joshua12cx/profe-programa/blob/main/Preeclampsia_Pipeline_Full_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preeclampsia Risk Classification — Full Pipeline (v2)

Este notebook contiene el pipeline completo (pasos 1–12) listo para ejecutar en Jupyter.
Asegúrate de tener `Preeclampsia_data_clean.csv` en la misma carpeta.


In [None]:
# Mount Google Drive only when the notebook runs inside Google Colab.
# On a local Jupyter kernel the `google.colab` package does not exist, so the
# import is guarded to keep "Restart & Run All" working there too.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab not available — skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

In [None]:
import os
import json
import datetime
import warnings
import platform
from pprint import pprint

import numpy as np
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate, RandomizedSearchCV, cross_val_predict
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

from sklearn.metrics import (
    accuracy_score, balanced_accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, average_precision_score, confusion_matrix
)

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

try:
    from xgboost import XGBClassifier
    _HAS_XGB = True
except Exception:
    XGBClassifier = None
    _HAS_XGB = False

try:
    from lightgbm import LGBMClassifier
    _HAS_LGB = True
except Exception:
    LGBMClassifier = None
    _HAS_LGB = False

try:
    from catboost import CatBoostClassifier
    _HAS_CAT = True
except Exception:
    CatBoostClassifier = None
    _HAS_CAT = False

# Optional: SHAP
try:
    import shap
    _HAS_SHAP = True
except Exception:
    shap = None
    _HAS_SHAP = False

# Silence every warning for the rest of the session. This keeps cell output short,
# but it also hides any warning the libraries used below may emit.
warnings.filterwarnings("ignore")
# Single seed passed to the split, the CV splitters, SMOTE and the seeded estimators below.
RANDOM_STATE = 42
# Also seed NumPy's global random generator.
np.random.seed(RANDOM_STATE)


In [None]:

# -------------------------------
# Configuration
# -------------------------------
# Input: CSV path (relative to the working directory) and the label column.
DATA_FILE = "Preeclampsia_data_clean.csv"  # adjust path if needed
TARGET = "State"  # expected values: 'SIN RIESGO' / 'RIESGO'

# Run stamp with minute resolution; it tags every report file and names the artifact folder.
TS = format(datetime.datetime.now(), "%Y%m%d_%H%M")

# Output folders, created up front so later cells can write into them directly.
REPORTS_DIR = "reports"
ARTIFACTS_DIR = os.path.join("artefactos", TS)
for _out_dir in (REPORTS_DIR, ARTIFACTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)


In [None]:

# -------------------------------
# 1) Load data and target
# -------------------------------
# Fail fast with an explicit exception: a bare `assert` is stripped when Python
# runs with -O, and FileNotFoundError states the problem more precisely.
if not os.path.exists(DATA_FILE):
    raise FileNotFoundError(f"Data file not found: {DATA_FILE}")
df = pd.read_csv(DATA_FILE)

# Map the textual labels to 0/1 (RIESGO is the positive class).
label_map = {"SIN RIESGO": 0, "RIESGO": 1}
if TARGET not in df.columns:
    raise RuntimeError(f"Target column '{TARGET}' not found in data")

y = df[TARGET].map(label_map)
if y.isna().any():
    # Report only the offending raw values, inside ONE formatted message
    # (passing a second positional argument would render the error as a tuple).
    unmapped = sorted(df.loc[y.isna(), TARGET].astype(str).unique())
    raise RuntimeError(
        "Some target values couldn't be mapped to label_map. "
        f"Unmapped values: {unmapped}; expected one of: {list(label_map)}"
    )

y = y.astype(int)
X = df.drop(columns=[TARGET])

# Share of positive (RIESGO=1) rows; computed once and reused for print + report.
prevalence = float((y == 1).mean())

print("Step 1 — Data loaded")
print("Shape:", X.shape, "| Prevalence(RIESGO=1):", prevalence)

# Export overview
pd.DataFrame({"key":["data_file","n_rows","n_cols","prevalence_RIESGO"],
              "value":[DATA_FILE, X.shape[0], X.shape[1], prevalence]}).to_csv(
    os.path.join(REPORTS_DIR, f"step1_overview_{TS}.csv"), index=False
)


Step 1 — Data loaded
Shape: (1800, 9) | Prevalence(RIESGO=1): 0.45444444444444443


In [None]:

# -------------------------------
# 2) Train/test split (80/20 stratified)
# -------------------------------
# The split is stratified on y and seeded with RANDOM_STATE.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=RANDOM_STATE,
    stratify=y,
)
print("Step 2 — Split done: Train", X_train.shape, "Test", X_test.shape)

# Key/value report with partition sizes and the positive-class share of each partition.
split_meta = {
    "n_train": len(y_train),
    "n_test": len(y_test),
    "train_prevalence": float((y_train == 1).mean()),
    "test_prevalence": float((y_test == 1).mean()),
}
split_meta_path = os.path.join(REPORTS_DIR, f"step2_split_meta_{TS}.csv")
pd.DataFrame(
    {"key": list(split_meta.keys()), "value": list(split_meta.values())}
).to_csv(split_meta_path, index=False)


Step 2 — Split done: Train (1440, 9) Test (360, 9)


In [None]:

# -------------------------------
# 3) Preprocessing: infer numerical/categorical
# -------------------------------
# If you have an ordinal column, specify here (example: 'Nivel estudio')
ordinal_columns = []  # e.g. ["Nivel estudio"]
ordinal_categories = []  # e.g. [["Primaria","Secundaria","Tecnico","Bachiller","Licenciado","Maestria","Doctorado"]]
if len(ordinal_columns) != len(ordinal_categories):
    raise ValueError("ordinal_columns and ordinal_categories must have the same length")

# Infer column roles from the TRAIN dtypes. Columns declared as ordinal are
# removed from both inferred lists so no column is encoded twice (an ordinal
# column stored as text would otherwise also be one-hot encoded).
num_features = [
    c for c in X_train.select_dtypes(include=["number"]).columns
    if c not in ordinal_columns
]
cat_features = [
    c for c in X_train.select_dtypes(include=["object","category","string"]).columns
    if c not in ordinal_columns
]

print("Numerical cols:", num_features)
print("Categorical cols:", cat_features)

# Build transformer
transformers = []
if num_features:
    transformers.append(("num", StandardScaler(), num_features))
if cat_features:
    # sparse_output param may differ by sklearn version; try fallback if error
    try:
        transformers.append(("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_features))
    except TypeError:
        transformers.append(("cat", OneHotEncoder(handle_unknown="ignore", sparse=False), cat_features))
if ordinal_columns:
    # Categories unseen at fit time are encoded as -1 instead of raising.
    transformers.append(("ord", OrdinalEncoder(categories=ordinal_categories, handle_unknown="use_encoded_value", unknown_value=-1), ordinal_columns))

# Columns not listed above are dropped; output feature names carry no transformer prefix.
preprocessor = ColumnTransformer(transformers=transformers, remainder="drop", verbose_feature_names_out=False)

# SMOTE for imbalance (keeps pipeline unified)
smote = SMOTE(random_state=RANDOM_STATE)

def build_pipe(model):
    """Return a fresh, unfitted imblearn pipeline: prep -> smote -> model.

    Each call clones the shared `preprocessor`, `smote` and the given `model`,
    so the pipelines built here never alias each other's steps. Fitting one
    pipeline therefore cannot mutate another pipeline or the estimator object
    that was passed in.

    Parameters
    ----------
    model : estimator
        Classifier to place as the final "model" step.

    Returns
    -------
    ImbPipeline
        Pipeline with steps named "prep", "smote" and "model".
    """
    # Imported locally so this definition does not depend on another cell's imports.
    from sklearn.base import clone

    return ImbPipeline([
        ("prep", clone(preprocessor)),
        ("smote", clone(smote)),
        ("model", clone(model)),
    ])
# Save input schema (column name -> dtype string) and the label mapping.
# Files are opened in `with` blocks so the handles are flushed and closed
# deterministically instead of being left to the garbage collector.
input_schema = {c: str(X[c].dtype) for c in X.columns}
with open(os.path.join(ARTIFACTS_DIR, "input_schema.json"), "w", encoding="utf-8") as fh:
    json.dump(input_schema, fh, ensure_ascii=False, indent=2)
with open(os.path.join(ARTIFACTS_DIR, "label_map.json"), "w", encoding="utf-8") as fh:
    json.dump(label_map, fh, ensure_ascii=False, indent=2)


Numerical cols: ['edad', 'imc', 'p_a_sistolica', 'p_a_diastolica', 'creatinina']
Categorical cols: ['hipertension', 'diabetes', 'ant_fam_hiper', 'tec_repro_asistida']


In [None]:

# -------------------------------
# 4) Candidate models
# -------------------------------
# Estimators that are always available, each keyed by a short tag.
candidates = [
    ("LRN", LogisticRegression(max_iter=2000, random_state=RANDOM_STATE)),
    ("LDA", LinearDiscriminantAnalysis()),
    ("KNN", KNeighborsClassifier()),
    ("GNB", GaussianNB()),
    ("DTS", DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ("RFS", RandomForestClassifier(n_estimators=300, random_state=RANDOM_STATE, n_jobs=-1)),
    ("MLP", MLPClassifier(hidden_layer_sizes=(64,), max_iter=600, random_state=RANDOM_STATE)),
]

# Optional boosting libraries: (import flag, tag, lazy constructor).
# The constructor only runs when the flag is set, i.e. when the class was imported.
optional_candidates = (
    (_HAS_XGB, "XGB", lambda: XGBClassifier(tree_method="hist", eval_metric="logloss", random_state=RANDOM_STATE, n_estimators=400)),
    (_HAS_LGB, "LGB", lambda: LGBMClassifier(n_estimators=500, random_state=RANDOM_STATE)),
    (_HAS_CAT, "CAT", lambda: CatBoostClassifier(iterations=600, random_state=RANDOM_STATE, verbose=False, allow_writing_files=False)),
)
candidates.extend((tag, make()) for available, tag, make in optional_candidates if available)

print("Step 4 — Candidates:", [tag for tag, _ in candidates])

# -------------------------------
# 5) Baseline CV (5-fold) — scoring set
# -------------------------------
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
scoring = {"accuracy":"accuracy","f1_macro":"f1_macro","roc_auc":"roc_auc","average_precision":"average_precision"}

# Report column -> key of the cross_validate result it is averaged from.
metric_columns = {
    "acc_mean": "test_accuracy",
    "f1_mean": "test_f1_macro",
    "roc_auc_mean": "test_roc_auc",
    "pr_auc_mean": "test_average_precision",
}

baseline_rows = []
for name, model in candidates:
    pipe = build_pipe(model)
    scores = cross_validate(pipe, X_train, y_train, cv=cv, scoring=scoring, n_jobs=-1, return_train_score=False)
    row = {"model": name}
    for column, score_key in metric_columns.items():
        row[column] = float(scores[score_key].mean())
    baseline_rows.append(row)
    print(f"{name} | ACC {row['acc_mean']:.3f} | F1 {row['f1_mean']:.3f} | AUC {row['roc_auc_mean']:.3f}")

# Rank by macro-F1, breaking ties with PR-AUC and then ROC-AUC (all descending).
baseline_df = pd.DataFrame(baseline_rows)
baseline_df = baseline_df.sort_values(["f1_mean","pr_auc_mean","roc_auc_mean"], ascending=False)
baseline_df = baseline_df.reset_index(drop=True)
baseline_df.to_csv(os.path.join(REPORTS_DIR, f"step5_baseline_metrics_{TS}.csv"), index=False)

# The top-ranked row names the baseline winner; fetch its estimator from the candidate list.
baseline_best_name = baseline_df.loc[0, "model"]
baseline_best_model = dict(candidates)[baseline_best_name]
print("Step 5 — Baseline best:", baseline_best_name)


Step 4 — Candidates: ['LRN', 'LDA', 'KNN', 'GNB', 'DTS', 'RFS', 'MLP', 'XGB', 'LGB']
LRN | ACC 0.882 | F1 0.881 | AUC 0.965
LDA | ACC 0.881 | F1 0.880 | AUC 0.963
KNN | ACC 0.870 | F1 0.869 | AUC 0.945
GNB | ACC 0.874 | F1 0.873 | AUC 0.948
DTS | ACC 0.883 | F1 0.882 | AUC 0.882
RFS | ACC 0.940 | F1 0.939 | AUC 0.990
MLP | ACC 0.979 | F1 0.979 | AUC 0.999
XGB | ACC 0.963 | F1 0.963 | AUC 0.995
LGB | ACC 0.963 | F1 0.962 | AUC 0.996
Step 5 — Baseline best: MLP


In [None]:

# -------------------------------
# 6) Tuning with RandomizedSearchCV (example space)
# -------------------------------
import shutil

from scipy.stats import randint, uniform
try:
    from scipy.stats import loguniform
except Exception:
    from sklearn.utils.fixes import loguniform

# Search space per candidate tag; keys use the pipeline's "model__<param>" routing.
param_spaces = {
    "LRN": {"model__C": loguniform(1e-2, 1e1), "model__penalty":["l2"]},
    "RFS": {"model__n_estimators": randint(200,500), "model__max_depth": randint(4,14)},
    "XGB": {"model__n_estimators": randint(200,600), "model__learning_rate": loguniform(5e-3, 2e-1)} if _HAS_XGB else {},
    "LGB": {"model__n_estimators": randint(200,800), "model__learning_rate": loguniform(5e-3, 2e-1)} if _HAS_LGB else {},
    "CAT": {"model__iterations": randint(200,700), "model__learning_rate": loguniform(5e-3, 2e-1)} if _HAS_CAT else {},
}

# Tune only the tags that are present among the candidates (optional libraries may be missing).
candidate_lookup = dict(candidates)
to_tune = [(k, candidate_lookup[k]) for k in ["LRN","RFS","XGB","LGB","CAT"] if k in candidate_lookup]

opt_rows = []
best_models = []

# Per-run scratch directory used as the pipelines' `memory` during the search.
# It is removed in the `finally` below, so no cache is left behind on disk.
cache_dir = os.path.join(".skcache", TS)
os.makedirs(cache_dir, exist_ok=True)
try:
    for name, base_model in to_tune:
        pipe = build_pipe(base_model)
        try:
            pipe.set_params(memory=cache_dir)
            uses_cache = True
        except ValueError as exc:
            # set_params raises ValueError for an unknown parameter; caching is
            # only an optimisation, so continue without it but say so.
            uses_cache = False
            print(f"Tuning {name}: step caching disabled ({exc})")

        # Boosting models get more candidates but a cheaper 3-fold CV.
        heavy = name in ["XGB","LGB","CAT"]
        search = RandomizedSearchCV(
            pipe,
            param_spaces.get(name, {}),
            n_iter=(15 if heavy else 12),
            cv=(StratifiedKFold(n_splits=3) if heavy else cv),
            scoring={"f1_macro":"f1_macro"},
            refit="f1_macro",
            n_jobs=-1,
            random_state=RANDOM_STATE,
            verbose=0,
        )
        search.fit(X_train, y_train)

        tuned_pipe = search.best_estimator_
        if uses_cache:
            # Detach the refit pipeline from the scratch cache: the directory is
            # deleted below, and the pipeline may later be persisted with joblib,
            # where a machine-local cache path would be meaningless.
            tuned_pipe.set_params(memory=None)

        best_models.append((name, tuned_pipe, float(search.best_score_), search.best_params_))
        # store brief metrics
        opt_rows.append({"model":name, "f1_mean": float(search.best_score_)})
        print(f"Tuning {name}: best f1={search.best_score_:.4f}")
finally:
    shutil.rmtree(cache_dir, ignore_errors=True)

if best_models:
    # Highest mean CV macro-F1 first.
    best_models.sort(key=lambda x: x[2], reverse=True)
    best_name, final_pipe_opt, best_cv_f1, best_params = best_models[0]
    print("Step 6 — Tuning winner:", best_name, best_cv_f1)
else:
    # Nothing was tuned; downstream cells treat None as "use the baseline".
    final_pipe_opt = None

# Save tuning report
if opt_rows:
    pd.DataFrame(opt_rows).to_csv(os.path.join(REPORTS_DIR, f"step6_tuning_metrics_{TS}.csv"), index=False)


Tuning LRN: best f1=0.8881
Tuning RFS: best f1=0.9444
Tuning XGB: best f1=0.9565
[LightGBM] [Info] Number of positive: 786, number of negative: 786
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000092 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 569
[LightGBM] [Info] Number of data points in the train set: 1572, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Tuning LGB: best f1=0.9579
Step 6 — Tuning winner: LGB 0.9578856749024363


In [None]:

# -------------------------------
# 7) Fair comparison (same CV) between baseline_best and tuned_best
# -------------------------------
# One splitter, with its own seed, is used to score both contenders.
same_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
pipe_baseline_best = build_pipe(baseline_best_model)
if final_pipe_opt is None:
    # Nothing was tuned: the baseline pipeline stands in for the tuned one.
    pipe_tuned_best = pipe_baseline_best
else:
    pipe_tuned_best = final_pipe_opt

def cv_summary(pipe):
    """Cross-validate `pipe` on the training split with `same_cv`.

    Returns a dict mapping each "test_<metric>" key produced by
    `cross_validate` to the mean of its per-fold scores (as a float).
    Timing entries such as fit/score time are left out.
    """
    metric_names = ("accuracy", "balanced_accuracy", "f1_macro", "roc_auc", "average_precision")
    cv_out = cross_validate(
        pipe,
        X_train,
        y_train,
        cv=same_cv,
        scoring={metric: metric for metric in metric_names},
        n_jobs=-1,
    )
    summary = {}
    for key, fold_scores in cv_out.items():
        if key.startswith("test_"):
            summary[key] = float(fold_scores.mean())
    return summary

row_base = cv_summary(pipe_baseline_best)
row_tune = cv_summary(pipe_tuned_best)

# cv_summary always reports macro-F1 under this key.
f1_base = row_base["test_f1_macro"]
f1_tune = row_tune["test_f1_macro"]

# Minimum macro-F1 gain the tuned pipeline must show to displace the baseline.
MIN_F1_GAIN = 0.005

# Whether a tuned pipeline exists is decided by `final_pipe_opt`, not by probing
# globals(): a `best_name` left over from an earlier execution would otherwise
# be picked up and mislabel the comparison.
has_tuned = final_pipe_opt is not None
delta = (f1_tune - f1_base) if has_tuned else 0.0

if has_tuned and delta >= MIN_F1_GAIN:
    winner_name, winner_pipe = best_name, pipe_tuned_best
else:
    winner_name, winner_pipe = baseline_best_name, pipe_baseline_best
print("Step 7 — Selected for TEST:", winner_name)

# export compare
tuned_label = best_name if has_tuned else ""
compare_df = pd.DataFrame([
    {"model": "Baseline(" + baseline_best_name + ")", "f1": f1_base},
    {"model": "Tuned(" + tuned_label + ")", "f1": f1_tune},
])
compare_df.to_csv(os.path.join(REPORTS_DIR, f"step7_fair_compare_{TS}.csv"), index=False)


Step 7 — Selected for TEST: MLP


In [None]:

# -------------------------------
# 8) Decision policy: choose threshold using OOF probabilities maximizing F1_macro
# -------------------------------
cv_thr = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
# Out-of-fold class probabilities for the training rows; column 1 is the positive class.
oof_all_classes = cross_val_predict(winner_pipe, X_train, y_train, cv=cv_thr, method="predict_proba", n_jobs=-1)
oof_proba = oof_all_classes[:, 1]

thresholds = np.linspace(0.05, 0.95, 91)


def _sweep_row(cutoff):
    """Score the labels obtained by cutting `oof_proba` at `cutoff` against `y_train`."""
    labels = (oof_proba >= cutoff).astype(int)
    return {
        "thr": float(cutoff),
        "acc": accuracy_score(y_train, labels),
        "balacc": balanced_accuracy_score(y_train, labels),
        "prec": precision_score(y_train, labels, zero_division=0),
        "rec": recall_score(y_train, labels, zero_division=0),
        "f1_macro": f1_score(y_train, labels, average="macro"),
    }


rows = [_sweep_row(thr) for thr in thresholds]

# Best macro-F1 first; ties broken by balanced accuracy, precision, then recall.
thr_df = pd.DataFrame(rows)
thr_df = thr_df.sort_values(["f1_macro","balacc","prec","rec"], ascending=False)
thr_df = thr_df.reset_index(drop=True)
BEST_THR = float(thr_df.loc[0, "thr"])
print("Step 8 — BEST_THR (OOF):", BEST_THR)
thr_df.to_csv(os.path.join(REPORTS_DIR, f"step8_threshold_sweep_{TS}.csv"), index=False)

# Save decision policy: which model won and the probability cut-off to apply.
policy = {
    "winner": winner_name,
    "threshold": float(BEST_THR),
}
# `with` closes the handle, so the JSON is fully flushed before later cells read it.
with open(os.path.join(ARTIFACTS_DIR, "decision_policy.json"), "w", encoding="utf-8") as fh:
    json.dump(policy, fh, ensure_ascii=False, indent=2)


Step 8 — BEST_THR (OOF): 0.31999999999999995


In [None]:

# -------------------------------
# 9) Final evaluation on TEST
# -------------------------------
print("Training winner pipeline on full TRAIN set...")
winner_pipe.fit(X_train, y_train)

# Positive-class probability for each test row, then the Step-8 cut-off.
proba_test = winner_pipe.predict_proba(X_test)[:,1]
y_pred_test = (proba_test >= BEST_THR).astype(int)

# Label-based metrics use the thresholded predictions; the two AUCs use the raw scores.
test_metrics = {}
test_metrics["accuracy"] = float(accuracy_score(y_test, y_pred_test))
test_metrics["precision"] = float(precision_score(y_test, y_pred_test, zero_division=0))
test_metrics["recall"] = float(recall_score(y_test, y_pred_test))
test_metrics["f1"] = float(f1_score(y_test, y_pred_test))
test_metrics["roc_auc"] = float(roc_auc_score(y_test, proba_test))
test_metrics["pr_auc"] = float(average_precision_score(y_test, proba_test))
# Stored as nested lists so the dict stays JSON-serialisable.
test_metrics["confusion_matrix"] = confusion_matrix(y_test, y_pred_test).tolist()

print("Step 9 — Test metrics:")
print(json.dumps(test_metrics, indent=2))
test_metrics_path = os.path.join(REPORTS_DIR, f"step9_test_metrics_{TS}.csv")
pd.DataFrame([test_metrics]).to_csv(test_metrics_path, index=False)


Training winner pipeline on full TRAIN set...
Step 9 — Test metrics:
{
  "accuracy": 0.975,
  "precision": 0.9585798816568047,
  "recall": 0.9878048780487805,
  "f1": 0.972972972972973,
  "roc_auc": 0.9986933797909409,
  "pr_auc": 0.9984729917799233,
  "confusion_matrix": [
    [
      189,
      7
    ],
    [
      2,
      162
    ]
  ]
}


In [None]:

# -------------------------------
# 10) Interpretability + error analysis
# -------------------------------
# Preferred: SHAP summary plot on the preprocessed test set.
# Fallback: permutation importance of the whole pipeline on the raw test set.
# Failures are reported instead of being swallowed, so a missing report file
# never goes unnoticed.
shap_done = False
if _HAS_SHAP:
    try:
        import matplotlib.pyplot as plt

        # extract underlying trained model (last step 'model' in pipeline)
        trained_model = winner_pipe.named_steps['model']
        prep_step = winner_pipe.named_steps['prep']
        # The explainer works on the model's inputs, i.e. the preprocessed matrix.
        X_test_prep = prep_step.transform(X_test)
        # Names of the transformed columns, so the plot does not show anonymous features.
        prep_feature_names = list(prep_step.get_feature_names_out())

        explainer = shap.Explainer(trained_model.predict_proba, X_test_prep)
        shap_values = explainer(X_test_prep)
        # predict_proba has one output per class; when the attributions carry that
        # extra trailing axis, keep only the positive class (index 1).
        if np.ndim(shap_values.values) == 3:
            shap_for_plot = shap_values[:, :, 1]
        else:
            shap_for_plot = shap_values
        shap.summary_plot(shap_for_plot, X_test_prep, feature_names=prep_feature_names, show=False)
        # save summary plot
        plt.tight_layout()
        plt.savefig(os.path.join(REPORTS_DIR, f"step10_shap_summary_{TS}.png"))
        plt.close()
        shap_done = True
    except Exception as exc:
        print(f"Step 10 — SHAP explanation failed ({type(exc).__name__}: {exc}); falling back to permutation importance")
else:
    print("Step 10 — SHAP not available; using permutation importance")

if not shap_done:
    # fallback: permutation importance
    try:
        from sklearn.inspection import permutation_importance
        winner_pipe_fit = winner_pipe
        # compute on test set
        r = permutation_importance(winner_pipe_fit, X_test, y_test, n_repeats=10, random_state=RANDOM_STATE, n_jobs=-1)
        # The pipeline is permuted from the outside, so there is one importance per
        # RAW input column. Labelling with the transformed (one-hot expanded) names
        # would give arrays of different length and make the DataFrame constructor fail.
        feat_names = list(X_test.columns)
        imp_df = pd.DataFrame({"feature": feat_names, "importance_mean": r['importances_mean']}).sort_values('importance_mean', ascending=False)
        imp_df.to_csv(os.path.join(REPORTS_DIR, f"step10_permutation_importance_{TS}.csv"), index=False)
    except Exception as exc:
        print(f"Step 10 — permutation importance failed ({type(exc).__name__}: {exc}); no importance report written")

# FP / FN examples
# Positional indices of the misclassified test rows, found with boolean masks.
y_true_arr = np.asarray(y_test)
y_hat_arr = np.asarray(y_pred_test)
fp_idx = np.flatnonzero((y_true_arr == 0) & (y_hat_arr == 1)).tolist()
fn_idx = np.flatnonzero((y_true_arr == 1) & (y_hat_arr == 0)).tolist()

# Export at most ten raw feature rows of each error type.
fp_path = os.path.join(REPORTS_DIR, f"step10_false_positives_{TS}.csv")
fn_path = os.path.join(REPORTS_DIR, f"step10_false_negatives_{TS}.csv")
X_test.iloc[fp_idx[:10]].to_csv(fp_path, index=False)
X_test.iloc[fn_idx[:10]].to_csv(fn_path, index=False)


PermutationExplainer explainer: 361it [00:22, 10.67it/s]                         


In [None]:

# -------------------------------
# 11) Export artifacts
# -------------------------------
def _write_json_artifact(obj, filename):
    """Write `obj` as indented UTF-8 JSON to ARTIFACTS_DIR/filename and close the file."""
    with open(os.path.join(ARTIFACTS_DIR, filename), "w", encoding="utf-8") as fh:
        json.dump(obj, fh, ensure_ascii=False, indent=2)


# Save pipeline
PIPE_PATH = os.path.join(ARTIFACTS_DIR, f"pipeline_{winner_name}.joblib")
joblib.dump(winner_pipe, PIPE_PATH)

# model card minimum
model_card = {
    "version": TS,
    "system": f"Python {platform.python_version()}",
    "data_file": DATA_FILE,
    "target": TARGET,
    "winner": winner_name,
    "threshold": BEST_THR,
    "test_metrics": test_metrics
}
# The card keeps its .md name but its content is the JSON dump of `model_card`.
_write_json_artifact(model_card, "model_card.md")

# sample inputs/outputs: first five test rows and their scored predictions
_write_json_artifact(X_test.iloc[:5].to_dict(orient="records"), "sample_inputs.json")
_write_json_artifact(
    [{"proba": float(p), "pred": int(pi)} for p, pi in zip(proba_test[:5], y_pred_test[:5])],
    "sample_outputs.json",
)

print("Step 11 — Artifacts exported to:", ARTIFACTS_DIR)


Step 11 — Artifacts exported to: artefactos/20251108_2115


In [None]:

# -------------------------------
# 12) Load artifacts & inference helpers
# -------------------------------
# The helpers below read these module-level names. In this notebook they alias
# the objects built by the earlier cells; nothing is read back from disk here.

# Objects produced by the training cells.
PIPE = winner_pipe
WINNER = winner_name
POLICY = policy
THRESHOLD = BEST_THR

# Input contract: expected columns (in order) and their dtype strings.
INPUT_SCHEMA = input_schema
FEATURES = [*INPUT_SCHEMA]

# Label text <-> numeric class, in both directions.
LABEL_MAP = label_map
REV_LABEL = dict(zip(LABEL_MAP.values(), LABEL_MAP.keys()))


def _coerce_and_align(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of `df` with exactly the columns of INPUT_SCHEMA, coerced by dtype.

    - Columns missing from `df` are added and filled with NaN.
    - Columns whose schema dtype is numeric are converted with `pd.to_numeric`;
      values that cannot be parsed become NaN.
    - Every other column is cast to pandas' "string" dtype.
    - Columns not in the schema are dropped; output order follows FEATURES.

    The dtype test is case-insensitive and includes unsigned integers, so
    schema entries such as "uint8" or the nullable "Int64"/"Float64" are
    treated as numeric instead of being turned into strings.
    """
    df = df.copy()
    numeric_prefixes = ("int", "uint", "float", "double")
    for c, t in INPUT_SCHEMA.items():
        if c not in df.columns:
            df[c] = np.nan
        if str(t).lower().startswith(numeric_prefixes):
            df[c] = pd.to_numeric(df[c], errors='coerce')
        else:
            df[c] = df[c].astype("string")
    return df[FEATURES]


def predict_batch(records, thr=None):
    """Score one or more input records with the exported pipeline.

    Parameters
    ----------
    records : dict | list[dict] | anything accepted by `pd.DataFrame`
        A single record (dict) or a collection of records.
    thr : float, optional
        Probability cut-off for the positive class; defaults to THRESHOLD.

    Returns
    -------
    list[dict]
        One dict per record with keys "proba_POS", "pred_num", "pred_label"
        and "threshold". An empty input yields an empty list.
    """
    thr = THRESHOLD if thr is None else float(thr)
    if isinstance(records, dict):
        records = [records]
    df = pd.DataFrame(records)
    if len(df) == 0:
        # Nothing to score: return early instead of handing a 0-row frame
        # to the pipeline, which rejects empty input.
        return []
    df = _coerce_and_align(df)
    proba = PIPE.predict_proba(df)[:,1]
    yhat = (proba >= thr).astype(int)
    return [
        {"proba_POS": float(p), "pred_num": int(label), "pred_label": REV_LABEL[int(label)], "threshold": thr}
        for p, label in zip(proba, yhat)
    ]


def predict_one(record, thr=None):
    """Score a single record; returns the first result dict from `predict_batch`."""
    results = predict_batch(record, thr=thr)
    return results[0]

# quick smoke demo: score the first three exported sample inputs, if they exist
if __name__ == '__main__':
    print('Demo predict on first sample inputs:')
    samples_path = os.path.join(ARTIFACTS_DIR, 'sample_inputs.json')
    if os.path.exists(samples_path):
        # `with` closes the file handle once the samples are loaded.
        with open(samples_path, 'r', encoding='utf-8') as fh:
            samples = json.load(fh)
        pprint(predict_batch(samples[:3]))
    else:
        print('No sample_inputs.json found. Run training first or provide a sample.')


Demo predict on first sample inputs:
[{'pred_label': 'RIESGO',
  'pred_num': 1,
  'proba_POS': 0.9998328018896628,
  'threshold': 0.31999999999999995},
 {'pred_label': 'SIN RIESGO',
  'pred_num': 0,
  'proba_POS': 1.0234411226202388e-08,
  'threshold': 0.31999999999999995},
 {'pred_label': 'RIESGO',
  'pred_num': 1,
  'proba_POS': 0.808234195694986,
  'threshold': 0.31999999999999995}]
