### Importación de librerías

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
import pickle
import joblib
from pathlib import Path
import os

In [None]:
# Base directory for every artifact path: the notebook's current working directory.
dir_path = Path.cwd()

### Carga de datos

In [None]:
# Load the Iris dataset and split it into the feature matrix (X) and the
# class labels (y).
iris_dataset = load_iris()
X, y = iris_dataset.data, iris_dataset.target

### Dataset splitting (Train & Test)

In [None]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

### Scaling data (Mandatory)

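La estandarización de las variables es necesaria en modelos como la regresión logística, ya que se entrenan mediante optimización y las columnas con mayor escala pueden sesgar los gradientes. Los modelos basados en árboles (Random Forest, LightGBM) no la requieren, pero en este notebook se entrenan con los mismos datos escalados por consistencia.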

In [None]:
# Fit the scaler on the training split only, then apply the same transform to
# the test split so that no test-set statistics are used during preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# joblib.dump does not create missing parent directories, so make sure the
# artifacts folder exists before persisting the fitted scaler.
artifacts_dir = dir_path / "artifacts"
artifacts_dir.mkdir(parents=True, exist_ok=True)
joblib.dump(scaler, artifacts_dir / "scaler.joblib")

### Modeling

In [None]:
# Logistic Regression
# Train on the scaled training features, then score the held-out test split.
log_reg = LogisticRegression(max_iter=200).fit(X_train_scaled, y_train)

# Hard class predictions feed the accuracy; per-class probabilities feed the
# one-vs-rest AUROC.
y_pred_logreg = log_reg.predict(X_test_scaled)
proba_logreg = log_reg.predict_proba(X_test_scaled)

print(
    "AUROC: ",
    roc_auc_score(y_test, proba_logreg, multi_class='ovr'),
)
print(
    "Accuracy: ",
    accuracy_score(y_test, y_pred_logreg),
)

In [None]:
# Random Forest
# Trained on the same scaled features as the other models; the fixed seed
# makes the fitted forest reproducible.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Hard class predictions feed the accuracy; per-class probabilities feed the
# one-vs-rest AUROC.
y_pred_rf = rf.predict(X_test_scaled)
proba_rf = rf.predict_proba(X_test_scaled)

print(
    "AUROC: ",
    roc_auc_score(y_test, proba_rf, multi_class='ovr'),
)
print(
    "Accuracy: ",
    accuracy_score(y_test, y_pred_rf),
)

In [None]:
# LightGBM
# verbose=-1 is passed to keep LightGBM's training log output quiet.
lgbm = LGBMClassifier(n_estimators=100, random_state=42, verbose=-1).fit(
    X_train_scaled, y_train
)

# Hard class predictions feed the accuracy; per-class probabilities feed the
# one-vs-rest AUROC.
y_pred_lgbm = lgbm.predict(X_test_scaled)
proba_lgbm = lgbm.predict_proba(X_test_scaled)

print(
    "AUROC: ",
    roc_auc_score(y_test, proba_lgbm, multi_class='ovr'),
)
print(
    "Accuracy: ",
    accuracy_score(y_test, y_pred_lgbm),
)


In [None]:
# ADD YOUR MODEL

### Model persistence

In [None]:
# Re-resolve the working directory so this section can run without the
# earlier setup cell.
dir_path = Path.cwd()

In [None]:
# Display the directory under which the "artifacts/..." paths are resolved.
dir_path

In [None]:
# Save models using joblib
# joblib.dump does not create missing parent directories, so make sure the
# artifacts folder exists before writing the model files.
artifacts_dir = dir_path / "artifacts"
artifacts_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(log_reg, artifacts_dir / "log_reg.joblib")
joblib.dump(rf, artifacts_dir / "random_forest.joblib")
joblib.dump(lgbm, artifacts_dir / "lightgbm.joblib")