# Desenvolvimento do modelo de score de crédito

Exploração inicial de modelos de base

In [14]:
import pandas as pd
from dagshub.data_engine import datasources
import mlflow
import dagshub
from sklearn.model_selection import train_test_split
import mlflow.sklearn
import mlflow.catboost
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, classification_report, ConfusionMatrixDisplay
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import lightgbm as lgb
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import mlflow.models.signature
from mlflow.models import infer_signature
from catboost import CatBoostRegressor
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

## Carregando Dataset

In [15]:
# Open the "processed" datasource of the DagsHub repository that holds the training data
ds = datasources.get_datasource("pedromonnt/fiap-credit-score-classification-model", "processed")

In [16]:
# Display every datapoint registered in the datasource as a table
ds.all().dataframe

Output()

Unnamed: 0,path,datapoint_id,dagshub_download_url,media type,size
0,train-processed.csv,103365851,https://dagshub.com/api/v1/repos/pedromonnt/fi...,text/plain,16635276


In [17]:
# Gather the download URL of every datapoint returned by ds.head()
res = ds.head()
download_urls = [dp.download_url for dp in res]

# Fail here with an explicit message instead of a NameError in a later cell
if not download_urls:
    raise ValueError("No datapoints found in the datasource; cannot resolve dataset_url")

# When several datapoints are returned, the last one is used
dataset_url = download_urls[-1]

Output()

In [18]:
# Show the resolved URL the CSV will be read from
dataset_url

'https://dagshub.com/api/v1/repos/pedromonnt/fiap-credit-score-classification-model/raw/main/data/processed/train-processed.csv'

In [19]:
# Read the processed training CSV straight from the resolved URL and preview the first rows
df = pd.read_csv(dataset_url)
df.head()

Unnamed: 0,Age,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Changed_Credit_Limit,...,Payment_of_Min_Amount_NM,Payment_of_Min_Amount_No,Payment_of_Min_Amount_Yes,Payment_Behaviour_Desconhecido,Payment_Behaviour_High_spent_Large_value_payments,Payment_Behaviour_High_spent_Medium_value_payments,Payment_Behaviour_High_spent_Small_value_payments,Payment_Behaviour_Low_spent_Large_value_payments,Payment_Behaviour_Low_spent_Medium_value_payments,Payment_Behaviour_Low_spent_Small_value_payments
0,23.0,19114.12,1824.843333,3.0,4.0,3.0,4.0,3.0,7.0,11.27,...,0,1,0,0,0,0,1,0,0,0
1,23.0,19114.12,3093.745,3.0,4.0,3.0,4.0,-1.0,14.0,11.27,...,0,1,0,0,0,0,0,1,0,0
2,-3.0,19114.12,3093.745,3.0,4.0,3.0,4.0,3.0,7.0,9.4,...,0,1,0,0,0,0,0,0,1,0
3,23.0,19114.12,3093.745,3.0,4.0,3.0,4.0,5.0,4.0,6.27,...,0,1,0,0,0,0,0,0,0,1
4,23.0,19114.12,1824.843333,3.0,4.0,3.0,4.0,6.0,14.0,11.27,...,0,1,0,0,0,1,0,0,0,0


## Desenvolvimento e experimentos de modelos

In [20]:
# Initialise the DagsHub integration for this repository with MLflow enabled,
# so the runs started below are tracked on the repository's MLflow server
dagshub.init(repo_owner="pedromonnt", repo_name="fiap-credit-score-classification-model", mlflow=True)

In [21]:
# Turn on MLflow autologging; the log output reports it enabled for lightgbm, sklearn and xgboost
mlflow.autolog()

2025/07/17 22:15:15 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2025/07/17 22:15:17 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/07/17 22:15:17 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


In [22]:
# Separate the feature matrix from the target column
X = df.drop(columns='Credit_Score')
y = df['Credit_Score']

# Hold out 20% of the labelled data for validation, keeping the class
# proportions of the target in both partitions
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [23]:
def evaluate_and_log_model(kind, model_name, model, X_val, y_val):
    """Evaluate a fitted classifier on the validation split and log it to MLflow.

    Intended to be called inside a ``with mlflow.start_run(...)`` block, as the
    model cells of this notebook do.

    Parameters
    ----------
    kind : str
        Selects the MLflow flavor used to log the model: ``"catboost"``,
        ``"xgboost"`` or ``"lightgbm"``; any other value uses ``mlflow.sklearn``.
    model_name : str
        Label used in the printed summary and artifact path passed to ``log_model``.
    model : estimator
        Fitted classifier exposing ``predict`` and ``predict_proba``.
    X_val, y_val
        Validation features and true labels.

    Side effects
    ------------
    Prints a metric summary, logs five metrics, the model (once, with signature
    and input example) and ``classification_report.txt`` to MLflow.
    """
    predictions = model.predict(X_val)
    pred_proba = model.predict_proba(X_val)

    # Metrics (weighted averages so every class contributes by its support)
    accuracy = accuracy_score(y_val, predictions)
    f1 = f1_score(y_val, predictions, average='weighted')
    precision = precision_score(y_val, predictions, average='weighted')
    recall = recall_score(y_val, predictions, average='weighted')
    roc_auc = roc_auc_score(y_val, pred_proba, multi_class='ovr', average='weighted')

    print(f"Resultados para {model_name}:")
    print(f"  Acurácia: {accuracy:.4f}")
    print(f"  F1-Score (Weighted): {f1:.4f}")
    print(f"  ROC AUC (Weighted): {roc_auc:.4f}")

    # Send all metrics to MLflow in a single call
    mlflow.log_metrics({
        "accuracy": accuracy,
        "f1_weighted": f1,
        "precision_weighted": precision,
        "recall_weighted": recall,
        "roc_auc_weighted": roc_auc,
    })

    signature = infer_signature(X_val, predictions)

    # Pick the MLflow flavor matching the model library; sklearn is the fallback
    if kind == "catboost":
        flavor = mlflow.catboost
    elif kind == "xgboost":
        flavor = mlflow.xgboost
    elif kind == "lightgbm":
        flavor = mlflow.lightgbm
    else:
        flavor = mlflow.sklearn

    # Log the model exactly once, with its signature and a small input example.
    # Logging it a second time through mlflow.sklearn would write to the same
    # artifact path with the wrong flavor for non-sklearn models.
    flavor.log_model(model, model_name, signature=signature, input_example=X_val[:5])

    print(f"Model {model_name} logged with accuracy: {accuracy}, f1_weighted: {f1}, precision_weighted: {precision}, recall_weighted: {recall}, roc_auc_weighted: {roc_auc}")

    # Store the per-class classification report as a text artifact
    report = classification_report(y_val, predictions)
    mlflow.log_text(report, "classification_report.txt")

### Modelo 1: Regressão Logística

In [None]:
with mlflow.start_run(run_name="Logistic Regression"):

    # Base estimator; C and solver are chosen by the grid search below
    lr = LogisticRegression(max_iter=1000, random_state=42)

    # Hyper-parameter search space for GridSearchCV
    param_grid_lr = dict(
        C=[0.1, 1.0, 10.0],
        solver=['liblinear', 'saga'],
    )

    # 5-fold cross-validated search scored by weighted F1, using all cores
    grid_search_lr = GridSearchCV(
        estimator=lr,
        param_grid=param_grid_lr,
        scoring='f1_weighted',
        cv=5,
        n_jobs=-1,
    )
    grid_search_lr.fit(X_train, y_train)

    # Record the winning hyper-parameters, then evaluate the refitted best model
    mlflow.log_params(grid_search_lr.best_params_)
    best_model_lr = grid_search_lr.best_estimator_

    evaluate_and_log_model("sklearn", "logistic_regression", best_model_lr, X_val, y_val)

2025/07/17 22:25:11 INFO mlflow.sklearn.utils: Logging the 5 best runs, one run will be omitted.


üèÉ View run rumbling-jay-424 at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/d230974a3fd44c428684444ad9ff652d
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0
üèÉ View run omniscient-shark-973 at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/e7d4dffaaafb4ff899150ea0d159ca5f
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0
üèÉ View run unique-foal-487 at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/ab3bd22a518f4e99a330e73c2f32d474
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0
üèÉ View run illustrious-toad-920 at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/61c55dc6



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Model logistic_regression logged with accuracy: 0.6288, f1_weighted: 0.6213540988999845, precision_weighted: 0.6245405785213104, recall_weighted: 0.6288, roc_auc_weighted: 0.7693549318172382




üèÉ View run Logistic Regression at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/df46dbd0d50e4942a4d4349071a5c338
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0


üèÉ View run sedate-conch-674 at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/2223b970f6684eef9c75fa67a641c2bb
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0
üèÉ View run loud-shark-799 at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/4e91bf93ced2431ca1762583e21c9fea
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0
üèÉ View run stylish-dove-99 at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/f95d0c4ae62142429057834feba530c2
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0
üèÉ View run resilient-shad-188 at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/56a495f635544079

### Modelo 2: Árvore de Decisão

In [25]:
with mlflow.start_run(run_name="Decision Tree Classifier"):
    # Hyper-parameter search space: tree depth and the split/leaf size limits
    param_grid_dt = dict(
        max_depth=[5, 10, None],
        min_samples_split=[2, 5, 10],
        min_samples_leaf=[1, 2, 4],
    )

    dt = DecisionTreeClassifier(random_state=42)

    # 5-fold cross-validated search scored by weighted F1, using all cores
    grid_search_dt = GridSearchCV(
        estimator=dt,
        param_grid=param_grid_dt,
        scoring='f1_weighted',
        cv=5,
        n_jobs=-1,
    )
    grid_search_dt.fit(X_train, y_train)

    # Record the winning hyper-parameters, then evaluate the refitted best model
    mlflow.log_params(grid_search_dt.best_params_)
    best_model_dt = grid_search_dt.best_estimator_

    evaluate_and_log_model("sklearn", "decision_tree_classifier", best_model_dt, X_val, y_val)

2025/07/17 22:27:51 INFO mlflow.sklearn.utils: Logging the 5 best runs, 22 runs will be omitted.


Resultados para decision_tree_classifier:
  Acur√°cia: 0.7088
  F1-Score (Weighted): 0.7110
  ROC AUC (Weighted): 0.8311




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Model decision_tree_classifier logged with accuracy: 0.70885, f1_weighted: 0.7110410036315017, precision_weighted: 0.7159983852334575, recall_weighted: 0.70885, roc_auc_weighted: 0.8311445429371924




üèÉ View run Decision Tree Classifier at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/d7912ddd228a40e996e5c4ffe7934d5e
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0


### Modelo 3: XGBoost Classifier

In [26]:
with mlflow.start_run(run_name="XGBoost Classifier"):

    # Hyper-parameter search space: ensemble size, tree depth, step size and row subsampling
    param_grid_xgb = dict(
        n_estimators=[100, 200],
        max_depth=[3, 5, 7],
        learning_rate=[0.05, 0.1],
        subsample=[0.8, 1.0],
    )

    xgb = XGBClassifier(eval_metric='mlogloss', random_state=42)

    # 5-fold cross-validated search scored by weighted F1, using all cores
    grid_search_xgb = GridSearchCV(
        estimator=xgb,
        param_grid=param_grid_xgb,
        scoring='f1_weighted',
        cv=5,
        n_jobs=-1,
    )
    grid_search_xgb.fit(X_train, y_train)

    # Record the winning hyper-parameters, then evaluate the refitted best model
    mlflow.log_params(grid_search_xgb.best_params_)
    best_model_xgb = grid_search_xgb.best_estimator_

    evaluate_and_log_model("xgboost", "xgboost_classifier", best_model_xgb, X_val, y_val)

2025/07/17 22:35:14 INFO mlflow.sklearn.utils: Logging the 5 best runs, 19 runs will be omitted.


Resultados para xgboost_classifier:
  Acur√°cia: 0.7620
  F1-Score (Weighted): 0.7618
  ROC AUC (Weighted): 0.8782


  self.get_booster().save_model(fname)


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Model xgboost_classifier logged with accuracy: 0.76195, f1_weighted: 0.7617952702484266, precision_weighted: 0.7618689695604779, recall_weighted: 0.76195, roc_auc_weighted: 0.8781685603706157




üèÉ View run XGBoost Classifier at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0/runs/a20e075a6f7a434ea7ebdf2bee2d585d
üß™ View experiment at: https://dagshub.com/pedromonnt/fiap-credit-score-classification-model.mlflow/#/experiments/0


## Registro de Modelo em Produção

In [30]:
# Run id of the "XGBoost Classifier" run shown above; its "model" artifact is registered
run_id = "a20e075a6f7a434ea7ebdf2bee2d585d"
model_uri = f"runs:/{run_id}/model"

# Creates the registered model, or a new version of it when the name already exists
mlflow.register_model(model_uri=model_uri, name="credit-score-classification-model")

Registered model 'credit-score-classification-model' already exists. Creating a new version of this model...
2025/07/17 22:43:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: credit-score-classification-model, version 10
Created version '10' of model 'credit-score-classification-model'.


<ModelVersion: aliases=[], creation_timestamp=1752802990112, current_stage='None', description='', last_updated_timestamp=1752802990112, name='credit-score-classification-model', run_id='a20e075a6f7a434ea7ebdf2bee2d585d', run_link='', source='mlflow-artifacts:/5cb5e553364e438896ea46ed8538561f/a20e075a6f7a434ea7ebdf2bee2d585d/artifacts/model', status='READY', status_message=None, tags={}, user_id='', version='10'>