# Desenvolvimento do modelo

Exploração inicial de modelos de base

In [19]:
import pandas as pd
from dagshub.data_engine import datasources
import mlflow
import dagshub
from sklearn.model_selection import train_test_split
import mlflow.sklearn
import mlflow.catboost
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import mlflow.models.signature
from mlflow.models import infer_signature
from catboost import CatBoostRegressor
from sklearn.preprocessing import LabelEncoder

## Carregando Dataset

In [20]:
ds = datasources.get_datasource("luciancsilva/fiap-10dtsr-mlops-trabalho-final", "processed")

In [21]:
ds.all().dataframe

Output()

Unnamed: 0,path,datapoint_id,dagshub_download_url,media type,size
0,credit-score.csv,103597804,https://dagshub.com/api/v1/repos/luciancsilva/...,text/plain,27051762


In [22]:
res = ds.head()

for dp in res:
    dataset_url = dp.download_url

Output()

In [23]:
dataset_url

'https://dagshub.com/api/v1/repos/luciancsilva/fiap-10dtsr-mlops-trabalho-final/raw/main/data/processed/credit-score.csv'

In [24]:
df = pd.read_csv(dataset_url)
df.head()

Unnamed: 0,ID,Customer_ID,Month,Age,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,...,Auto_Loan,Credit-Builder_Loan,Personal_Loan,Home_Equity_Loan,Not_Specified,Mortgage_Loan,Student_Loan,Debt_Consolidation_Loan,Payday_Loan,Missed_Payment_Day
0,0x1602,CUS_0xd40,1,23.0,Scientist,19114.12,1824.843333,3.0,4.0,3.0,...,1,1,1,1,0,0,0,0,0,1
1,0x1603,CUS_0xd40,2,23.0,Scientist,19114.12,1824.843333,3.0,4.0,3.0,...,1,1,1,1,0,0,0,0,0,0
2,0x1604,CUS_0xd40,3,23.0,Scientist,19114.12,1824.843333,3.0,4.0,3.0,...,1,1,1,1,0,0,0,0,0,1
3,0x1605,CUS_0xd40,4,23.0,Scientist,19114.12,1824.843333,3.0,4.0,3.0,...,1,1,1,1,0,0,0,0,0,1
4,0x1606,CUS_0xd40,5,23.0,Scientist,19114.12,1824.843333,3.0,4.0,3.0,...,1,1,1,1,0,0,0,0,0,1


## Desenvolvimento e experimentos de modelos

In [25]:
dagshub.init(repo_owner="luciancsilva", repo_name="fiap-10dtsr-mlops-trabalho-final", mlflow=True)

In [26]:
mlflow.autolog()

2025/08/03 09:28:25 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2025/08/03 09:28:25 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/08/03 09:28:25 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


In [27]:
# Usar o LabelEncoder para a coluna 'Payment_of_Min_Amount'
le = LabelEncoder()
df['Payment_of_Min_Amount'] = le.fit_transform(df['Payment_of_Min_Amount'].astype(str))

In [28]:
features = list(df.columns)
features.remove('ID')
features.remove('Customer_ID')
features.remove('Credit_Score')
features.remove('Occupation')
features.remove('Monthly_Inhand_Salary')
features.remove('Interest_Rate')
features.remove('Type_of_Loan')
features.remove('Delay_from_due_date')
features.remove('Changed_Credit_Limit')
features.remove('Num_Credit_Inquiries')
features.remove('Credit_Mix')
features.remove('Amount_invested_monthly')
features.remove('Monthly_Balance')
features.remove('Num_of_Loan')
features.remove('Outstanding_Debt')
features.remove('Not_Specified')
features.remove('Month')

features

['Age',
 'Annual_Income',
 'Num_Bank_Accounts',
 'Num_Credit_Card',
 'Num_of_Delayed_Payment',
 'Credit_Utilization_Ratio',
 'Payment_of_Min_Amount',
 'Total_EMI_per_month',
 'Credit_History_Age_Formated',
 'Auto_Loan',
 'Credit-Builder_Loan',
 'Personal_Loan',
 'Home_Equity_Loan',
 'Mortgage_Loan',
 'Student_Loan',
 'Debt_Consolidation_Loan',
 'Payday_Loan',
 'Missed_Payment_Day']

In [29]:
X = df[features]

In [30]:
len(features)

18

In [31]:
y = df["Credit_Score"]
y

0        1
1        1
2        1
3        1
4        1
        ..
99995   -1
99996   -1
99997   -1
99998    0
99999   -1
Name: Credit_Score, Length: 100000, dtype: int64

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [33]:
def evaluate_and_log_model(kind, model_name, model, X_test, y_test):
    predictions = model.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    mape = mean_absolute_percentage_error(y_test, predictions)

    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("R2", r2)
    mlflow.log_metric("MAPE", mape)

    signature = infer_signature(X_test, predictions)

    if kind == "catboost":
        mlflow.catboost.log_model(model, model_name, signature=signature, input_example=X_test[:5])
    elif kind == "xgboost":
        mlflow.xgboost.log_model(model, model_name, signature=signature, input_example=X_test[:5])
    elif kind == "lightgbm":
        mlflow.lightgbm.log_model(model, model_name, signature=signature, input_example=X_test[:5])
    else:
        mlflow.sklearn.log_model(model, model_name, signature=signature, input_example=X_test[:5])

    print(f"Model {model_name} logged with MSE: {mse}, MAE: {mae}, R2: {r2}, MAPE: {mape}")

### Experimento com Ridge Regression

In [34]:
print(X_train.dtypes)

Age                            float64
Annual_Income                  float64
Num_Bank_Accounts              float64
Num_Credit_Card                float64
Num_of_Delayed_Payment         float64
Credit_Utilization_Ratio       float64
Payment_of_Min_Amount            int64
Total_EMI_per_month            float64
Credit_History_Age_Formated    float64
Auto_Loan                        int64
Credit-Builder_Loan              int64
Personal_Loan                    int64
Home_Equity_Loan                 int64
Mortgage_Loan                    int64
Student_Loan                     int64
Debt_Consolidation_Loan          int64
Payday_Loan                      int64
Missed_Payment_Day               int64
dtype: object


In [35]:
with mlflow.start_run(run_name="Ridge Regression"):
    param_grid = {
        'alpha': [0.1, 1.0, 10.0, 100.0],
        'fit_intercept': [True, False],
        }
    
    ridge = Ridge()

    grid_search = GridSearchCV(ridge, param_grid, cv=5, scoring=make_scorer(mean_absolute_percentage_error, greater_is_better=False))
    grid_search.fit(X_train, y_train)

    best_model = grid_search.best_estimator_

    mlflow.log_param("best_alpha", grid_search.best_params_['alpha'])
    mlflow.log_param("best_fit_intercept", grid_search.best_params_['fit_intercept'])

    evaluate_and_log_model("sklearn", "ridge_regression", best_model, X_test, y_test)

2025/08/03 09:28:43 INFO mlflow.sklearn.utils: Logging the 5 best runs, 3 runs will be omitted.


🏃 View run flawless-turtle-22 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/88538ab84cfb462080e774193099eb60
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run luxuriant-colt-886 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/384c96a74dd24eb59cc9fc38f1e3f1cd
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run treasured-sloth-482 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/6c951dede4ec45fcb4a9a6939729d2b2
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run gaudy-penguin-120 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/65b97611eea442009a2f750ed23ce2ec
🧪 View experiment at: h



Model ridge_regression logged with MSE: 0.33315986624909355, MAE: 0.46977569689736315, R2: 0.27165561003667216, MAPE: 611791517760599.6
🏃 View run Ridge Regression at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/ef1c90cd7e9f49df8bb92e3500eddc26
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0


🏃 View run respected-shoat-95 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/5b3dab1305044e42848ad742367f28fb
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run polite-colt-649 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/f9082a5d17854dc885e93b9d263f9bb5
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run charming-chimp-610 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/5bd267ab24ae4379b81657fc0136e117
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run legendary-shrew-285 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/3e43b150e9d844cf88dda658352edb54
🧪 View experiment at: htt



🏃 View run traveling-loon-852 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/9e108efd933c4d5ba073450cee1c17a1
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run serious-owl-708 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/4397586794694862beebb4716485f8d7
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0




🏃 View run amusing-crow-947 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/aa2239fc51b14cefb1848d470c072848
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run charming-cub-232 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/ae5f541277dd400aae69850455e7ff16
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run unequaled-tern-356 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/ff24b3ff94e943e49dd230a6c59b968a
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0
🏃 View run popular-crow-102 at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/96d35791271945eb98a7e17a57790a80
🧪 View experiment at: https:/

### Decision Tree Regressor

In [36]:
with mlflow.start_run(run_name="Decision Tree Regression"):
    param_grid = {
        'max_depth': [None, 3, 5, 10, 15],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }
    
    ridge = DecisionTreeRegressor()

    grid_search = GridSearchCV(ridge, param_grid, cv=5, scoring=make_scorer(mean_absolute_percentage_error, greater_is_better=False))
    grid_search.fit(X_train, y_train)

    best_model = grid_search.best_estimator_

    mlflow.log_param("best_max_depth", grid_search.best_params_['max_depth'])
    mlflow.log_param("best_min_samples_split", grid_search.best_params_['min_samples_split'])
    mlflow.log_param("best_min_samples_leaf", grid_search.best_params_['min_samples_leaf'])

    evaluate_and_log_model("sklearn", "ridge_regression", best_model, X_test, y_test)

2025/08/03 09:30:34 INFO mlflow.sklearn.utils: Logging the 5 best runs, 40 runs will be omitted.


Model ridge_regression logged with MSE: 0.31885462662261727, MAE: 0.4516795087178103, R2: 0.30292930799534235, MAPE: 597679450542288.4
🏃 View run Decision Tree Regression at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/d9c1300bdc344c9b821db3b8eb0c5f6b
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0


## XGBoost

In [37]:
with mlflow.start_run(run_name="XGBoost_Regressor_Advanced"):
   
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0],
        'gamma': [0, 1],
        'reg_alpha': [0, 0.1],
        'reg_lambda': [1, 5],
        'min_child_weight': [1, 3]
    }
   
    xgb = XGBRegressor(random_state=42, verbosity=0)
    grid_search = GridSearchCV(xgb, param_grid, scoring=make_scorer(mean_absolute_percentage_error, greater_is_better=False), cv=5)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    mlflow.log_param("best_n_estimators", best_model.n_estimators)
    mlflow.log_param("best_max_depth", best_model.max_depth)
    mlflow.log_param("best_learning_rate", best_model.learning_rate)
    evaluate_and_log_model("xgboost", "XGBoost Regressor", best_model, X_test, y_test)

2025/08/03 09:43:15 INFO mlflow.sklearn.utils: Logging the 5 best runs, 763 runs will be omitted.
  self.get_booster().save_model(fname)


Model XGBoost Regressor logged with MSE: 0.34422051906585693, MAE: 0.4623214304447174, R2: 0.24747514724731445, MAPE: 375198645747712.0
🏃 View run XGBoost_Regressor_Advanced at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/ede8515dc513474782122dcf1af2b771
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0


## Nova abordagem XGBoost

In [None]:
with mlflow.start_run(run_name="XGBoost_Regressor"):
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 5, 7, 9],
        'learning_rate': [0.01, 0.1, 0.2, 0.3],
    }
    xgb = XGBRegressor(random_state=42, verbosity=0)
    grid_search = GridSearchCV(xgb, param_grid, scoring=make_scorer(mean_absolute_percentage_error, greater_is_better=False), cv=5)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    mlflow.log_param("best_n_estimators", best_model.n_estimators)
    mlflow.log_param("best_max_depth", best_model.max_depth)
    mlflow.log_param("best_learning_rate", best_model.learning_rate)
    evaluate_and_log_model("xgboost", "XGBoost Regressor", best_model, X_test, y_test)

2025/08/03 09:45:23 INFO mlflow.sklearn.utils: Logging the 5 best runs, 43 runs will be omitted.
  self.get_booster().save_model(fname)


Model XGBoost Regressor logged with MSE: 0.3431451618671417, MAE: 0.46224772930145264, R2: 0.24982601404190063, MAPE: 382015899697152.0
🏃 View run XGBoost_Regressor at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0/runs/b575039f697e42ff8d60a67efb261788
🧪 View experiment at: https://dagshub.com/luciancsilva/fiap-10dtsr-mlops-trabalho-final.mlflow/#/experiments/0


## Registro de Modelo em Produção

In [45]:
run_id = "ede8515dc513474782122dcf1af2b771"

mlflow.register_model(model_uri=f"runs:/{run_id}/model", name="credit_score_model")

Successfully registered model 'credit_score_model'.
2025/08/03 10:36:00 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: credit_score_model, version 1
Created version '1' of model 'credit_score_model'.


<ModelVersion: aliases=[], creation_timestamp=1754228160131, current_stage='None', description='', last_updated_timestamp=1754228160131, name='credit_score_model', run_id='ede8515dc513474782122dcf1af2b771', run_link='', source='mlflow-artifacts:/d89c8d3636e44e82a9744e47bc1ceb8e/ede8515dc513474782122dcf1af2b771/artifacts/model', status='READY', status_message=None, tags={}, user_id='', version='1'>