### Predição de Risco de Crédito

In [5]:
import pickle
from pycaret.classification import load_model, predict_model
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [6]:
def load_models():
    """Load the three persisted artifacts this notebook relies on.

    Returns:
        tuple: ``(ml_model, label_encoder, binary_model)`` — the object
        returned by PyCaret's ``load_model`` for
        ``models/finalized_model_ml``, followed by the objects unpickled from
        ``models/label_encoder.pkl`` and
        ``models/pickled-model-regras_financeiras.pkl``.
    """

    def _unpickle(path):
        # NOTE: pickle.load can execute arbitrary code stored in the file;
        # only point this at artifacts from a trusted source.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    pipeline = load_model("models/finalized_model_ml")
    encoder = _unpickle("models/label_encoder.pkl")
    rules_model = _unpickle("models/pickled-model-regras_financeiras.pkl")
    return pipeline, encoder, rules_model


ml_model, label_encoder, binary_model = load_models()

Transformation Pipeline and Model Successfully Loaded


In [7]:
# Load the evaluation set and score it with the multiclass ML model.
df = pd.read_csv("data/corporate_rating_evaluation.csv")

# Columns removed before prediction (presumably identifiers and the target —
# confirm against the training setup). errors="ignore" tolerates any of them
# being absent from the file.
non_feature_cols = [
    "Unnamed: 0",
    "Name",
    "Symbol",
    "Rating",
    "Rating Agency Name",
    "Date",
]
features_ml = df.drop(columns=non_feature_cols, errors="ignore")

predictions_ml = predict_model(ml_model, data=features_ml)

# Keep the encoded prediction, then decode it with the label encoder.
df["ml_predicted_class"] = predictions_ml["prediction_label"]
df["ml_predict_label"] = label_encoder.inverse_transform(
    df["ml_predicted_class"]
)


In [8]:
# Four sub-scores are built as fixed-weight blends of raw ratios, standardized,
# and then combined into a single Financial Health Score.

# Liquidity: 40% current ratio, 30% quick ratio, 30% cash ratio.
df["liquidity_score"] = (
    df["currentRatio"].mul(0.4)
    .add(df["quickRatio"].mul(0.3))
    .add(df["cashRatio"].mul(0.3))
)

# Profitability: equal weights over three margins and return on assets.
df["profitability_score"] = (
    df["grossProfitMargin"].mul(0.25)
    .add(df["operatingProfitMargin"].mul(0.25))
    .add(df["netProfitMargin"].mul(0.25))
    .add(df["returnOnAssets"].mul(0.25))
)

# Leverage: 60% debt ratio, 40% debt-to-equity ratio.
df["leverage_score"] = (
    df["debtRatio"].mul(0.6)
    .add(df["debtEquityRatio"].mul(0.4))
)

# Cash flow: equal weights over operating and free cash flow per share.
df["cashflow_score"] = (
    df["operatingCashFlowPerShare"].mul(0.5)
    .add(df["freeCashFlowPerShare"].mul(0.5))
)

score_cols = ["liquidity_score", "profitability_score", "leverage_score", "cashflow_score"]

# Standardize the four sub-scores in place.
# NOTE(review): the scaler is fitted on this evaluation frame itself, so the
# standardized values depend on which rows are in the file — confirm this
# matches how the inputs of binary_model were scaled at training time.
scaler = StandardScaler()
df[score_cols] = scaler.fit_transform(df[score_cols])

# Leverage enters with a negative sign; the other three contribute positively.
df["financial_health_score"] = (
    df["liquidity_score"].mul(0.3)
    .add(df["profitability_score"].mul(0.3))
    .sub(df["leverage_score"].mul(0.2))
    .add(df["cashflow_score"].mul(0.2))
)


#####  Modelo binário + regras financeiras

In [9]:
# Feed the four standardized sub-scores plus the combined health score to the
# binary model and keep the second predict_proba column (index 1) —
# presumably the "risky" class; confirm against binary_model.classes_.
binary_inputs = df[score_cols + ["financial_health_score"]]
df["risk_probability"] = binary_model.predict_proba(binary_inputs)[:, 1]

def rule_based_risk(row):
    """Count how many of the four financial warning rules a row triggers.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing the keys
            ``currentRatio``, ``debtRatio``, ``returnOnAssets`` and
            ``operatingCashFlowPerShare``.

    Returns:
        int: Number of triggered rules, from 0 to 4. A comparison against a
        missing (NaN) value is False, so it never raises a flag.
    """
    checks = (
        row["currentRatio"] < 1,                # current ratio below 1
        row["debtRatio"] > 0.6,                 # debt ratio above 0.6
        row["returnOnAssets"] < 0,              # negative return on assets
        row["operatingCashFlowPerShare"] < 0,   # negative operating cash flow
    )
    return sum(1 for triggered in checks if triggered)

# Number of triggered warning rules per row.
df["rule_flags"] = df.apply(rule_based_risk, axis=1)

# Rescale the flag count to [0, 1] using the largest count present in this
# frame. If no row triggers any rule the maximum is 0 and the division would
# turn every score into NaN (0/0), so the rule component falls back to 0.
max_rule_flags = df["rule_flags"].max()
if max_rule_flags > 0:
    rule_component = df["rule_flags"] / max_rule_flags
else:
    rule_component = 0.0

# Final score: 70% model probability + 30% normalized rule flags.
df["final_risk_score"] = 0.7 * df["risk_probability"] + 0.3 * rule_component

# Bucket the score into three bands. include_lowest=True closes the first
# interval on the left so a score of exactly 0 is labelled "Low" rather than
# being left as NaN.
df["risk_bucket"] = pd.cut(
    df["final_risk_score"],
    bins=[0, 0.33, 0.66, 1],
    labels=["Low", "Medium", "High"],
    include_lowest=True,
)

##### Resultado final

In [10]:
# Collapse the five ML rating labels into the same three buckets used by the
# rule-based score, so the two approaches can be compared row by row.
bucket_members = {
    "Low": ("Lowest Risk", "Low Risk"),
    "Medium": ("Medium Risk",),
    "High": ("High Risk", "Highest Risk"),
}
ml_to_bucket = {
    label: bucket
    for bucket, labels in bucket_members.items()
    for label in labels
}

# Labels missing from the mapping become NaN in ml_risk_bucket.
df["ml_risk_bucket"] = df["ml_predict_label"].map(ml_to_bucket)

# True wherever the two buckets differ; a NaN on either side never compares
# equal, so such rows are counted as disagreements.
df["risk_disagreement"] = df["ml_risk_bucket"] != df["risk_bucket"]

In [15]:
# Columns for the final report, in display order; any that are not present in
# df are silently left out.
final_cols = [
    name
    for name in (
        "Name",
        "ml_predict_label",
        "risk_probability",
        "rule_flags",
        "final_risk_score",
        "risk_bucket",
        "risk_disagreement",
    )
    if name in df.columns
]


def highlight_disagreement(row):
    """Return one CSS string per cell, shading rows flagged as disagreements.

    Args:
        row: Mapping-like row that contains a ``risk_disagreement`` entry.

    Returns:
        list[str]: ``len(row)`` copies of a light-red background rule when
        ``row["risk_disagreement"]`` is truthy, otherwise empty strings.
    """
    cell_css = "background-color: #ffcccc" if row["risk_disagreement"] else ""
    return [cell_css] * len(row)

# Apply the row-wise highlighter (axis=1) to the report columns and display
# the styled table as the cell output.
styled_df = (
    df[final_cols]
    .style
    .apply(highlight_disagreement, axis=1)
)
styled_df

Unnamed: 0,Name,ml_predict_label,risk_probability,rule_flags,final_risk_score,risk_bucket,risk_disagreement
0,Ecolab Inc.,Medium Risk,0.18241,1,0.202687,Low,True
1,SM Energy Company,High Risk,0.250424,3,0.400297,Medium,True
2,"Estee Lauder Companies, Inc. (The)",Low Risk,0.992148,1,0.769504,High,True
3,Signet Jewelers Limited,High Risk,0.963851,0,0.674696,High,False
4,"Kinder Morgan, Inc.",Low Risk,0.217641,1,0.227349,Low,False
5,"Oceaneering International, Inc.",Medium Risk,0.963486,0,0.67444,High,True
6,Corning Incorporated,High Risk,0.974733,0,0.682313,High,False
7,EQT Corporation,Highest Risk,0.993154,1,0.770208,High,False
8,Embraer S.A.,Medium Risk,0.991961,1,0.769373,High,True
9,"Berry Global Group, Inc.",High Risk,0.785842,1,0.62509,Medium,True


In [16]:
# Count how many rows agree or disagree between the ML bucket and the
# rule-based bucket, then print the totals.
outcome_names = {True: "Discordam", False: "Concordam"}
agreement_summary = df["risk_disagreement"].map(outcome_names).value_counts()

print("Total de casos:", len(df))
for caption, outcome in (
    ("Casos que concordam:", "Concordam"),
    ("Casos que discordam:", "Discordam"),
):
    # .get(..., 0) covers the case where one outcome never occurs.
    print(caption, int(agreement_summary.get(outcome, 0)))

Total de casos: 406
Casos que concordam: 164
Casos que discordam: 242


In [18]:
# Persist the scored frame. index=False keeps the positional row index out of
# the file, so reading it back does not produce a spurious "Unnamed: 0" column.
df.to_csv("data/credit_risk_prediction.csv", index=False)