# Importación de librerías

In [1]:
# Importamos librerías principales
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import math

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix, classification_report

In [3]:
!pip install -q jupyter-dash dash==2.17.1 dash-bootstrap-components plotly

In [4]:
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output, State, callback
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go

# Cargue de datos

In [5]:
# Display settings: show every DataFrame column and apply one seaborn theme
# (white grid, Set2 palette, slightly larger fonts) to all plots.
pd.set_option("display.max_columns", None)
sns.set(font_scale=1.1, palette="Set2", style="whitegrid")

In [6]:
# Load the credit-risk dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="credit_risk_dataset.csv")

In [7]:
# First look: report the dataset dimensions, then render the leading rows.
print("Dimensiones del dataset:", df.shape, end="\n")
print("\nPrimeras filas:")
display(df.head(n=5))

Dimensiones del dataset: (32581, 12)

Primeras filas:


Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,RENT,123.0,PERSONAL,D,35000,16.02,1,0.59,Y,3
1,21,9600,OWN,5.0,EDUCATION,B,1000,11.14,0,0.1,N,2
2,25,9600,MORTGAGE,1.0,MEDICAL,C,5500,12.87,1,0.57,N,3
3,23,65500,RENT,4.0,MEDICAL,C,35000,15.23,1,0.53,N,2
4,24,54400,RENT,8.0,MEDICAL,C,35000,14.27,1,0.55,Y,4


In [8]:
# 3. General overview of the dataset
print("\n--- Información general ---")
df.info()

print("\n--- Resumen estadístico ---")
display(df.describe(include="all"))

# Number of missing values in each column
print("\n--- Valores nulos por variable ---")
display(df.isna().sum())

# Share of missing values in each column, shown largest first
missing_percent = df.isna().mean().mul(100)
print("\n--- Porcentaje de valores faltantes ---")
display(missing_percent.sort_values(ascending=False))


--- Información general ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32581 entries, 0 to 32580
Data columns (total 12 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   person_age                  32581 non-null  int64  
 1   person_income               32581 non-null  int64  
 2   person_home_ownership       32581 non-null  object 
 3   person_emp_length           31686 non-null  float64
 4   loan_intent                 32581 non-null  object 
 5   loan_grade                  32581 non-null  object 
 6   loan_amnt                   32581 non-null  int64  
 7   loan_int_rate               29465 non-null  float64
 8   loan_status                 32581 non-null  int64  
 9   loan_percent_income         32581 non-null  float64
 10  cb_person_default_on_file   32581 non-null  object 
 11  cb_person_cred_hist_length  32581 non-null  int64  
dtypes: float64(3), int64(5), object(4)
memory usage: 3.0+ MB

-

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
count,32581.0,32581.0,32581,31686.0,32581,32581,32581.0,29465.0,32581.0,32581.0,32581,32581.0
unique,,,4,,6,7,,,,,2,
top,,,RENT,,EDUCATION,A,,,,,N,
freq,,,16446,,6453,10777,,,,,26836,
mean,27.7346,66074.85,,4.789686,,,9589.371106,11.011695,0.218164,0.170203,,5.804211
std,6.348078,61983.12,,4.14263,,,6322.086646,3.240459,0.413006,0.106782,,4.055001
min,20.0,4000.0,,0.0,,,500.0,5.42,0.0,0.0,,2.0
25%,23.0,38500.0,,2.0,,,5000.0,7.9,0.0,0.09,,3.0
50%,26.0,55000.0,,4.0,,,8000.0,10.99,0.0,0.15,,4.0
75%,30.0,79200.0,,7.0,,,12200.0,13.47,0.0,0.23,,8.0



--- Valores nulos por variable ---


person_age                       0
person_income                    0
person_home_ownership            0
person_emp_length              895
loan_intent                      0
loan_grade                       0
loan_amnt                        0
loan_int_rate                 3116
loan_status                      0
loan_percent_income              0
cb_person_default_on_file        0
cb_person_cred_hist_length       0
dtype: int64


--- Porcentaje de valores faltantes ---


loan_int_rate                 9.563856
person_emp_length             2.747000
person_age                    0.000000
person_income                 0.000000
person_home_ownership         0.000000
loan_intent                   0.000000
loan_grade                    0.000000
loan_amnt                     0.000000
loan_status                   0.000000
loan_percent_income           0.000000
cb_person_default_on_file     0.000000
cb_person_cred_hist_length    0.000000
dtype: float64

In [9]:
# 4. Basic cleaning

# Missing values: discard every row that contains at least one NaN.
df = df.dropna(axis=0, how="any")

In [10]:
# Row/column count of df at this point (displayed as the cell output).
df.shape

(28638, 12)

In [11]:
# Separate the predictors (X) from the target column (y).
X = df.drop("loan_status", axis=1)
y = df["loan_status"]

In [12]:
# Categorical feature names; every other predictor column is treated as numeric.
cat_cols = [
    "person_home_ownership",
    "loan_intent",
    "loan_grade",
    "cb_person_default_on_file",
]
num_cols = list(filter(lambda col: col not in cat_cols, X.columns))

# Preprocesamiento: One Hot Encoding

In [13]:
# One-hot encode the categorical columns and pass the numeric columns through
# unchanged. The numeric columns are listed explicitly (num_cols) instead of
# relying on remainder="passthrough": the output keeps the same column order
# (encoded categoricals first, then the numeric columns in their original
# order), but every feature is now selected by name and any unexpected extra
# column is dropped rather than silently fed to the model.
# NOTE(review): this should also avoid the ColumnTransformer FutureWarning about
# remainder columns being stored as integer indices — confirm on the installed
# scikit-learn version.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(drop="first", handle_unknown="ignore"), cat_cols),
        ("num", "passthrough", num_cols),
    ],
    remainder="drop",
)

# Separación de conjuntos de entrenamiento y prueba

In [14]:
# Hold out 20% of the rows for testing; the split is stratified on y and uses a
# fixed seed so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42, test_size=0.2)

# Definir el modelo base

In [15]:
# Base model: a 300-tree random forest with no depth limit, balanced class
# weights, a fixed seed, and all available cores (n_jobs=-1).
rf = RandomForestClassifier(n_estimators=300, max_depth=None, class_weight="balanced", n_jobs=-1, random_state=42)

# Construcción de Pipeline completo

In [16]:
# Full pipeline: preprocessing followed by the classifier, so raw feature rows
# can be passed straight to fit/predict.
model = Pipeline([("preprocessor", preprocessor), ("classifier", rf)])

# Entrenamiento del modelo

In [17]:
# Fit the whole pipeline (preprocessing + classifier) on the training partition.
model.fit(X_train, y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



# Evaluación del modelo

In [18]:
# Test-set outputs: probability of class 1 (second column of predict_proba)
# and the hard class labels.
y_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

In [19]:
# Headline test-set metrics, keyed by the name shown in the report.
metrics = dict(
    Accuracy=accuracy_score(y_test, y_pred),
    Precision=precision_score(y_test, y_pred),
    Recall=recall_score(y_test, y_pred),
    ROC_AUC=roc_auc_score(y_test, y_proba),
)

print("\n==== RESULTADOS DEL MODELO RANDOM FOREST ====")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Confusion matrix and per-class report, each printed under its own heading.
print("\nMatriz de confusión:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nReporte de clasificación:", classification_report(y_test, y_pred), sep="\n")


==== RESULTADOS DEL MODELO RANDOM FOREST ====
Accuracy: 0.9282
Precision: 0.9663
Recall: 0.6930
ROC_AUC: 0.9293

Matriz de confusión:
[[4457   30]
 [ 381  860]]

Reporte de clasificación:
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      4487
           1       0.97      0.69      0.81      1241

    accuracy                           0.93      5728
   macro avg       0.94      0.84      0.88      5728
weighted avg       0.93      0.93      0.92      5728



# Dashboard dinámico desarrollado en Dash

In [20]:
from jupyter_dash import JupyterDash
from dash import Dash, dcc, html, Input, Output, State, callback, dash_table
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

# ---- Allowed values taken from the data (keeps the UI consistent with it) ----
opts_home = sorted(set(df["person_home_ownership"].dropna()))
opts_intent = sorted(set(df["loan_intent"].dropna()))
opts_grade = list("ABCDEFG")
opts_default = ["N", "Y"]  # per the dataset

# ---- App ----
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Store that holds the history of predictions
store_pred = dcc.Store(id="pred-store", data=[])

def gauge_fig(score, prob):
    """Build the gauge figure shown for a single prediction.

    Args:
        score: Optional value to place on the 300-850 dial. When ``None`` the
            value is derived from ``prob`` as ``300 + prob * 550``. An explicit
            score is clamped to the dial range.
        prob: Predicted default probability. Values outside [0, 1] are clamped
            so the needle and the title always stay within the dial.

    Returns:
        A plotly ``Figure`` holding one ``Indicator`` trace (gauge + number).
    """
    dial_min, dial_max = 300, 850
    prob = min(max(float(prob), 0.0), 1.0)

    if score is None:
        # The dial is only a presentation scale: probability 0 -> 300, 1 -> 850.
        score_display = int(dial_min + prob * (dial_max - dial_min))
    else:
        score_display = int(min(max(score, dial_min), dial_max))

    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=score_display,
        number={'valueformat': 'd'},
        title={'text': f"Prob. incumplimiento: {prob*100:.1f}%"},
        gauge={
            'axis': {'range': [dial_min, dial_max]},
            # Transparent bar: only the threshold line marks the value.
            'bar': {'color': 'rgba(0,0,0,0)'},
            'steps': [
                {'range': [300, 580], 'color': '#2ecc71'},
                {'range': [580, 700], 'color': '#f1c40f'},
                {'range': [700, 850], 'color': '#e74c3c'}
            ],
            'threshold': {'line': {'color': "#34495e", 'width': 4},
                          'thickness': 0.75, 'value': score_display}
        }
    ))
    fig.update_layout(height=300, margin=dict(l=10, r=10, t=40, b=10))
    return fig

# Page layout.
# Each section is nested inside its own dcc.Tab so that selecting a tab really
# switches the visible content; with the sections placed as siblings of the
# Tabs component and no callback reading "tabs", both were always rendered.
# NOTE(review): content of a non-selected tab is not rendered, so form fields
# typically return to their default values after switching tabs — confirm this
# is acceptable for the intended workflow.
app.layout = dbc.Container([
    html.H3("Mosedal Financial Group — CreditRisk360°", className="mt-3"),

    # The prediction-history store lives outside the tabs so it stays mounted
    # while the user moves between sections.
    store_pred,

    dcc.Tabs(id="tabs", value="tab-predict", className="mb-3", children=[

        # ---------- Predictive section ----------
        dcc.Tab(label="Sección Predictiva", value="tab-predict", children=[
            html.Div(id="tab-predict-content", children=[
                dbc.Row([
                    # Input panel
                    dbc.Col([
                        dbc.Card([
                            dbc.CardHeader("Entrada del Cliente"),
                            dbc.CardBody([
                                dbc.Row([
                                    dbc.Col([
                                        dbc.Label("Edad (años)"),
                                        dbc.Input(id="inp-age", type="number", value=30, min=18, step=1)
                                    ], md=6),
                                    dbc.Col([
                                        # person_income in the dataset is a yearly figure,
                                        # so the field is labelled as annual income.
                                        dbc.Label("Ingresos anuales"),
                                        dbc.Input(id="inp-income", type="number", value=60000, min=0, step=100)
                                    ], md=6)
                                ], className="mb-2"),

                                dbc.Row([
                                    dbc.Col([
                                        dbc.Label("Antigüedad en empleo (años)"),
                                        dbc.Input(id="inp-empl", type="number", value=5, min=0, step=1)
                                    ], md=6),
                                    dbc.Col([
                                        dbc.Label("Historial crediticio (años)"),
                                        dbc.Input(id="inp-credlen", type="number", value=5, min=0, step=1)
                                    ], md=6)
                                ], className="mb-2"),

                                dbc.Row([
                                    dbc.Col([
                                        dbc.Label("Monto del préstamo"),
                                        dbc.Input(id="inp-loan-amnt", type="number", value=15000, min=0, step=100)
                                    ], md=6),
                                    dbc.Col([
                                        dbc.Label("Tasa de interés (%)"),
                                        dbc.Input(id="inp-rate", type="number", value=12.0, min=0, step=0.1)
                                    ], md=6),
                                ], className="mb-2"),

                                dbc.Row([
                                    dbc.Col([
                                        dbc.Label("Propiedad de vivienda"),
                                        dbc.Select(id="sel-home",
                                                   options=[{"label": o, "value": o} for o in opts_home],
                                                   value=opts_home[0])
                                    ], md=6),
                                    dbc.Col([
                                        dbc.Label("Intención del préstamo"),
                                        dbc.Select(id="sel-intent",
                                                   options=[{"label": o, "value": o} for o in opts_intent],
                                                   value=opts_intent[0])
                                    ], md=6),
                                ], className="mb-2"),

                                dbc.Row([
                                    dbc.Col([
                                        dbc.Label("Grado (loan_grade)"),
                                        dbc.Select(id="sel-grade",
                                                   options=[{"label": g, "value": g} for g in opts_grade],
                                                   value="B")
                                    ], md=6),
                                    dbc.Col([
                                        dbc.Label("Incumplimiento previo (Y/N)"),
                                        dbc.Select(id="sel-default",
                                                   options=[{"label": d, "value": d} for d in opts_default],
                                                   value="N")
                                    ], md=6),
                                ], className="mb-2"),

                                # Switch: derive loan_percent_income from amount / income.
                                dbc.Checklist(
                                    options=[{"label": " Calcular automáticamente loan_percent_income (monto/ingreso)", "value": "auto"}],
                                    value=["auto"], id="chk-auto", switch=True, className="mb-2"
                                ),

                                dbc.Label("loan_percent_income (0–1)"),
                                dbc.Input(id="inp-lpi", type="number", step=0.01, value=None, disabled=True),

                                dbc.Button("Predecir", id="btn-predict", color="primary", className="mt-3", n_clicks=0)
                            ])
                        ])
                    ], md=4),

                    # Result + charts
                    dbc.Col([
                        dbc.Card([
                            dbc.CardHeader("Resultado del Modelo"),
                            dbc.CardBody([
                                html.Div(id="pred-label", style={"fontSize": "1.1rem", "fontWeight": "600"}, className="mb-2"),
                                dcc.Graph(id="gauge-score"),
                            ])
                        ], className="mb-3"),
                        dbc.Card([
                            dbc.CardHeader("Histórico de predicciones"),
                            dbc.CardBody([dcc.Graph(id="pred-history")])
                        ])
                    ], md=8)
                ]),
            ]),
        ]),

        # ---------- Descriptive section ----------
        dcc.Tab(label="Sección Descriptiva", value="tab-desc", children=[
            html.Div(id="tab-desc-content", children=[
                dbc.Row([
                    dbc.Col([
                        dbc.Card([
                            dbc.CardHeader("Filtros"),
                            dbc.CardBody([
                                dbc.Row([
                                    dbc.Col([
                                        dbc.Label("Filtro: Intención del préstamo"),
                                        dbc.Select(id="f-intent",
                                                   options=[{"label": "Todos", "value": "ALL"}] + [{"label": o, "value": o} for o in opts_intent],
                                                   value="ALL")
                                    ], md=4),
                                    dbc.Col([
                                        dbc.Label("Filtro: Grado (loan_grade)"),
                                        dbc.Select(id="f-grade",
                                                   options=[{"label": "Todos", "value": "ALL"}] + [{"label": g, "value": g} for g in opts_grade],
                                                   value="ALL")
                                    ], md=4),
                                    dbc.Col([
                                        dbc.Label("Filtro: Propiedad de vivienda"),
                                        dbc.Select(id="f-home",
                                                   options=[{"label": "Todos", "value": "ALL"}] + [{"label": o, "value": o} for o in opts_home],
                                                   value="ALL")
                                    ], md=4),
                                ])
                            ])
                        ])
                    ], md=12)
                ], className="mb-3"),
                dbc.Row([
                    dbc.Col(dbc.Card([dbc.CardHeader("Distribución de Loan Status"),
                                      dbc.CardBody([dcc.Graph(id="hist-loan-status")])]), md=6),
                    dbc.Col(dbc.Card([dbc.CardHeader("Proporción de Riesgo por Filtro"),
                                      dbc.CardBody([dcc.Graph(id="risk-by-filter")])]), md=6)
                ])
            ]),
        ]),
    ]),
], fluid=True)

# ---------- Callbacks ----------

@callback(
    Output("inp-lpi","value"), Output("inp-lpi","disabled"),
    Input("chk-auto","value"),
    Input("inp-loan-amnt","value"), Input("inp-income","value"),
    prevent_initial_call=False
)
def update_lpi(chk, loan_amnt, income):
    """Keep the loan_percent_income field in sync with the "auto" switch.

    Args:
        chk: Current value of the checklist; contains ``"auto"`` when automatic
            calculation is switched on (may be ``None`` or empty).
        loan_amnt: Loan amount typed by the user, or ``None`` when empty.
        income: Income typed by the user, or ``None`` when empty.

    Returns:
        ``(value, disabled)`` for the ``inp-lpi`` input:
        * auto on, usable inputs  -> ``loan_amnt / income`` clamped to [0, 1]
          and rounded to 4 decimals, field disabled;
        * auto on, missing inputs or non-positive income -> ``None``, disabled;
        * auto off -> the value is left untouched and the field is enabled.
    """
    # Local import keeps this cell self-contained; dash is already a dependency.
    from dash import no_update

    auto = "auto" in (chk or [])
    if not auto:
        # Manual mode: never overwrite what the user typed. Previously the value
        # was reset to None every time the loan amount or the income changed.
        return no_update, False

    # Explicit None checks so a loan amount of 0 yields 0.0 instead of "no value".
    if loan_amnt is None or income is None or income <= 0:
        return None, True

    ratio = loan_amnt / float(income)
    return round(max(0.0, min(ratio, 1.0)), 4), True

# Prediction + history bookkeeping
@callback(
    Output("pred-label","children"),
    Output("gauge-score","figure"),
    Output("pred-store","data"),
    Input("btn-predict","n_clicks"),
    State("inp-age","value"), State("inp-income","value"), State("sel-home","value"),
    State("inp-empl","value"), State("sel-intent","value"), State("sel-grade","value"),
    State("inp-loan-amnt","value"), State("inp-rate","value"),
    State("inp-lpi","value"), State("sel-default","value"),
    State("inp-credlen","value"),
    State("pred-store","data"),
    prevent_initial_call=True
)
def do_predict(n, age, income, home, empl, intent, grade, loan_amnt, rate, lpi, default_on_file, cred_len, hist):
    """Score the client described by the form and append the result to the history.

    Args:
        n: Click count of the "Predecir" button (only used as the trigger).
        age, income, empl, loan_amnt, rate, cred_len: Numeric form values;
            any of them is ``None`` when the field is empty.
        home, intent, grade, default_on_file: Selected categorical values.
        lpi: loan_percent_income from the form, or ``None`` to derive it from
            ``loan_amnt / income``.
        hist: Prediction history currently held in ``pred-store``.

    Returns:
        ``(label, figure, history)``. When a numeric field is empty, only the
        label is updated (with a hint for the user) and the gauge and history
        are left as they are.
    """
    # Local import keeps this cell self-contained; dash is already a dependency.
    from dash import no_update

    # An empty numeric field arrives as None; scoring it would raise inside the
    # ratio computation or the model, so ask the user to complete the form.
    required_numbers = (age, income, empl, loan_amnt, rate, cred_len)
    if any(value is None for value in required_numbers):
        return "⚠️ Complete todos los campos numéricos antes de predecir.", no_update, no_update

    if lpi is None:
        # Same rule as the automatic field: amount / income clamped to [0, 1];
        # 0 when the income is not positive.
        lpi = max(0.0, min(loan_amnt / float(income), 1.0)) if income > 0 else 0.0

    row = {
        "person_age": age,
        "person_income": income,
        "person_home_ownership": home,
        "person_emp_length": empl,
        "loan_intent": intent,
        "loan_grade": grade,
        "loan_amnt": loan_amnt,
        "loan_int_rate": rate,
        "loan_percent_income": lpi,
        "cb_person_default_on_file": default_on_file,
        "cb_person_cred_hist_length": cred_len
    }
    Xnew = pd.DataFrame([row])
    # Probability of class 1 (second column of predict_proba) for the single row.
    prob = float(model.predict_proba(Xnew)[:, 1][0])
    high_risk = prob >= 0.5

    label = "⚠️ Cliente de alto riesgo" if high_risk else "✅ Cliente solvente (bajo riesgo)"
    fig = gauge_fig(score=None, prob=prob)

    # Build a new list instead of mutating the object received from the store.
    history = list(hist or [])
    history.append({"prob": prob, "label": "Riesgo" if high_risk else "Solvente"})
    return label, fig, history

# Prediction-history chart
@callback(
    Output("pred-history","figure"),
    Input("pred-store","data")
)
def plot_history(hist):
    """Scatter the stored prediction probabilities in the order they were made.

    Args:
        hist: List of ``{"prob", "label"}`` records from ``pred-store``
            (``None`` or empty when nothing has been predicted yet).

    Returns:
        An empty figure when there is no history; otherwise a scatter plot of
        probability against prediction number, coloured by label, with a dashed
        reference line at 0.5 and the y axis fixed to [0, 1].
    """
    records = hist if hist else []
    if len(records) == 0:
        return go.Figure()

    frame = pd.DataFrame(records)
    # 1-based running index of each prediction, used as the x axis.
    frame["n"] = np.arange(1, len(frame) + 1)

    axis_labels = {"n":"# Predicción", "prob":"Prob. riesgo"}
    fig = px.scatter(
        frame,
        x="n",
        y="prob",
        color="label",
        title="Probabilidad de riesgo por predicción",
        labels=axis_labels,
    )
    fig.add_hline(y=0.5, line_dash="dash", line_color="gray")
    fig.update_layout(yaxis={"range": [0, 1]})
    return fig

# Descriptive section: status counts and risk proportion
@callback(
    Output("hist-loan-status","figure"),
    Output("risk-by-filter","figure"),
    Input("f-intent","value"), Input("f-grade","value"), Input("f-home","value")
)
def desc(intent, grade, home):
    """Build the two descriptive charts for the currently selected filters.

    Args:
        intent: Selected loan intent, or ``"ALL"`` for no filter.
        grade: Selected loan grade, or ``"ALL"`` for no filter.
        home: Selected home-ownership value, or ``"ALL"`` for no filter.

    Returns:
        ``(fig1, fig2)``: a bar chart with the count of each loan status in the
        filtered rows, and a gauge with the mean of ``loan_status`` as a
        percentage (0 when no rows match).
    """
    d = df.copy()
    # Apply each filter in turn; "ALL" leaves the corresponding column unfiltered.
    selections = (
        ("loan_intent", intent),
        ("loan_grade", grade),
        ("person_home_ownership", home),
    )
    for column, chosen in selections:
        if chosen != "ALL":
            d = d[d[column] == chosen]

    # Count of each loan status, with readable labels
    status_counts = (
        d["loan_status"]
        .map({0:"Solvente",1:"Riesgo"})
        .value_counts()
        .rename_axis("Estado")
        .reset_index(name="Conteo")
    )
    fig1 = px.bar(status_counts, x="Estado", y="Conteo",
                  title="Distribución de Loan Status (filtrada)")

    # Risk proportion as a percentage
    if len(d) > 0:
        risk_pct = d["loan_status"].mean() * 100
    else:
        risk_pct = 0
    risk_gauge = go.Indicator(
        mode="gauge+number",
        value=risk_pct,
        number={'suffix': "%"},
        title={'text':"Proporción de Riesgo (%)"},
        gauge={'axis': {'range': [0, 100]}, 'bar': {'color': '#e67e22'}},
    )
    fig2 = go.Figure(risk_gauge)
    fig2.update_layout(height=350)
    return fig1, fig2

# Stop and forget every server thread registered on JupyterDash before starting
# the app again.
# NOTE(review): relies on the private `_server_threads` attribute of JupyterDash
# (getattr falls back to an empty dict when it is absent); presumably this frees
# the port when the cell is re-run — confirm against the installed jupyter-dash.
for k, t in list(getattr(JupyterDash, "_server_threads", {}).items()):
    try:
        t.kill(); t.join()
    except Exception:
        # Best-effort cleanup: a thread that cannot be killed/joined is skipped.
        pass
    del JupyterDash._server_threads[k]

# Start the app rendered inline in the notebook on port 8051, with debug mode
# and the reloader turned off.
app.run_server(mode="inline", debug=False, port=8051, use_reloader=False)


JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.

