In [7]:
import pandas as pd
import numpy as np
# clasificación
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from evidently import Report
from evidently import DataDefinition
from evidently import Dataset
from evidently.metrics import ValueDrift, DriftedColumnsCount, MissingValueCount
from lightgbm import LGBMClassifier

import requests
from datetime import datetime, timedelta

In [6]:
import joblib

model = joblib.load("model/modelo_credito.pkl")

In [8]:
df_test = pd.read_parquet("data/04_modeling_testing_data.parquet")

In [10]:
X_test = df_test.drop(columns=["loan_status"])
y_test = df_test["loan_status"]

In [13]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

In [14]:
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Accuracy: 0.6542491438918785
Precision: 0.3430169613857813
Recall: 0.6863287831379156
F1 Score: 0.45742145427045855


Todo el proceso será una simulación de cómo hacer el monitoreo, puesto que no tenemos la data actualizada diariamente.

### Inyectar data a InfluxDB

In [11]:
BASE_URL = "http://localhost:8086"

db_params = {
    "db": "monitoring_metrics",
    "u": "admin",
    "p": "admin"
}

# Decirle que tipo de transferencia de datos vamos a hacer, texto plano y no un json
headers = {
    "Content-Type": "text/plain; charset=utf-8",
}

In [12]:
r = requests.post(f"{BASE_URL}/query", params= db_params, data={"q": "SHOW DATABASES"})
r.json()

{'results': [{'statement_id': 0,
   'series': [{'name': 'databases',
     'columns': ['name'],
     'values': [['monitoring_metrics'], ['_internal']]}]}]}

Preparando la data de simulación

In [15]:
accuracy_data = []
recall_data = []
precision_data = []
f1_data = []

DATA_POINTS = 100

for i in range(DATA_POINTS):
    accuracy = round(accuracy_score(y_test, y_pred) + np.random.normal(0.03, 0.01), 4)
    precision = round(precision_score(y_test, y_pred) + np.random.normal(0.03, 0.01), 4)
    recall = round(recall_score(y_test, y_pred) + np.random.normal(0.03, 0.01), 4)
    f1 = round(f1_score(y_test, y_pred) + np.random.normal(0.03, 0.01), 4)
    
    timestamp = int((datetime.now() - timedelta(hours=100) + timedelta(hours=i)).timestamp() * 1e9) # nanoseconds
    
    line = f"model_performance_up accuracy={accuracy},precision={precision},recall={recall},f1={f1} {timestamp}"
    accuracy_data.append(line)

payload='\n'.join(accuracy_data)

print(payload[:398])

model_performance_up accuracy=0.6654,precision=0.3802,recall=0.7278,f1=0.4841 1762954692074192128
model_performance_up accuracy=0.6825,precision=0.3774,recall=0.7093,f1=0.4847 1762958292230441216
model_performance_up accuracy=0.67,precision=0.3675,recall=0.7079,f1=0.4845 1762961892387745024
model_performance_up accuracy=0.6924,precision=0.3621,recall=0.6915,f1=0.4808 1762965492533543168
model_pe


Inyectamos la data a la base de datos de series de tiempo

In [16]:
write_r = requests.post(f"{BASE_URL}/write", params=db_params,  data=payload, headers=headers)

In [23]:
r = requests.post(f"{BASE_URL}/query", params= db_params, data={"q": "SELECT * FROM model_performance_up"})
r.json()

{'results': [{'statement_id': 0,
   'series': [{'name': 'model_performance_up',
     'columns': ['time', 'accuracy', 'f1', 'precision', 'recall'],
     'values': [['2025-11-12T13:38:12.074192128Z',
       0.6654,
       0.4841,
       0.3802,
       0.7278],
      ['2025-11-12T14:38:12.230441216Z', 0.6825, 0.4847, 0.3774, 0.7093],
      ['2025-11-12T15:38:12.387745024Z', 0.67, 0.4845, 0.3675, 0.7079],
      ['2025-11-12T16:38:12.533543168Z', 0.6924, 0.4808, 0.3621, 0.6915],
      ['2025-11-12T17:38:12.655430912Z', 0.6792, 0.4842, 0.3863, 0.7232],
      ['2025-11-12T18:38:12.745793024Z', 0.6801, 0.5026, 0.3733, 0.7188],
      ['2025-11-12T19:38:12.831367936Z', 0.6902, 0.4768, 0.374, 0.7336],
      ['2025-11-12T20:38:12.917821952Z', 0.6931, 0.4778, 0.3872, 0.7177],
      ['2025-11-12T21:38:13.016331008Z', 0.7023, 0.4904, 0.3521, 0.7033],
      ['2025-11-12T22:38:13.107667968Z', 0.6868, 0.4792, 0.3704, 0.7155],
      ['2025-11-12T23:38:13.191753984Z', 0.686, 0.4811, 0.3783, 0.7331],
     

Simulación de data de drift

In [24]:
reference_data = X_test.copy()
reference_data += np.random.normal(loc=0, scale=0.01, size=reference_data.shape)
reference_data["prediction"] = model.predict(X_test)

current_data = X_test.copy()
current_data["prediction"] = model.predict(X_test)

In [26]:
report = Report(metrics = [
    DriftedColumnsCount(method="psi")
    ])

drift_report = report.run(reference_data=reference_data, 
                          current_data=current_data)      

In [28]:
drifted_columns_count = drift_report.dict()["metrics"][0]["value"]["count"]

for i in range(100):
    timestamp = datetime.now() - timedelta(hours=100) + timedelta(hours=i)
    drift_payload = f"drift_metrics_columnas drifted_columns_count={drifted_columns_count + np.random.randint(10)} {int(timestamp.timestamp()) * 1000000000}"
    write_drift = requests.post(f"{BASE_URL}/write", params=db_params,  data=drift_payload, headers=headers)