<a href="https://colab.research.google.com/github/robert2999/machine_learning/blob/main/predicci_n_de_quiebres_de_sla_en_cx.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, roc_auc_score

In [None]:
df = pd.read_csv("encuestas_demo.csv")
df["quiebre_sla"] = (df["primera_respuesta_horas"] > df["sla_objetivo_horas"]).astype(int)
df["len_comentario"] = df["comentario"].fillna("").str.len()

In [None]:
text = "comentario"
cats = ["canal", "categoria", "severidad"]
nums = ["sla_objetivo_horas", "len_comentario"]
y = df["quiebre_sla"]
X = df[[text] + cats + nums]

In [None]:
pre = ColumnTransformer([
    ("txt", TfidfVectorizer(max_features=2000, ngram_range=(1,2)), "comentario"),
    ("cat", OneHotEncoder(handle_unknown="ignore"), cats),
    ("num", "passthrough", nums)
])

In [None]:
pipe = Pipeline([
    ("prep", pre),
    ("clf", LogisticRegression(max_iter=1000))
])

In [None]:
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
pipe.fit(Xtr, ytr)

pred = pipe.predict(Xte)
proba = pipe.predict_proba(Xte)[:,1]
print(classification_report(yte, pred))
print("AUC:", roc_auc_score(yte, proba))

              precision    recall  f1-score   support

           0       0.93      0.99      0.96        92
           1       0.50      0.12      0.20         8

    accuracy                           0.92       100
   macro avg       0.71      0.56      0.58       100
weighted avg       0.89      0.92      0.90       100

AUC: 0.8355978260869564
