In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
# Load the dataset. header=None means no row is treated as a header, so the
# columns are addressed by integer position.
data = pd.read_csv("../../new/datasets/unbalanced.csv", header=None)

# Every column except the last is used as a feature; the last column is the target.
target_position = data.shape[1] - 1
X = data.iloc[:, :target_position]
y = data.iloc[:, target_position]

In [3]:
# Hold out 20% of the rows as a test set. The split is stratified on y and
# uses a fixed random_state so it is repeatable across runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts

In [12]:
# Compute "balanced" class weights from the training labels.
from sklearn.utils import class_weight

train_classes = np.unique(y_train)
balanced_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=train_classes,
    y=y_train,
)
# Key each weight by its position (0, 1, 2, ...) in the sorted array of
# unique training labels -- NOT by the label value itself.
class_weights = {position: weight for position, weight in enumerate(balanced_weights)}


In [16]:
# Show the computed weights; keys are positional indices from enumerate(), not the class labels.
class_weights

{0: 3.271221532091097, 1: 1.1474219317356573, 2: 0.5486111111111112}

In [18]:
# Expand the per-class weights into one weight per training row.
# `class_weights` is keyed by each class's position in the sorted array of
# unique training labels, so recover every row's position with
# `return_inverse` rather than assuming the labels are exactly 1..K
# (a hard-coded `class_label - 1` offset only works for that encoding).
class_positions = np.unique(y_train, return_inverse=True)[1]
sample_weights = np.array(
    [class_weights[position] for position in class_positions.tolist()]
)

In [20]:
# Define the gradient-boosting classifier; random_state is fixed for repeatability.
gbt_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "random_state": 0,
}
gbt = GradientBoostingClassifier(**gbt_params)

# Train on the training split, passing the per-row weights computed above so
# that rows are weighted according to their class.
gbt.fit(X_train, y_train, sample_weight=sample_weights)

In [21]:
# Predict labels for the held-out test split with the fitted model.
y_pred = gbt.predict(X_test)

# Overall accuracy on the test split.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Confusion matrix of true labels (y_test) against predictions (y_pred).
cm = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", cm)

Accuracy: 0.9063291139240506
Confusion matrix:
 [[ 33   4   3]
 [  8  94  13]
 [  0   9 231]]


In [22]:
from sklearn.metrics import classification_report, cohen_kappa_score

# Cohen's kappa between the true and predicted test labels.
kappa = cohen_kappa_score(y_test, y_pred)
print("Cohen's kappa: ", kappa)
print()

# Per-class precision / recall / F1 for labels 1, 2 and 3, shown with 4 decimals.
report = classification_report(y_test, y_pred, labels=[1, 2, 3], digits=4)
print(report)

Cohen's kappa:  0.8234903381642512

              precision    recall  f1-score   support

           1     0.8049    0.8250    0.8148        40
           2     0.8785    0.8174    0.8468       115
           3     0.9352    0.9625    0.9487       240

    accuracy                         0.9063       395
   macro avg     0.8729    0.8683    0.8701       395
weighted avg     0.9055    0.9063    0.9055       395

