In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVC

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report, RocCurveDisplay
import matplotlib.pyplot as plt

In [None]:
# Helper that automates the classification metrics for the train and test splits
def metricas_classificacao(estimator, X_train, X_test, y_train, y_test):
    """Print and plot classification metrics for the train and test splits.

    For each split, in order (train first, then test), this function:

    1. predicts with ``estimator.predict``;
    2. plots the confusion matrix and shows it;
    3. prints the ``classification_report``;
    4. plots the ROC curve via ``RocCurveDisplay.from_estimator`` and shows it.

    Parameters
    ----------
    estimator
        An already fitted classifier exposing ``predict``.
    X_train, X_test
        Feature sets for the train and test splits.
    y_train, y_test
        True labels matching ``X_train`` and ``X_test``.

    Returns
    -------
    None
        Everything is emitted through ``print`` and matplotlib figures.
    """
    # NOTE(review): RocCurveDisplay.from_estimator is documented for binary
    # classifiers -- confirm that the target passed here is binary.
    splits = (
        ("\nMétricas de avaliação de treino:", X_train, y_train),
        ("\nMétricas de avaliação de teste:", X_test, y_test),
    )

    for header, X_split, y_split in splits:
        print(header)
        y_pred = estimator.predict(X_split)

        cm = confusion_matrix(y_split, y_pred)
        ConfusionMatrixDisplay(cm).plot(cmap='viridis')
        plt.show()

        print(classification_report(y_split, y_pred))

        RocCurveDisplay.from_estimator(estimator, X_split, y_split)
        # Flush the ROC figure right away so it is rendered next to the report
        # of its own split instead of being shown later together with the
        # following confusion matrix (or only at the end of the cell).
        plt.show()

In [4]:
# Load the dataset from the CSV file
df = pd.read_csv('train_treated.csv')

# "credit_score" is the target; every remaining column is used as a feature
y = df["credit_score"]
X = df.drop(columns=["credit_score"])

In [None]:
# Split the data into train and test sets: 20% held out for testing,
# stratified on the target, with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Transformation of the numeric features (imputer and scaler are fitted on the
# training split only).
#
# NOTE: this cell overwrites X_train. Re-running it without re-running the
# split cell first refits the imputer and the scaler on already-transformed
# data.

# X_train comes out of train_test_split as a selection of X. Work on an
# explicit copy so the column assignments below write to an independent frame
# instead of raising pandas' SettingWithCopyWarning.
X_train = X_train.copy()

features_numerics = X_train.select_dtypes(include=np.number).columns.tolist()

# Fill missing numeric values with the per-column median of the training split
num_imputer = SimpleImputer(strategy="median")
X_train[features_numerics] = num_imputer.fit_transform(X_train[features_numerics])

# Standardize the numeric columns using statistics of the training split
scaler = StandardScaler()
X_train[features_numerics] = scaler.fit_transform(X_train[features_numerics])

In [None]:
# Pre-processing of the test data, reusing the imputer and scaler that were
# fitted on the training split (transform only, no refit).
#
# NOTE: this cell overwrites X_test. Re-running it without re-running the
# split cell first applies the imputer and the scaler a second time.

# X_test comes out of train_test_split as a selection of X. Work on an
# explicit copy so the column assignments below write to an independent frame
# instead of raising pandas' SettingWithCopyWarning.
X_test = X_test.copy()

X_test[features_numerics] = num_imputer.transform(X_test[features_numerics])
X_test[features_numerics] = scaler.transform(X_test[features_numerics])

In [None]:
# Treinando o modelo SVM
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train, y_train)

# Avaliar o modelo com as métricas de classificação
metricas_classificacao(svm_model, X_train, X_test, y_train, y_test)