In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import classification_report, recall_score
from basic_pipeline_functions import PipelineBasic
from sklearn.feature_selection import SelectKBest
from sklearn.svm import SVC
from sklearn.calibration import cross_val_predict
from sklearn.metrics import confusion_matrix, precision_score

# Load the full dataset from the project-relative CSV file.
data = pd.read_csv('data/data.csv')

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
train, test = train_test_split(data, test_size=0.2, random_state=42)

# 'DEFAULT' is the prediction target. 'CREDIT_SCORE' is removed from the
# feature matrix together with it.
non_feature_columns = ['CREDIT_SCORE', 'DEFAULT']

X_train = train.drop(columns=non_feature_columns)
y_train = train['DEFAULT']

X_test = test.drop(columns=non_feature_columns)
y_test = test['DEFAULT']


In [29]:
# Pipeline stages, applied in order:
#   1. PipelineBasic - project step imported from basic_pipeline_functions
#                      (passed through as-is, not constructed here)
#   2. SelectKBest   - keeps the 20 best-scoring features
#   3. StandardScaler
#   4. SVC           - RBF kernel with C=1, gamma=0.1
pipeline_steps = [
    ('basic_pipeline', PipelineBasic),
    ('selector', SelectKBest(k=20)),
    ('scaler', StandardScaler()),
    ('classifier', SVC(C=1, gamma=0.1, kernel='rbf')),
]
best_pipeline = Pipeline(steps=pipeline_steps)

# Fit on the training split, then score on the held-out test split.
best_pipeline.fit(X_train, y_train)
predictions = best_pipeline.predict(X_test)

test_report = classification_report(y_test, predictions)
test_accuracy = accuracy_score(y_test, predictions)

print(test_report)
print("Accuracy:", test_accuracy)

              precision    recall  f1-score   support

           0       0.74      1.00      0.85       114
           1       1.00      0.11      0.20        46

    accuracy                           0.74       160
   macro avg       0.87      0.55      0.52       160
weighted avg       0.81      0.74      0.66       160

Accuracy: 0.74375


In [31]:
# Cross-validated evaluation of `best_pipeline`, using the training split only.

# Rebuild the feature/target splits from the `train` / `test` frames, the same
# way the data-loading cell does.
# NOTE(review): presumably a reset in case an earlier cell changed these objects
# in place - confirm whether it is still needed.
X_train = train.drop(columns=['CREDIT_SCORE', 'DEFAULT'])
y_train = train['DEFAULT']

X_test = test.drop(columns=['CREDIT_SCORE', 'DEFAULT'])
y_test = test['DEFAULT']

# Out-of-fold predictions from 5-fold cross-validation: every training row gets
# a prediction from a model that was fitted without that row's fold.
y_pred_cv = cross_val_predict(best_pipeline, X_train, y_train, cv=5)

# Per-class precision and recall, treating each label in turn as the positive class.
precision_0_cv = precision_score(y_train, y_pred_cv, pos_label=0)
recall_0_cv = recall_score(y_train, y_pred_cv, pos_label=0)
precision_1_cv = precision_score(y_train, y_pred_cv, pos_label=1)
recall_1_cv = recall_score(y_train, y_pred_cv, pos_label=1)

print("Precision for class 0 (cross-validation):", precision_0_cv)
print("Recall for class 0 (cross-validation):", recall_0_cv)
print("Precision for class 1 (cross-validation):", precision_1_cv)
print("Recall for class 1 (cross-validation):", recall_1_cv)

# Rows are true labels, columns are predicted labels.
conf_matrix_cv = confusion_matrix(y_train, y_pred_cv)

print("Confusion Matrix (cross-validation):")
print(conf_matrix_cv)

# Compute accuracy directly from the labels with accuracy_score instead of
# summing four hand-indexed confusion-matrix cells, which is only correct when
# the matrix is exactly 2x2.
accuracy_cv = accuracy_score(y_train, y_pred_cv)
print("Accuracy (cross-validation):", accuracy_cv)

Precision for class 0 (cross-validation): 0.7333333333333333
Recall for class 0 (cross-validation): 0.9825708061002179
Precision for class 1 (cross-validation): 0.68
Recall for class 1 (cross-validation): 0.09392265193370165
Confusion Matrix (cross-validation):
[[451   8]
 [164  17]]
Accuracy (cross-validation): 0.73125
