In [29]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    log_loss,
    precision_recall_fscore_support,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

## Data import

In [30]:
# Select which pre-processed CSV to load. Flip USE_SCALED_DATA to True to load
# the "_scaled" variant of the file instead of toggling commented-out code.
USE_SCALED_DATA = False

DATA_PATH = (
    "credit_risk_dataset_red_ml_ord_scaled.csv"
    if USE_SCALED_DATA
    else "credit_risk_dataset_red_ml_ord.csv"
)

# Paths are relative to the notebook's working directory.
df = pd.read_csv(DATA_PATH)

In [None]:
# Full dataset: target column `loan_status` versus every other column.
y_all = df['loan_status']
x_all = df.drop(columns='loan_status')

# Aliases for the full feature matrix and target vector.
features, label = x_all, y_all

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified - consider `stratify=y_all` if the
# classes turn out to be imbalanced.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x_all,
    y_all,
    test_size=.30,
    random_state=42,
)

## Change data: build a small class-balanced sample

In [32]:
# Partition the rows by target value: loan_status == 0 and loan_status == 1.
df_zero = df.loc[df['loan_status'].eq(0)]
df_one = df.loc[df['loan_status'].eq(1)]

In [None]:
# Number of rows to draw from each class.
how_many_zero = 50
how_many_one = 50

# Sample straight from `df` rather than from the previous (already overwritten)
# df_zero / df_one, so re-running this cell always draws from the full class
# subsets and yields the same rows. `sample` raises ValueError if a class has
# fewer rows than requested.
df_zero = df[df['loan_status'] == 0].sample(n=how_many_zero, random_state=42)
df_one = df[df['loan_status'] == 1].sample(n=how_many_one, random_state=42)

# Combine the two per-class samples. The previous version concatenated
# `df_majority_downsampled` and `df_minority`, which are never defined in this
# notebook and raised NameError.
df_new = pd.concat([df_zero, df_one])

## Auxiliary functions

In [None]:
def evaluate_classification(model, X, y):
    """
    Evaluate a fitted classifier on (X, y), print the results and return them.

    The overall precision, recall and F1 are computed with scikit-learn's
    default arguments (binary averaging), so this function is meant for a
    two-class problem.

    Parameters:
    - model: The trained classification model (must have a .predict() method).
      If it exposes `classes_`, those labels define the rows of the per-class
      report and the row/column order of the confusion matrix.
    - X: The features to predict on.
    - y: The true labels.

    Returns:
    - metrics: A dictionary with the keys 'accuracy', 'Precision', 'Recall',
      'F1-Score' and 'confusion_matrix'.
    - report_df: A pandas DataFrame containing precision, recall, f1-score, and
      support per class.
    """
    # 1. Generate predictions
    y_pred = model.predict(X)

    # 2. Overall scores
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    # 3. Per-class precision, recall, F1 and support.
    # The model's class labels are passed explicitly so the returned arrays
    # always have one entry per class. Without `labels=`, a class missing from
    # both y and y_pred is dropped and the DataFrame index below no longer
    # matches the array lengths. `labels=None` keeps scikit-learn's default.
    classes = getattr(model, "classes_", None)
    precision_l, recall_l, f1_l, support_l = precision_recall_fscore_support(
        y, y_pred, labels=classes
    )

    # 4. Confusion matrix, using the same label order as the per-class report
    conf_matrix = confusion_matrix(y, y_pred, labels=classes)

    # 5. DataFrame of per-class metrics; fall back to positional row names when
    # the model does not expose `classes_`.
    if classes is None:
        classes = list(range(len(precision_l)))

    report_df = pd.DataFrame({
        'Precision': precision_l,
        'Recall': recall_l,
        'F1-Score': f1_l,
        'Support': support_l
    }, index=classes)

    # Bundle everything into a dictionary
    metrics = {
        "accuracy": accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        "confusion_matrix": conf_matrix
    }

    print(report_df)

    # Print the scalar scores, then the confusion matrix exactly once
    # (it used to be printed both inside this loop and again afterwards).
    for key, value in metrics.items():
        if key != "confusion_matrix":
            print(key, value)

    print("confusion_matrix")
    print(conf_matrix)

    return metrics, report_df

## SVM

In [None]:
# Support vector classifier with an RBF kernel.
# Other kernels to try: linear, poly, sigmoid.
# `fit` returns the estimator itself, so construction and training are chained.
svm_classifier = SVC(C=1.0, gamma='scale', kernel='rbf').fit(x_train, y_train)

# Report performance on the held-out test split.
evaluate_classification(svm_classifier, x_test, y_test)

In [None]:
# Fallback report in case evaluate_classification does not work
y_pred = svm_classifier.predict(x_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("\nClassification Report:")
# No `target_names`: the previous version passed `iris.target_names`, but `iris`
# is never defined in this notebook. By default the report rows are named after
# the label values themselves.
print(classification_report(y_test, y_pred))