# Import relevant modules and data

In [1]:
# Import relevant modules
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC

In [2]:
# Load modified & cleaned personal loan data into a Pandas DataFrame
df_loan_data = pd.read_pickle('df_loan_data.pkl')

# Transform data to suit the SVC algorithm

SVC models require scaled data. The data used here is pre-scaled from the data_preparation notebook.

SVC models perform better with balanced classes (and this data has reasonably strong imbalance). This re-balancing will be handled via SMOTE techniques within model training

# Train a model on all available features

In [3]:
# Define the target variable
target_variable = 'personal_loan'

In [4]:
# Separate features and target variable
X = df_loan_data.drop(columns=[target_variable])
y = df_loan_data[target_variable]

In [5]:
# Initialize cross-validation
n_splits = 5  # You can adjust the number of folds
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

In [6]:
# Initialize list to store results
auc_scores = []

In [7]:
# Perform cross-validation
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Apply SMOTE to oversample the minority class
    smote = SMOTE(sampling_strategy='auto', random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # Initialize the SVM model (Support Vector Classifier)
    svm_model = SVC(probability=True, C=1.0, kernel='rbf')

    # Fit the SVM model
    svm_model.fit(X_train_resampled, y_train_resampled)

    # Make predictions
    y_pred = svm_model.predict(X_test)

    # Calculate ROC AUC score
    auc = roc_auc_score(y_test, y_pred)
    auc_scores.append(auc)

In [8]:
# Calculate the mean AUC score across cross-validation folds
mean_auc = np.mean(auc_scores)
print('Mean AUC: {}'.format(mean_auc))

Mean AUC: 0.870810544469081
