In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [10]:
# Column names for the synthetic banking dataset, one per generated feature.
feature_names = [
    'balance', 'num_transactions', 'credit_score', 'age', 'income',
    'loan_amount', 'savings', 'investment', 'debt', 'num_credit_cards',
    'mortgage', 'loan_term', 'employment_years', 'account_age', 'expenses',
    'credit_utilization', 'payment_history', 'credit_limit', 'interest_rate', 'loan_type',
    'region', 'marital_status', 'num_dependents', 'education_level', 'employment_status',
    'home_ownership', 'phone_type', 'internet_banking', 'mobile_banking', 'branch_visits',
]

# Dataset dimensions: number of simulated rows and number of feature columns.
n_points = 5000
n_features = len(feature_names)  # 30 named features

# Seed NumPy's global RNG so that re-running this cell reproduces the same draws.
np.random.seed(42)

# Feature matrix of shape (n_points, n_features) filled with standard-normal noise.
# Every column, including the categorical-sounding ones, is a continuous random value.
X = np.random.randn(n_points, n_features)

# Binary target sampled from {0, 1} without reference to X, so the features
# carry no information about the label.
# NOTE(review): the original note maps 0 -> Default and 1 -> Non-Default, which reads
# inverted against the column name `default` -- confirm the intended encoding.
y = np.random.choice([0, 1], size=n_points)

# Combine features and target into one frame; the target becomes the last column.
data = pd.DataFrame(X, columns=feature_names).assign(default=y)

In [11]:
# Preview the first five rows (the default row count for `head`).
data.head()

Unnamed: 0,balance,num_transactions,credit_score,age,income,loan_amount,savings,investment,debt,num_credit_cards,...,marital_status,num_dependents,education_level,employment_status,home_ownership,phone_type,internet_banking,mobile_banking,branch_visits,default
0,0.496714,-0.138264,0.647689,1.52303,-0.234153,-0.234137,1.579213,0.767435,-0.469474,0.54256,...,-0.225776,0.067528,-1.424748,-0.544383,0.110923,-1.150994,0.375698,-0.600639,-0.291694,0
1,-0.601707,1.852278,-0.013497,-1.057711,0.822545,-1.220844,0.208864,-1.95967,-1.328186,0.196861,...,-0.385082,-0.676922,0.611676,1.031,0.93128,-0.839218,-0.309212,0.331263,0.975545,1
2,-0.479174,-0.185659,-1.106335,-1.196207,0.812526,1.35624,-0.07201,1.003533,0.361636,-0.64512,...,0.357113,1.477894,-0.51827,-0.808494,-0.501757,0.915402,0.328751,-0.52976,0.513267,1
3,0.097078,0.968645,-0.702053,-0.327662,-0.392108,-1.463515,0.29612,0.261055,0.005113,-0.234587,...,-0.026514,0.06023,2.463242,-0.192361,0.301547,-0.034712,-1.168678,1.142823,0.751933,0
4,0.791032,-0.909387,1.402794,-1.401851,0.586857,2.190456,-0.990536,-0.566298,0.099651,-0.503476,...,1.307143,-1.607483,0.184634,0.259883,0.781823,-1.236951,-1.320457,0.521942,0.296985,1


In [12]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data[feature_names],
    data['default'],
    test_size=0.3,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to both partitions so no test-set information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a linear-kernel support vector classifier on the scaled training data.
svm = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = svm.predict(X_test)

# Report per-class metrics and overall accuracy.
# The labels in `data['default']` are drawn independently of the features, so
# accuracy close to 0.5 (chance level) is the expected outcome here.
report = classification_report(y_test, y_pred)
print("Classification Report:", report, sep="\n")

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy, sep="\n")

Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.58      0.53       745
           1       0.50      0.42      0.46       755

    accuracy                           0.50      1500
   macro avg       0.50      0.50      0.50      1500
weighted avg       0.50      0.50      0.50      1500

Accuracy Score:
0.49933333333333335
