In [7]:
import pandas as pd

# Read the customer records from the CSV file into a DataFrame
df = pd.read_csv('customer_data.csv')

# Preview the top rows: heading first, table on the following line
print("First Few Rows of the Dataset:", df.head(), sep="\n")

# Descriptive statistics from describe(), preceded by a blank line
print("\nSummary Statistics:", df.describe(), sep="\n")

First Few Rows of the Dataset:
  Customer_ID  Age  Income  Defaulted
0  CST_000001   49  131993          0
1  CST_000002   43  115246          0
2  CST_000003   47  109268          1
3  CST_000004   60  133530          0
4  CST_000005   41  102622          1

Summary Statistics:
               Age         Income   Defaulted
count  1000.000000    1000.000000  1000.00000
mean     43.646000  105190.194000     0.50000
std       9.799015   22229.151176     0.50025
min      15.000000   39841.000000     0.00000
25%      37.000000   90960.750000     0.00000
50%      44.000000  103727.000000     0.50000
75%      50.000000  119564.500000     1.00000
max      75.000000  187495.000000     1.00000


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix, roc_curve
import numpy as np
from sklearn.preprocessing import StandardScaler

# Assume 'df' is the DataFrame from the previous pandas example

# One seed shared by the train/test split and the SVC, so that re-running the
# cell reproduces both the split and the probability estimates.
SEED = 42

# Select features and the target variable
X = df[["Age", "Income"]]  # Features
y = df["Defaulted"]        # Target variable

# Split the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=SEED)

# Standardization: the scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Train an SVC model
# probability=True enables predict_proba. scikit-learn fits those probabilities
# with an internal cross-validation that shuffles the data, so random_state
# must be set for y_pred_proba to be reproducible between runs.
model = SVC(kernel='linear', probability=True, random_state=SEED)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)                    # Hard class labels
y_pred_proba = model.predict_proba(X_test)[:, 1]  # Probabilities for the positive class

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred_proba)

# Calculate KS statistic: the largest gap between the true-positive rate and
# the false-positive rate over all ROC thresholds.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
ks = np.max(tpr - fpr)

# Classification report (per-class precision, recall, f1-score and support)
class_report = classification_report(y_test, y_pred)

# Print the metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"AUC: {auc:.4f}")
print(f"KS Statistic: {ks:.4f}")
print("Classification Report:\n", class_report)

Accuracy: 0.5850
AUC: 0.6534
KS Statistic: 0.2652
Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.53      0.55        96
           1       0.59      0.63      0.61       104

    accuracy                           0.58       200
   macro avg       0.58      0.58      0.58       200
weighted avg       0.58      0.58      0.58       200



In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix, roc_curve
import numpy as np
from sklearn.preprocessing import StandardScaler

# Assume 'df' is the DataFrame from the previous pandas example

# One seed shared by the train/test split and the SVC, so that re-running the
# cell reproduces both the split and the probability estimates.
SEED = 42

# Select features and the target variable
X = df[["Age", "Income"]]  # Features
y = df["Defaulted"]        # Target variable

# Split the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=SEED)

# Standardization: the scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Train an SVC model
# probability=True enables predict_proba. scikit-learn fits those probabilities
# with an internal cross-validation that shuffles the data, so random_state
# must be set for y_pred_proba to be reproducible between runs.
model = SVC(kernel='linear', probability=True, random_state=SEED)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)                    # Hard class labels
y_pred_proba = model.predict_proba(X_test)[:, 1]  # Probabilities for the positive class

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred_proba)

# Calculate KS statistic: the largest gap between the true-positive rate and
# the false-positive rate over all ROC thresholds.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
ks = np.max(tpr - fpr)

# Classification report (per-class precision, recall, f1-score and support)
class_report = classification_report(y_test, y_pred)

# Print the metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"AUC: {auc:.4f}")
print(f"KS Statistic: {ks:.4f}")
print("Classification Report:\n", class_report)

Accuracy: 0.5850
AUC: 0.6534
KS Statistic: 0.2652
Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.53      0.55        96
           1       0.59      0.63      0.61       104

    accuracy                           0.58       200
   macro avg       0.58      0.58      0.58       200
weighted avg       0.58      0.58      0.58       200

