In [1]:
# Import Libraries
import numpy as np
import pandas as pd

# create a synthetic dataset with 300 records 
np.random.seed(42)
data_size = 300


age = np.random.randint(20, 75, size = data_size) # Customer age
income = np.random.randint(30000, 160000, size = data_size) # Annual income
debt = np.random.randint(300, 40000, size = data_size)
default = np.random.choice([0,1], size = data_size, p = [0.7, 0.3])

# Create a DataFrame 
df = pd.DataFrame({
    'Age': age,
    'Income': income,
    'Debt' : debt,
    'Default': default
})

#Display the head of the dataframe 
df.head()



Unnamed: 0,Age,Income,Debt,Default
0,58,51918,9735,0
1,71,115981,13596,1
2,48,90713,30745,0
3,34,60306,13545,0
4,62,46646,29424,1


In [11]:
# Import libraries to split the datasets into train and test datasets 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Assign feautres and target values 
x = df[['Age', 'Income', 'Debt']]
y = df['Default']

#Split the data into train(75%) and test(25%) 
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = .25, random_state = 42)

#Standardize the data 
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.fit_transform(x_test)

In [12]:
print(x_test_scaled)

[[ 0.38545893  1.67780652 -0.82471962]
 [ 0.44736347  0.31822811  1.12478844]
 [-1.59548633 -0.3017225   1.2141644 ]
 [-0.23358646  1.20718114  1.3213665 ]
 [-1.53358179 -0.24378268  0.69401757]
 [ 0.88069525 -1.179961    1.08660127]
 [-0.41930008 -0.19644889  1.41891958]
 [-0.10977738  1.08332664 -0.81204508]
 [-1.16215455 -0.43848867  0.72271927]
 [ 0.75688617 -0.36128984  1.08251271]
 [ 1.37593156 -0.83858806  1.13664527]
 [ 1.62354972 -1.63130023  0.16937331]
 [ 0.94259979  1.75785352 -0.07373258]
 [ 1.49974064 -0.39750222 -0.02426098]
 [ 0.32355439  0.66556863  1.25390523]
 [-0.72882278 -1.29307384 -0.64147028]
 [ 0.38545893 -0.09141941 -0.43172706]
 [-1.28596363 -1.09400066  1.26142818]
 [ 0.63307709 -1.0827165   0.42482665]
 [ 1.49974064  0.08983239  0.01408973]
 [ 1.68545426  1.69823193 -0.04887412]
 [-1.59548633  0.29121123 -1.63531789]
 [-0.48120462  0.78082438 -1.69075879]
 [-1.53358179  0.64188817 -1.68912337]
 [ 0.88069525  0.81839303 -1.04059569]
 [ 1.56164518 -0.26195669

# Linear Regression 

In [3]:
#Import libarary for LinearRegression and accuract test
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score

# Train a linear Regression model
lr_model = LinearRegression()
lr_model.fit(x_train, y_train)

# predict using linear regression model
y_pred_lr = lr_model.predict(x_test)

# Convert prediction to binary classification(threshold = 0.5)
y_pred_lr_class =[ 1 if i>0.5 else 0 for i in y_pred_lr]

# Evaluate Linear Regression Model
accuracy_lr = accuracy_score(y_test, y_pred_lr_class)
print(f"Linear Regression Accracy: {accuracy_lr:.2f}")

Linear Regression Accracy: 0.68


# Random Forest

In [4]:
# Import RandomForest 
from sklearn.ensemble import RandomForestClassifier

# Train a Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators = 150, random_state = 42)
rf_model.fit(x_train, y_train)

# Predict using the random forest classifier 
y_pred_rf = rf_model.predict(x_test)

# Evaluate Random Forest Model 
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy : {accuracy_rf:.2f}")


Random Forest Accuracy : 0.65


# Support Vector Machine(SVM)

In [13]:
# Import SVC 
from sklearn.svm import SVC

# Train an SVM classifier 
svm_model = SVC(kernel = 'linear', random_state = 42)
svm_model.fit(x_train_scaled, y_train)

#Predict using the SVM model
y_pred_svm = svm_model.predict(x_test_scaled)

# Evaluate SVM model 
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm:.2f}")

SVM Accuracy: 0.68


In [15]:
print("Model Comparison: ")
print(f"Linear Regression Accuracy: {accuracy_lr:.2f}")
print(f"Random Forest Accuracy: {accuracy_rf:.2f}")
print(f"SVM Accuracy: {accuracy_svm:.2f}")

Model Comparison: 
Linear Regression Accuracy: 0.68
Random Forest Accuracy: 0.65
SVM Accuracy: 0.68
