In [None]:
'''There are a number of classification algorithms that can be used to determine
 loan eligibility. Some algorithms perform better than others.
 Build a loan approver using the SVM algorithm and compare the accuracy and performance
 of the SVM model with the Logistic Regression model.'''


In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:
# Read the loan applications; per the author's note the feature values were
# normalized before being written to this file.
# NOTE(review): 'Resouces' looks like a misspelling of 'Resources' -- confirm the
# directory name on disk before touching the path.
df = pd.read_csv(filepath_or_buffer='./Resouces/loans.csv')
df.head(n=5)


Unnamed: 0,assets,liabilities,income,credit_score,mortgage,status
0,0.210859,0.452865,0.281367,0.628039,0.302682,deny
1,0.395018,0.661153,0.330622,0.638439,0.502831,approve
2,0.291186,0.593432,0.438436,0.434863,0.315574,approve
3,0.45864,0.576156,0.744167,0.291324,0.394891,approve
4,0.46347,0.292414,0.489887,0.811384,0.566605,approve


In [3]:
# STEP 0: data preparation
# Features are every column except the label; the label is the 'status' column.
X = df.drop(columns=['status'])
y = df['status']
X.shape          # (rows, feature columns)


(100, 5)

In [4]:
# STEP 1: train_test_split
# Stratify on y so both splits keep the same class proportions; random_state
# pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, 
                                                    random_state = 1)

# STEP 2: Instantiate a SVC model
model = SVC(kernel='linear')

# STEP 3: train model
model.fit(X_train, y_train)

# STEP 4: make prediction
y_pred = model.predict(X_test)

# STEP 5-1: model score (mean accuracy on the held-out test split)
model_score = model.score(X_test,y_test)
print(model_score)

# Side-by-side table of predicted vs. actual labels, re-indexed from 0.
# Kept as the cell's final expression because Jupyter only renders the last
# expression of a cell; anywhere earlier the table is silently discarded.
results = pd.DataFrame({'Predicted y': y_pred, 'Actual y': y_test}).reset_index(drop=True)
results


0.6


In [9]:
# STEP 5-1: evaluate, validate
# Fraction of test rows predicted correctly (the same quantity model.score reports).
acc_score = accuracy_score(y_test, y_pred)
print(acc_score)     # printed explicitly: a bare mid-cell expression is not rendered

# STEP 5-2: CM
# Rows are actual classes, columns are predicted classes. The class order is
# pinned with `labels` so that '+' always means 'approve' and '-' always means
# 'deny', instead of depending on confusion_matrix's implicit sorted order.
cm_df = pd.DataFrame(confusion_matrix(y_test, y_pred, labels=['approve', 'deny']),
                     index=['Actual +', 'Actual -'],
                     columns=['Predicted +', 'Predicted -'])
cm_df

Unnamed: 0,Predicted +,Predicted -
Actual +,7,5
Actual -,5,8


In [5]:
# STEP 5-3: classification report -- per-class precision, recall, f1-score and support
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

     approve       0.58      0.58      0.58        12
        deny       0.62      0.62      0.62        13

    accuracy                           0.60        25
   macro avg       0.60      0.60      0.60        25
weighted avg       0.60      0.60      0.60        25



 Summary: The model did not perform adequately
to pose an acceptable risk for a lender: both its accuracy (0.60) and its f1-scores (0.58 for approve, 0.62 for deny) are low.

 A comparable Logistic Regression model

In [6]:
from sklearn.linear_model import LogisticRegression

# Step 2: create the logistic-regression classifier (L-BFGS solver, fixed seed).
Logit_model = LogisticRegression(random_state=2, solver='lbfgs')

# Step 3: fit on the training split.
Logit_model.fit(X=X_train, y=y_train)

# Step 4: predict a label for every row of the test split.
Logit_model_y_pred = Logit_model.predict(X=X_test)

# Pair each prediction with its true label; the index is reset so rows are
# numbered from 0 rather than carrying the original row labels of y_test.
Logit_model_result = pd.DataFrame(
    {
        'Logit Predicted Y': Logit_model_y_pred,
        'Logit Actual Y': y_test,
    }
).reset_index(drop=True)
Logit_model_result

Unnamed: 0,Logit Predicted Y,Logit Actual Y
0,deny,deny
1,deny,approve
2,deny,deny
3,approve,deny
4,deny,deny
5,deny,approve
6,deny,deny
7,approve,deny
8,deny,deny
9,approve,approve


In [7]:
# Evaluate: accuracy of the logistic-regression predictions on the test split.
Logit_acc = accuracy_score(y_true=y_test, y_pred=Logit_model_y_pred)
print(Logit_acc)


0.52


In [8]:
# Confusion matrix for the logistic-regression predictions.
# Rows are actual classes, columns are predicted classes. The class order is
# pinned with `labels` so that '+' always means 'approve' and '-' always means
# 'deny', instead of depending on confusion_matrix's implicit sorted order.
Logit_CM_df = pd.DataFrame(confusion_matrix(y_test, Logit_model_y_pred, labels=['approve', 'deny']),
                           index=['Actual +', 'Actual -'],
                           columns=['Predicted +', 'Predicted -'])
Logit_CM_df


Unnamed: 0,Predicted +,Predicted -
Actual +,3,9
Actual -,3,10


In [10]:
# Per-class precision, recall, f1-score and support for the logistic-regression model.
Logit_report = classification_report(y_true=y_test, y_pred=Logit_model_y_pred)
print(Logit_report)


              precision    recall  f1-score   support

     approve       0.50      0.25      0.33        12
        deny       0.53      0.77      0.62        13

    accuracy                           0.52        25
   macro avg       0.51      0.51      0.48        25
weighted avg       0.51      0.52      0.48        25

