In [2208]:
# Import Modules
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from pathlib import Path
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.preprocessing import StandardScaler

# Display all of the columns
pd.set_option('display.max_columns', None)

In [2209]:
# Read the CSV file into a Pandas DataFrame
credit_df = pd.read_csv(
    Path('Resources/cleaned_data_v2.csv')
)
credit_df.head()

Unnamed: 0,Age,Occupation,Annual_Income,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Changed_Credit_Limit,Num_Credit_Inquiries,Credit_Mix,Outstanding_Debt,Credit_Utilization_Ratio,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance,Credit_Score
0,23,Scientist,19114.12,3,4,3,4,3,7,11.27,4,_,809.98,26.82,22.1,No,49.57,80.42,High_spent_Small_value_payments,312.49,Good
1,23,Scientist,19114.12,3,4,3,4,5,4,6.27,4,Good,809.98,31.38,22.3,No,49.57,199.46,Low_spent_Small_value_payments,223.45,Good
2,23,Scientist,19114.12,3,4,3,4,3,8,11.27,4,Good,809.98,22.54,22.6,No,49.57,178.34,Low_spent_Small_value_payments,244.57,Good
3,28,_______,34847.84,2,4,6,1,3,4,5.42,2,Good,605.03,24.46,26.6,No,18.82,104.29,Low_spent_Small_value_payments,470.69,Standard
4,28,Teacher,34847.84,2,4,6,1,7,1,7.42,2,Good,605.03,38.55,26.7,No,18.82,40.39,High_spent_Large_value_payments,484.59,Good


In [2210]:
# Split target column from dataset
y = credit_df['Credit_Score']
columns_to_drop = ['Credit_Score', 'Occupation', 'Num_of_Loan', 'Credit_Utilization_Ratio', 'Credit_History_Age',
                  'Payment_of_Min_Amount', 'Total_EMI_per_month', 'Amount_invested_monthly', 'Payment_Behaviour']
X = credit_df.drop(columns=columns_to_drop)

In [2211]:
# Preview the data
X[:5]

Unnamed: 0,Age,Annual_Income,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Delay_from_due_date,Num_of_Delayed_Payment,Changed_Credit_Limit,Num_Credit_Inquiries,Credit_Mix,Outstanding_Debt,Monthly_Balance
0,23,19114.12,3,4,3,3,7,11.27,4,_,809.98,312.49
1,23,19114.12,3,4,3,5,4,6.27,4,Good,809.98,223.45
2,23,19114.12,3,4,3,3,8,11.27,4,Good,809.98,244.57
3,28,34847.84,2,4,6,3,4,5.42,2,Good,605.03,470.69
4,28,34847.84,2,4,6,7,1,7.42,2,Good,605.03,484.59


In [2212]:
# Encode the categorical variables using get_dummies
X = pd.get_dummies(X)

In [2213]:
# Preview the data
X.head()

Unnamed: 0,Age,Annual_Income,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Delay_from_due_date,Num_of_Delayed_Payment,Changed_Credit_Limit,Num_Credit_Inquiries,Outstanding_Debt,Monthly_Balance,Credit_Mix_Bad,Credit_Mix_Good,Credit_Mix_Standard,Credit_Mix__
0,23,19114.12,3,4,3,3,7,11.27,4,809.98,312.49,False,False,False,True
1,23,19114.12,3,4,3,5,4,6.27,4,809.98,223.45,False,True,False,False
2,23,19114.12,3,4,3,3,8,11.27,4,809.98,244.57,False,True,False,False
3,28,34847.84,2,4,6,3,4,5.42,2,605.03,470.69,False,True,False,False
4,28,34847.84,2,4,6,7,1,7.42,2,605.03,484.59,False,True,False,False


In [2214]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [2215]:
X_train.shape

(37142, 15)

In [2216]:
X_test.shape

(12381, 15)

In [2217]:
# Create the StandardScaler instance
scaler = StandardScaler()
# Fit the Standard Scaler with the training data
X_scaler = scaler.fit(X_train)
# Scale the training data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [2218]:
model = KNeighborsClassifier(n_neighbors=24)

In [2219]:
# Train the model
model.fit(X_train_scaled, y_train)

In [2220]:
# Create predictions
y_pred = model.predict(X_test_scaled)

# Review the predictions
y_pred

array(['Poor', 'Standard', 'Poor', ..., 'Poor', 'Poor', 'Poor'],
      dtype=object)

In [2221]:
# Print confusion matrix
confusion_matrix(y_pred,y_test)

array([[1452,  271,  784],
       [  49, 2197,  775],
       [ 724,  990, 5139]])

In [2222]:
# Print classification report
print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

        Good       0.65      0.58      0.61      2507
        Poor       0.64      0.73      0.68      3021
    Standard       0.77      0.75      0.76      6853

    accuracy                           0.71     12381
   macro avg       0.69      0.69      0.68     12381
weighted avg       0.71      0.71      0.71     12381

