In [19]:
import warnings

import numpy as np
import pandas as pd

from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

from utils import *

In [20]:
warnings.filterwarnings("ignore")

X_train, X_test, y_train, y_test, train_df, test_df = preprocess_data(standardise=True)

In [21]:
def optimise_knn(n_neighbors, leaf_size, p):
    model = KNeighborsClassifier(
        n_neighbors=int(n_neighbors),
        leaf_size=int(leaf_size),
        p=int(p)
    )
    return cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()


In [22]:
param_space = np.array([
    (1, 30),    # 'n_neighbors'
    (20, 40),   # 'leaf_size'
    (1, 2)      # Minkowski metric parameter (1 for Manhattan, 2 for Euclidean)
])

n_iters = 25
initial_samples = 5

# Initial random samples
x0 = np.random.uniform(param_space[:, 0], param_space[:, 1], size=(initial_samples, param_space.shape[0]))
y0 = np.array([optimise_knn(*params) for params in x0])

gp_params = {"alpha": 1e-6}

In [23]:
X_sample, Y_sample, gpr = bayesian_optimisation(n_iters, optimise_knn, param_space, x0, y0.reshape(-1, 1), gp_params)

# Best parameters
best_idx = np.argmax(Y_sample)
best_params = X_sample[best_idx]
best_accuracy = Y_sample[best_idx]

print(f"Best accuracy: {best_accuracy}")


Best accuracy: [0.80441146]


In [24]:
model = KNeighborsClassifier()

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized KNN Accuracy: {accuracy}")

Optimized KNN Accuracy: 0.7560975609756098


In [25]:
best_model = KNeighborsClassifier(
    n_neighbors=int(best_params[0]),
    leaf_size=int(best_params[1]),
    p=int(best_params[2])
)

best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to class labels

accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized CNN Accuracy: {accuracy}")

Optimized CNN Accuracy: 0.7886178861788617


In [18]:
# test_df_generated = pd.read_csv('data/loan_sanction_test_with_predictions_decision_tree.csv')
# 
# test_df_generated['Loan_Status'] = test_df_generated['Loan_Status'].map({'Y': 1, 'N': 0})
# test_df_generated.head()
# X_test_generated = test_df_generated.drop(['Loan_ID', 'Loan_Status'], axis=1)
# Y_test_generated = test_df_generated['Loan_Status']
# 
# 
# Y_test_predict = best_model.predict(X_test_generated)
# accuracy_generated = accuracy_score(Y_test_generated, Y_test_predict)
# print(f"Optimized KNN Accuracy: {accuracy_generated}")