In [2]:
import warnings

import numpy as np
from scikeras.wrappers import KerasClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from utils import preprocess_data, bayesian_optimisation

# Silence every warning category for the rest of the session.
# Be aware that this also hides deprecation notices from the libraries above.
warnings.simplefilter("ignore")

In [3]:
# Fetch the train/test splits and their source frames from the project helper.
# `standardise=True` is forwarded as-is (presumably enables feature scaling —
# confirm in utils.preprocess_data).
(
    X_train,
    X_test,
    y_train,
    y_test,
    train_df,
    test_df,
) = preprocess_data(standardise=True)

In [4]:
def create_cnn_model(learning_rate, dropout_rate, num_filters, kernel_size):
    """Build and compile a single-convolution binary classifier.

    Architecture: Conv1D (ReLU) -> Flatten -> Dropout -> Dense(1, sigmoid).
    The model is compiled with Adam, binary cross-entropy loss and the
    accuracy metric.

    Args:
        learning_rate: Learning rate handed to the Adam optimizer.
        dropout_rate: Rate passed to the Dropout layer.
        num_filters: Number of convolution filters; converted with ``int()``
            so float values coming from the optimiser are accepted.
        kernel_size: Convolution kernel width; converted with ``int()``.

    Returns:
        The compiled ``Sequential`` model.

    Note:
        The input shape is ``(X_train.shape[1], 1)``, read from the
        notebook-level ``X_train``, so that variable must exist before
        this function is called.
    """
    n_steps = X_train.shape[1]
    conv_layer = Conv1D(
        filters=int(num_filters),
        kernel_size=int(kernel_size),
        activation='relu',
        input_shape=(n_steps, 1),
    )
    network = Sequential([
        conv_layer,
        Flatten(),
        Dropout(rate=dropout_rate),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network


In [5]:
def optimise_cnn(learning_rate, dropout_rate, num_filters, kernel_size):
    """Score one hyperparameter combination by cross-validated accuracy.

    Wraps ``create_cnn_model`` in a scikeras ``KerasClassifier`` (10 epochs,
    batch size 10, silent) and evaluates it with 3-fold cross-validation on
    the notebook-level ``X_train`` / ``y_train``.

    Args:
        learning_rate: Forwarded to ``create_cnn_model``.
        dropout_rate: Forwarded to ``create_cnn_model``.
        num_filters: Forwarded to ``create_cnn_model``.
        kernel_size: Forwarded to ``create_cnn_model``.

    Returns:
        Mean accuracy over the 3 cross-validation folds.
    """
    classifier = KerasClassifier(
        # `model=` replaces the deprecated `build_fn=` keyword in scikeras.
        model=create_cnn_model,
        # Extra keyword arguments are handed to the model-building function
        # when its signature accepts them.
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        num_filters=num_filters,
        kernel_size=kernel_size,
        epochs=10,
        batch_size=10,
        verbose=0,
    )

    fold_scores = cross_val_score(classifier, X_train, y_train, cv=3, scoring='accuracy')
    return fold_scores.mean()

In [6]:
# Search bounds as (low, high) rows, one row per hyperparameter, listed in the
# positional order of optimise_cnn's arguments.
param_space = np.array([
    (0.0001, 0.01),     # learning_rate
    (0.1, 0.5),         # dropout_rate
    (10, 50),           # num_filters
    (2, 5)              # kernel_size
])
n_iters = 25
initial_samples = 5

# Seeded generator so the initial design is identical on every
# Restart & Run All. Model training is not seeded here, so the scores
# themselves can still vary between runs.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Initial random samples: one row per sample, one column per hyperparameter,
# each column drawn uniformly between its own bounds.
lower_bounds, upper_bounds = param_space[:, 0], param_space[:, 1]
x0 = rng.uniform(lower_bounds, upper_bounds, size=(initial_samples, param_space.shape[0]))
y0 = np.array([optimise_cnn(*params) for params in x0])

# Passed to bayesian_optimisation later (presumably forwarded to its
# Gaussian-process regressor — confirm in utils).
gp_params = {"alpha": 1e-6}

In [10]:
# Run the optimiser from utils, starting from the initial design and its
# scores reshaped into a single column.
initial_scores = y0.reshape(-1, 1)
X_sample, Y_sample, gpr = bayesian_optimisation(
    n_iters,
    optimise_cnn,
    param_space,
    x0,
    initial_scores,
    gp_params,
)

# Best parameters: the sample whose recorded score is highest.
best_idx = np.argmax(Y_sample)
best_params, best_accuracy = X_sample[best_idx], Y_sample[best_idx]

print(f"Best accuracy: {best_accuracy}")

Best accuracy: [0.81060153]


In [12]:
# Rebuild the network from the best hyperparameters; best_params holds them in
# the same order as the rows of param_space.
hyperparameter_names = ("learning_rate", "dropout_rate", "num_filters", "kernel_size")
best_model = create_cnn_model(**dict(zip(hyperparameter_names, best_params)))

# Train on the full training split, then score on the held-out test split.
best_model.fit(X_train, y_train, epochs=50, batch_size=10, verbose=0)

# Convert probabilities to class labels
y_pred = (best_model.predict(X_test) > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized CNN Accuracy: {accuracy}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Optimized CNN Accuracy: 0.7642276422764228


In [23]:
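# NOTE: disabled evaluation on the generated test set. Before re-enabling:
#   - import pandas as pd (it is not imported in this notebook's import cell);
#   - threshold best_model.predict(...) at 0.5, as done for X_test above,
#     because accuracy_score needs class labels rather than probabilities.
# test_df_generated = pd.read_csv('data/loan_sanction_test_with_predictions_decision_tree.csv')
# 
# test_df_generated['Loan_Status'] = test_df_generated['Loan_Status'].map({'Y': 1, 'N': 0})
# test_df_generated.head()
# X_test_generated = test_df_generated.drop(['Loan_ID', 'Loan_Status'], axis=1)
# Y_test_generated = test_df_generated['Loan_Status']
# 
# 
# Y_test_predict = best_model.predict(X_test_generated)
# accuracy_generated = accuracy_score(Y_test_generated, Y_test_predict)
# print(f"Optimized CNN Accuracy: {accuracy_generated}")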