In [13]:
import warnings

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from bayes_opt import BayesianOptimization
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier



ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

In [6]:
warnings.filterwarnings("ignore")

train_df = pd.read_csv('data/loan_sanction_train.csv')

# The original test file doesn't contain the Loan_Status field
# Nevertheless loading it to construct a test set for another algorithm
test_df = pd.read_csv('data/loan_sanction_test.csv')

for df in [train_df, test_df]:
    # Convert categorical variables into numeric
    df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})
    df['Married'] = df['Married'].map({'Yes': 1, 'No': 0})
    df['Education'] = df['Education'].map({'Graduate': 1, 'Not Graduate': 0})
    df['Self_Employed'] = df['Self_Employed'].map({'Yes': 1, 'No': 0})
    df['Property_Area'] = df['Property_Area'].map({'Urban': 2, 'Semiurban': 1, 'Rural': 0})
    df['Dependents'] = df['Dependents'].replace('3+', 3)

    # Fill missing values. Do it after converting categorical values into numeric
    df['LoanAmount'].fillna(df['LoanAmount'].mean(), inplace=True)
    df['Loan_Amount_Term'].fillna(df['Loan_Amount_Term'].mode()[0], inplace=True)
    df['Credit_History'].fillna(df['Credit_History'].mode()[0], inplace=True)
    df['Gender'].fillna(df['Gender'].mode()[0], inplace=True)
    df['Married'].fillna(df['Married'].mode()[0], inplace=True)
    df['Dependents'].fillna(df['Dependents'].mode()[0], inplace=True)
    df['Self_Employed'].fillna(df['Self_Employed'].mode()[0], inplace=True)

    # Create extra features that can be useful and meaningful
    df['TotalIncome'] = df['ApplicantIncome'] + df['CoapplicantIncome']
    df['LoanIncomeRatio'] = df['LoanAmount'] / df['TotalIncome']

# Convert the output variable into numeric
train_df['Loan_Status'] = train_df['Loan_Status'].map({'Y': 1, 'N': 0})
X = train_df.drop(columns=['Loan_ID', 'Loan_Status'])
y = train_df['Loan_Status']

# Carry out train/test split from the given training data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
def create_cnn_model(learning_rate, dropout_rate, num_filters, kernel_size):
    model = Sequential()
    model.add(Conv1D(filters=int(num_filters), kernel_size=int(kernel_size), activation='relu', input_shape=(X_train.shape[1], 1)))
    model.add(Flatten())
    model.add(Dropout(rate=dropout_rate))
    model.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model


In [8]:
def optimize_cnn(learning_rate, dropout_rate, num_filters, kernel_size):
    model = KerasClassifier(build_fn=create_cnn_model,
                            learning_rate=learning_rate,
                            dropout_rate=dropout_rate,
                            num_filters=num_filters,
                            kernel_size=kernel_size,
                            epochs=10,
                            batch_size=10,
                            verbose=0)

    return cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()


In [9]:
param_space = {
    'learning_rate': (0.0001, 0.01),
    'dropout_rate': (0.1, 0.5),
    'num_filters': (10, 50),
    'kernel_size': (2, 5)
}


In [10]:
optimizer = BayesianOptimization(
    f=optimize_cnn,
    pbounds=param_space,
    random_state=42
)

# Start the optimization process
optimizer.maximize(init_points=5, n_iter=25)


NameError: name 'BayesianOptimization' is not defined

In [None]:
best_params = optimizer.max['params']
best_model = create_cnn_model(
    learning_rate=best_params['learning_rate'],
    dropout_rate=best_params['dropout_rate'],
    num_filters=best_params['num_filters'],
    kernel_size=best_params['kernel_size']
)

best_model.fit(X_train, y_train, epochs=50, batch_size=10, verbose=0)
y_pred = best_model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to class labels

accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized CNN Accuracy: {accuracy}")
