In [2]:
import warnings

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score

from utils import *

In [3]:
warnings.filterwarnings("ignore")

X_train, X_test, y_train, y_test, train_df, test_df = preprocess_data(standardise=False)

In [4]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

lr_model = LogisticRegression()
lr_model.fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)
lr_accuracy = accuracy_score(y_test, y_pred_lr)
lr_report = classification_report(y_test, y_pred_lr)
print(lr_report)
print(lr_accuracy)

              precision    recall  f1-score   support

           0       0.90      0.42      0.57        43
           1       0.76      0.97      0.85        80

    accuracy                           0.78       123
   macro avg       0.83      0.70      0.71       123
weighted avg       0.81      0.78      0.75       123

0.7804878048780488


In [5]:
def optimise_logistic_regression(C, max_iter):
    model = LogisticRegression(
        C=C,
        max_iter=int(max_iter),
        random_state=42,
        solver='liblinear'  # You can change the solver if needed
    )
    return cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()


In [6]:
param_space = np.array([
    (0.01, 10),  # Regularization strength
    (100, 1000)  # Number of iterations
])

n_iters = 25
initial_samples = 5

# Initial random samples
x0 = np.random.uniform(param_space[:, 0], param_space[:, 1], size=(initial_samples, param_space.shape[0]))
y0 = np.array([optimise_logistic_regression(*params) for params in x0])

gp_params = {"alpha": 1e-6}


In [14]:
X_sample, Y_sample, gpr = bayesian_optimisation(n_iters, optimise_logistic_regression, param_space, x0, y0.reshape(-1, 1), gp_params)

# Best parameters
best_idx = np.argmax(Y_sample)
best_params = X_sample[best_idx]
best_accuracy = Y_sample[best_idx]

print(f"Best accuracy: {best_accuracy}")

Best accuracy: [0.81255411]
