In [29]:
import warnings

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

from utils import preprocess_data


In [30]:
warnings.filterwarnings("ignore")

X_train, X_test, y_train, y_test, train_df, test_df = preprocess_data(standardise=False)

In [31]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

lr_model = LogisticRegression()
lr_model.fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)
lr_accuracy = accuracy_score(y_test, y_pred_lr)
lr_report = classification_report(y_test, y_pred_lr)
print(lr_report)
print(lr_accuracy)

              precision    recall  f1-score   support

           0       0.90      0.42      0.57        43
           1       0.76      0.97      0.85        80

    accuracy                           0.78       123
   macro avg       0.83      0.70      0.71       123
weighted avg       0.81      0.78      0.75       123

0.7804878048780488


In [32]:
def optimize_logistic_regression(C, max_iter):
    model = LogisticRegression(
        C=C,
        max_iter=int(max_iter),
        random_state=42,
        solver='liblinear'  # You can change the solver if needed
    )
    return cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()


In [33]:
param_space = {
    'C': (0.01, 10),  # Regularization strength
    'max_iter': (100, 1000)  # Number of iterations
}


In [34]:
optimizer = BayesianOptimization(
    f=optimize_logistic_regression,
    pbounds=param_space,
    random_state=42
)

# Start the optimization process
optimizer.maximize(init_points=5, n_iter=25)

|   iter    |  target   |     C     | max_iter  |
-------------------------------------------------
| [39m1        [39m | [39m0.8146   [39m | [39m3.752    [39m | [39m955.6    [39m |
| [39m2        [39m | [39m0.8126   [39m | [39m7.323    [39m | [39m638.8    [39m |
| [39m3        [39m | [39m0.8126   [39m | [39m1.569    [39m | [39m240.4    [39m |
| [39m4        [39m | [39m0.8105   [39m | [39m0.5903   [39m | [39m879.6    [39m |
| [39m5        [39m | [39m0.8105   [39m | [39m6.015    [39m | [39m737.3    [39m |
| [39m6        [39m | [39m0.8105   [39m | [39m3.059    [39m | [39m956.3    [39m |
| [39m7        [39m | [39m0.8126   [39m | [39m1.378    [39m | [39m240.5    [39m |
| [39m8        [39m | [39m0.8105   [39m | [39m4.033    [39m | [39m955.4    [39m |
| [39m9        [39m | [39m0.8126   [39m | [39m3.672    [39m | [39m955.7    [39m |
| [39m10       [39m | [39m0.8126   [39m | [39m4.44     [39m | [39m820.0    [39m |


In [35]:
best_params = optimizer.max['params']
best_model = LogisticRegression(
    C=best_params['C'],
    max_iter=int(best_params['max_iter']),
    random_state=42,
    solver='liblinear'
)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized Logistic Regression Accuracy: {accuracy}")


Optimized Logistic Regression Accuracy: 0.7886178861788617
