In [4]:
#LogisticRegression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib

# Dataset
dataset = pd.read_csv('/content/Accounts.csv')

X = dataset.iloc[:, 1:43].values
y = dataset.iloc[:, 43].values

# Split data
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define hyperparameters
lr_parameters = {'C': [10**i for i in range(-3, 4)], 'penalty': ['l1', 'l2']}

# Cross-validation strategy
outer_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
inner_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Initialize metrics
lr_accuracy_scores = []
lr_precision_scores = []
lr_recall_scores = []
lr_f1_scores = []

# Store the best model
best_lr_model = None
best_lr_score = 0.0

# Cross-validation
for train_index, test_index in outer_cv.split(X_train, y_train):
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    lr_classifier = LogisticRegression(random_state=42)
    lr_grid_search = GridSearchCV(lr_classifier, lr_parameters, cv=inner_cv)
    lr_grid_search.fit(X_train_fold, y_train_fold)
    lr_best_params = lr_grid_search.best_params_
    lr_best_model = LogisticRegression(random_state=42, **lr_best_params)

    # Fit best model
    lr_best_model.fit(X_train_fold, y_train_fold)

    # Evaluation on test set
    lr_predictions = lr_best_model.predict(X_test_fold)
    current_score = accuracy_score(y_test_fold, lr_predictions)

    # Store best model
    if current_score > best_lr_score:
        best_lr_score = current_score
        best_lr_model = lr_best_model

    # Metrics
    lr_accuracy_scores.append(accuracy_score(y_test_fold, lr_predictions))
    lr_precision_scores.append(precision_score(y_test_fold, lr_predictions))
    lr_recall_scores.append(recall_score(y_test_fold, lr_predictions))
    lr_f1_scores.append(f1_score(y_test_fold, lr_predictions))

# Save
if best_lr_model is not None:
    joblib.dump(best_lr_model, 'best_logistic_regression_model.pkl')

# Test set
lr_average_accuracy_test = np.mean(lr_accuracy_scores)
lr_average_precision_test = np.mean(lr_precision_scores)
lr_average_recall_test = np.mean(lr_recall_scores)
lr_average_f1_test = np.mean(lr_f1_scores)

# Print Scores
print("\nLogistic Regression Test Set:")
print("Average Accuracy:", round(lr_average_accuracy_test, 4))
print("Average Precision:", round(lr_average_precision_test, 4))
print("Average Recall:", round(lr_average_recall_test, 4))
print("Average F1-score:", round(lr_average_f1_test, 4))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number 


Logistic Regression Test Set:
Average Accuracy: 0.9845
Average Precision: 0.988
Average Recall: 0.981
Average F1-score: 0.9844


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
