In [120]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

In [122]:
# Load dataset
dataset = pd.read_csv("./CKD.csv")

In [124]:
# Separate independent and dependent variables
independent = dataset.drop("classification", axis=1)
dependent = dataset["classification"]

In [126]:
# convert categorical data into binary format(one-hot encoding)
independent = pd.get_dummies(independent, drop_first=True)

In [128]:
# Dependent variable is categorical (e.g., 'yes'/'no'), encode it to numerical values
label_encoder = LabelEncoder()
dependent = label_encoder.fit_transform(dependent)

In [130]:
x_train, x_test, y_train, y_test = train_test_split(independent, dependent, test_size=1/3, random_state=0)

In [132]:
classification = LogisticRegression()
classification.fit(x_train, y_train)
model_predict = classification.predict(x_test)
report = classification_report(y_test, model_predict)
print(report)

              precision    recall  f1-score   support

           0       0.89      0.92      0.90        51
           1       0.95      0.93      0.94        82

    accuracy                           0.92       133
   macro avg       0.92      0.92      0.92       133
weighted avg       0.93      0.92      0.93       133



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [136]:
param_grid = {
    'solver': ['liblinear', 'saga', 'newton-cholesky'],
    'penalty': ['l2'],
    'C': [0.1, 1, 10]  # Regularization strength
}

grid = GridSearchCV(LogisticRegression(), param_grid, refit=True, verbose = 3, n_jobs=-1, scoring='f1_weighted')
grid.fit(x_train, y_train)
print("Best Parameters:", grid.best_params_)
model_predict = classification.predict(x_test)
report = classification_report(y_test, model_predict)
print(report)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
Best Parameters: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
              precision    recall  f1-score   support

           0       0.89      0.92      0.90        51
           1       0.95      0.93      0.94        82

    accuracy                           0.92       133
   macro avg       0.92      0.92      0.92       133
weighted avg       0.93      0.92      0.93       133



