In [50]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings('ignore')

In [51]:
data = pd.read_csv("diabetes.csv")

In [52]:
print(data.isnull().sum())

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [54]:
data.fillna(data.mean(), inplace=True)

In [55]:
# Split dataset into inputs and outputs
values = data.values
X = values[:,0:8]
y = values[:,8]

In [56]:
# Initiate the LR model with random hyperparameters
lr = LogisticRegression(penalty='l2',dual=False,max_iter=110)

The hyperparameters that you used are:
penalty : Used to specify the norm used in the penalization (regularization).

dual : Dual or primal formulation. 
   The dual formulation is only implemented for l2 penalty with liblinear solver. 
   Prefer dual=False when n_samples > n_features.
   
max_iter : Maximum number of iterations taken to converge.

In [59]:
# Pass data to the LR model
lr.fit(X,y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=110,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
lr.score(X,y)

In [60]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [61]:
# Build the k-fold cross-validator

kfold = KFold(n_splits=3, random_state=7)

In [62]:
result = cross_val_score(lr, X, y, cv=kfold, scoring='accuracy')
print(result.mean())

0.7721354166666666


In [63]:
from sklearn.model_selection import GridSearchCV

In [64]:
dual=[True,False]
max_iter=[100,110,120,130,140]
param_grid = dict(dual=dual,max_iter=max_iter)

In [65]:
import time

lr = LogisticRegression(penalty='l2')
grid = GridSearchCV(estimator=lr, param_grid=param_grid, cv = 3, n_jobs=-1)

start_time = time.time()
grid_result = grid.fit(X, y)
# Summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
print("Execution time: " + str((time.time() - start_time)) + ' ms')

Best: 0.772135 using {'dual': False, 'max_iter': 100}
Execution time: 0.835690975189209 ms


In [66]:
dual=[True,False]
max_iter=[100,110,120,130,140]
C = [1.0,1.5,2.0,2.5]
param_grid = dict(dual=dual,max_iter=max_iter,C=C)

In [67]:
lr = LogisticRegression(penalty='l2')
grid = GridSearchCV(estimator=lr, param_grid=param_grid, cv = 3, n_jobs=-1)

start_time = time.time()
grid_result = grid.fit(X, y)

In [68]:
# Summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
print("Execution time: " + str((time.time() - start_time)) + ' ms')

Best: 0.773438 using {'C': 2.5, 'dual': False, 'max_iter': 100}
Execution time: 2.556572914123535 ms


In [69]:
from sklearn.model_selection import RandomizedSearchCV

In [70]:
random = RandomizedSearchCV(estimator=lr, param_distributions=param_grid, cv = 3, n_jobs=-1)

start_time = time.time()
random_result = random.fit(X, y)
# Summarize results
print("Best: %f using %s" % (random_result.best_score_, random_result.best_params_))
print("Execution time: " + str((time.time() - start_time)) + ' ms')

Best: 0.770833 using {'max_iter': 100, 'dual': False, 'C': 1.5}
Execution time: 0.9716832637786865 ms
