In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score



In [2]:
df = pd.read_csv('pima-indians-diabetes.data.csv', header=None)

In [3]:
df.shape

(768, 9)

In [4]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [6]:
pipeline = make_pipeline(MinMaxScaler(), LogisticRegression(solver='liblinear'))

In [7]:
pipeline

Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                ('logisticregression', LogisticRegression(solver='liblinear'))])

In [12]:
alphas = [0.1,0.2,0.5,1.0,2.0,5.0,10.0]
penalty = ['l1', 'l2']
params = {
    'logisticregression__C' : alphas,
    'logisticregression__penalty' : penalty
}

In [13]:
grid = GridSearchCV(pipeline, param_grid=params, cv=5)

In [14]:
grid.fit(X,y)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                                       ('logisticregression',
                                        LogisticRegression(solver='liblinear'))]),
             param_grid={'logisticregression__C': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0,
                                                   10.0],
                         'logisticregression__penalty': ['l1', 'l2']})

In [15]:
grid.best_params_

{'logisticregression__C': 10.0, 'logisticregression__penalty': 'l1'}

In [16]:
grid.best_score_

0.7721840251252015

In [17]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_logisticregression__C,param_logisticregression__penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003983,0.001102,0.000998,0.001093451,0.1,l1,"{'logisticregression__C': 0.1, 'logisticregres...",0.668831,0.675325,0.655844,0.660131,0.666667,0.665359,0.006796,14
1,0.001987,0.000889,0.000804,0.0007484434,0.1,l2,"{'logisticregression__C': 0.1, 'logisticregres...",0.681818,0.694805,0.675325,0.673203,0.653595,0.675749,0.013397,13
2,0.003599,0.000483,0.000601,0.0004908475,0.2,l1,"{'logisticregression__C': 0.2, 'logisticregres...",0.746753,0.727273,0.772727,0.777778,0.745098,0.753926,0.018773,10
3,0.001384,0.000486,0.000613,0.0005005029,0.2,l2,"{'logisticregression__C': 0.2, 'logisticregres...",0.733766,0.701299,0.720779,0.72549,0.679739,0.712215,0.019432,12
4,0.005406,0.000485,0.000592,0.0004838485,0.5,l1,"{'logisticregression__C': 0.5, 'logisticregres...",0.74026,0.727273,0.772727,0.797386,0.771242,0.761777,0.025024,9
5,0.001795,0.0004,0.000392,0.0004803758,0.5,l2,"{'logisticregression__C': 0.5, 'logisticregres...",0.753247,0.707792,0.727273,0.777778,0.751634,0.743545,0.023977,11
6,0.007372,0.001353,0.000997,8.583069e-07,1.0,l1,"{'logisticregression__C': 1.0, 'logisticregres...",0.753247,0.75974,0.75974,0.79085,0.764706,0.765657,0.013112,7
7,0.001795,0.000399,0.000399,0.0004881691,1.0,l2,"{'logisticregression__C': 1.0, 'logisticregres...",0.75974,0.733766,0.74026,0.803922,0.777778,0.763093,0.025594,8
8,0.006981,0.000643,0.001196,0.0003840102,2.0,l1,"{'logisticregression__C': 2.0, 'logisticregres...",0.75974,0.753247,0.746753,0.810458,0.764706,0.766981,0.022564,6
9,0.001596,0.000476,0.0002,0.0003993988,2.0,l2,"{'logisticregression__C': 2.0, 'logisticregres...",0.766234,0.753247,0.753247,0.797386,0.771242,0.768271,0.016199,4
