In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

In [20]:
df = pd.read_csv("https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv")
df.head()

Unnamed: 0,6,148,72,35,0,33.6,0.627,50,1
0,1,85,66,29,0,26.6,0.351,31,0
1,8,183,64,0,0,23.3,0.672,32,1
2,1,89,66,23,94,28.1,0.167,21,0
3,0,137,40,35,168,43.1,2.288,33,1
4,5,116,74,0,0,25.6,0.201,30,0


In [21]:
df.shape

(767, 9)

In [22]:
df.columns = ['preg','plas','pres','skin','test','mass','pedi','age','class']
df

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,1,85,66,29,0,26.6,0.351,31,0
1,8,183,64,0,0,23.3,0.672,32,1
2,1,89,66,23,94,28.1,0.167,21,0
3,0,137,40,35,168,43.1,2.288,33,1
4,5,116,74,0,0,25.6,0.201,30,0
...,...,...,...,...,...,...,...,...,...
762,10,101,76,48,180,32.9,0.171,63,0
763,2,122,70,27,0,36.8,0.340,27,0
764,5,121,72,23,112,26.2,0.245,30,0
765,1,126,60,0,0,30.1,0.349,47,1


In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 767 entries, 0 to 766
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   preg    767 non-null    int64  
 1   plas    767 non-null    int64  
 2   pres    767 non-null    int64  
 3   skin    767 non-null    int64  
 4   test    767 non-null    int64  
 5   mass    767 non-null    float64
 6   pedi    767 non-null    float64
 7   age     767 non-null    int64  
 8   class   767 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [24]:
df['class'].value_counts(normalize=True)

class
0    0.65189
1    0.34811
Name: proportion, dtype: float64

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [26]:
x = df.iloc[:, :-1]
y = df.iloc[:,-1]

In [27]:
x

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age
0,1,85,66,29,0,26.6,0.351,31
1,8,183,64,0,0,23.3,0.672,32
2,1,89,66,23,94,28.1,0.167,21
3,0,137,40,35,168,43.1,2.288,33
4,5,116,74,0,0,25.6,0.201,30
...,...,...,...,...,...,...,...,...
762,10,101,76,48,180,32.9,0.171,63
763,2,122,70,27,0,36.8,0.340,27
764,5,121,72,23,112,26.2,0.245,30
765,1,126,60,0,0,30.1,0.349,47


In [28]:
y

0      0
1      1
2      0
3      1
4      0
      ..
762    0
763    0
764    0
765    1
766    0
Name: class, Length: 767, dtype: int64

In [29]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.20, random_state=42, stratify=y)

In [30]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(613, 8)
(613,)
(154, 8)
(154,)


In [8]:
from sklearn.pipeline import Pipeline

# 1. Using simple pipeline with only one model

In [79]:
pipe_1 = Pipeline(
    [
        ("scaler", StandardScaler()),
        ("classifier", LogisticRegression())
    ]
)

#### Training the model

In [80]:
pipe_1.fit(x_train, y_train)

0,1,2
,steps,"[('scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


#### Testing the model

In [81]:
x_train_pred = pipe_1.predict(x_train)
print(f"Train accuracy is : {accuracy_score(x_train_pred, y_train)}")
x_test_pred = pipe_1.predict(x_test)
print(f"Train accuracy is : {accuracy_score(x_test_pred, y_test)}")

Train accuracy is : 0.797716150081566
Train accuracy is : 0.6948051948051948


## Pickle and Unpickle the model

In [83]:
joblib.dump(pipe_1, "model_1.pkl")

['model_1.pkl']

In [85]:
model_1 = joblib.load("model_1.pkl")
model_1

0,1,2
,steps,"[('scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


## Testing it out after unpickling

In [101]:
test = np.array([[1,126,56,29,152,28.7,0.801,21]])
res = model_1.predict(test)
res[0]



np.int64(0)

## Some basics in Pipeline

In [102]:
pipe_1.get_feature_names_out

<bound method Pipeline.get_feature_names_out of Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier', LogisticRegression())])>

In [103]:
pipe_1.feature_names_in_

array(['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age'],
      dtype=object)

In [107]:
pipe_1.get_params().keys()

dict_keys(['memory', 'steps', 'transform_input', 'verbose', 'scaler', 'classifier', 'scaler__copy', 'scaler__with_mean', 'scaler__with_std', 'classifier__C', 'classifier__class_weight', 'classifier__dual', 'classifier__fit_intercept', 'classifier__intercept_scaling', 'classifier__l1_ratio', 'classifier__max_iter', 'classifier__multi_class', 'classifier__n_jobs', 'classifier__penalty', 'classifier__random_state', 'classifier__solver', 'classifier__tol', 'classifier__verbose', 'classifier__warm_start'])

In [120]:
pipe_1.get_params()

{'memory': None,
 'steps': [('scaler', StandardScaler()), ('classifier', LogisticRegression())],
 'transform_input': None,
 'verbose': False,
 'scaler': StandardScaler(),
 'classifier': LogisticRegression(),
 'scaler__copy': True,
 'scaler__with_mean': True,
 'scaler__with_std': True,
 'classifier__C': 1.0,
 'classifier__class_weight': None,
 'classifier__dual': False,
 'classifier__fit_intercept': True,
 'classifier__intercept_scaling': 1,
 'classifier__l1_ratio': None,
 'classifier__max_iter': 100,
 'classifier__multi_class': 'deprecated',
 'classifier__n_jobs': None,
 'classifier__penalty': 'l2',
 'classifier__random_state': None,
 'classifier__solver': 'lbfgs',
 'classifier__tol': 0.0001,
 'classifier__verbose': 0,
 'classifier__warm_start': False}

# 2. Using GridSearchCV with simple Pipeline and with simple defined param_grid

In [190]:
param_grid_1 = [
    {
        "scaler" : [StandardScaler(), MinMaxScaler()],
#also providing this below is optional as already in the pipeline it is set, but recommendation is to keep the classifier and within a list
        "classifier": [LogisticRegression()],
        "classifier__penalty" : ["l1", "l2"],
        "classifier__solver": ["liblinear", "saga", "lbfgs"],
        "classifier__verbose" : [0 , 1, 2],
        "classifier__warm_start" : [True, False]
    }
]

In [144]:
from sklearn.model_selection import GridSearchCV

In [145]:
clf = GridSearchCV(estimator=pipe_1, param_grid=param_grid_1, cv = 10, scoring = "accuracy", return_train_score = True, verbose = 1)

#### Training the model

In [159]:
clf.fit(x_train, y_train)

Fitting 10 folds for each of 72 candidates, totalling 720 fits
[LibLinear]37801
Epoch 3, change: 0.10967485
Epoch 4, change: 0.046855434
Epoch 5, change: 0.027347045
Epoch 6, change: 0.01349896
Epoch 7, change: 0.0093925119
Epoch 8, change: 0.0059481729
Epoch 9, change: 0.0039205762
Epoch 10, change: 0.002530391
Epoch 11, change: 0.0015035065
Epoch 12, change: 0.0010143388
Epoch 13, change: 0.0006619527
Epoch 14, change: 0.00031886291
Epoch 15, change: 0.00026313774
Epoch 16, change: 0.00012375761
Epoch 17, change: 0.00011484668
Epoch 1, change: 1
Epoch 2, change: 0.18441038
Epoch 3, change: 0.078998761
Epoch 4, change: 0.04696999
Epoch 5, change: 0.025203305
Epoch 6, change: 0.023159907
Epoch 7, change: 0.0093042219
Epoch 8, change: 0.012148084
Epoch 9, change: 0.0029083889
Epoch 10, change: 0.0019217037
Epoch 11, change: 0.00072725976
Epoch 12, change: 0.0008776783
Epoch 13, change: 0.00027127235
Epoch 14, change: 0.00057697494
Epoch 15, change: 0.00022250259
Epoch 16, change: 0.0001

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

convergence after 19 epochs took 0 seconds
convergence after 15 epochs took 0 seconds
convergence after 18 epochs took 0 seconds
convergence after 13 epochs took 0 seconds
convergence after 15 epochs took 0 seconds
convergence after 13 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 18 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 17 epochs took 0 seconds
convergence after 19 epochs took 0 seconds
convergence after 26 epochs took 0 seconds
convergence after 26 epochs took 0 seconds
convergence after 26 epochs took 0 seconds
convergence after 25 epochs took 0 seconds
convergence after 27 epochs took 0 seconds
convergence after 24 epochs took 0 seconds
convergence after 26 epochs took 0 seconds
convergence after 28 epochs took 0 seconds
convergence after 28 epochs took 0 seconds


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

[LibLinear]00092344286
Epoch 18, change: 0.0007532037
Epoch 19, change: 0.00054541559
Epoch 20, change: 0.00040435069
Epoch 21, change: 0.00031651344
Epoch 22, change: 0.00024183264
Epoch 23, change: 0.00019587585
Epoch 24, change: 0.00014584381
Epoch 1, change: 1
Epoch 2, change: 0.30505454
Epoch 3, change: 0.13189586
Epoch 4, change: 0.058188293
Epoch 5, change: 0.047184896
Epoch 6, change: 0.04121553
Epoch 7, change: 0.027905818
Epoch 8, change: 0.017768976
Epoch 9, change: 0.011068204
Epoch 10, change: 0.010988673
Epoch 11, change: 0.0075164773
Epoch 12, change: 0.0071072377
Epoch 13, change: 0.0046700366
Epoch 14, change: 0.0027949147
Epoch 15, change: 0.0023703972
Epoch 16, change: 0.0011649562
Epoch 17, change: 0.0013920979
Epoch 18, change: 0.0010170365
Epoch 19, change: 0.00074658949
Epoch 20, change: 0.00050176508
Epoch 21, change: 0.00045824701
Epoch 22, change: 0.00032279197
Epoch 23, change: 0.00023189292
Epoch 24, change: 0.00019075884
Epoch 25, change: 0.00013789029
Epoc

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

convergence after 17 epochs took 0 seconds
convergence after 18 epochs took 0 seconds
convergence after 13 epochs took 0 seconds
convergence after 15 epochs took 0 seconds
convergence after 14 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 15 epochs took 0 seconds
convergence after 12 epochs took 0 seconds
convergence after 14 epochs took 0 seconds
convergence after 15 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 13 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 18 epochs took 0 seconds
convergence after 18 epochs took 0 seconds
convergence after 18 epochs took 0 seconds
convergence after 18 epochs took 0 seconds
convergence after 19 epochs took 0 seconds
Epoch 1, change: 1
Epoch 2, change: 0.19228092
Epoch 3, change: 0.11022967
Epoch 4, change: 0.055084069
Epoch 5, change: 0.065532846
Epoch 6, change: 0.025899667
Epoch 7, change: 0.00

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

0,1,2
,estimator,Pipeline(step...egression())])
,param_grid,"[{'classifier': [LogisticRegression()], 'classifier__penalty': ['l1', 'l2'], 'classifier__solver': ['liblinear', 'saga', ...], 'classifier__verbose': [0, 1, ...], ...}]"
,scoring,'accuracy'
,n_jobs,
,refit,True
,cv,10
,verbose,1
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'liblinear'
,max_iter,100


#### Finding the best parameter model

In [160]:
clf.best_estimator_

0,1,2
,steps,"[('scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'liblinear'
,max_iter,100


In [161]:
print(clf.best_estimator_)

Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 LogisticRegression(solver='liblinear', warm_start=True))])


In [162]:
clf.best_params_

{'classifier': LogisticRegression(),
 'classifier__penalty': 'l2',
 'classifier__solver': 'liblinear',
 'classifier__verbose': 0,
 'classifier__warm_start': True,
 'scaler': StandardScaler()}

#### Testing the model

In [163]:
x_train_pred_clf = clf.predict(x_train)
print(f"Train accuracy is : {accuracy_score(x_train_pred_clf, y_train)}")
x_test_pred_clf = clf.predict(x_test)
print(f"Train accuracy is : {accuracy_score(x_test_pred_clf, y_test)}")

Train accuracy is : 0.799347471451876
Train accuracy is : 0.6948051948051948


## Pickling and Unpickling the model

In [164]:
joblib.dump(clf.best_estimator_, "clf_logistic.pkl")

['clf_logistic.pkl']

In [165]:
model_2 = joblib.load("clf_logistic.pkl")
model_2

0,1,2
,steps,"[('scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'liblinear'
,max_iter,100


#### Testing after unpickling the model

In [166]:
test = np.array([[0,141,0,0,0,42.4,0.205,29]])
res = model_2.predict(test)
res[0]



np.int64(1)

# 3. Using a complex Pipeline with different models

In [171]:
pipe_2 = {
    "logistic_regression" : Pipeline(
        [
            ("scaler", StandardScaler()),
            ("classifier", LogisticRegression())
        ]
    ),
    "random_forest" : Pipeline(
        [
            ("scaler", StandardScaler()),
            ("classifier", RandomForestClassifier())
        ]
    ),
    "SVM" : Pipeline(
        [
            ("scaler", StandardScaler()),
            ("classifier", SVC())
        ]
    ),
    "Gaussian_NB" : Pipeline(
        [
            ("scaler", StandardScaler()),
            ("classifier", GaussianNB())
        ]
    ),
    "knn" : Pipeline(
        [
            ("scaler", StandardScaler()),
            ("classifier", KNeighborsClassifier())
        ]
    ),
    "gradient_boosting" : Pipeline(
        [
            ("scaler", StandardScaler()),
            ("classifier", GradientBoostingClassifier())
        ]
    )
}

pipe_2

{'logistic_regression': Pipeline(steps=[('scaler', StandardScaler()),
                 ('classifier', LogisticRegression())]),
 'random_forest': Pipeline(steps=[('scaler', StandardScaler()),
                 ('classifier', RandomForestClassifier())]),
 'SVM': Pipeline(steps=[('scaler', StandardScaler()), ('classifier', SVC())]),
 'Gaussian_NB': Pipeline(steps=[('scaler', StandardScaler()), ('classifier', GaussianNB())]),
 'knn': Pipeline(steps=[('scaler', StandardScaler()),
                 ('classifier', KNeighborsClassifier())]),
 'gradient_boosting': Pipeline(steps=[('scaler', StandardScaler()),
                 ('classifier', GradientBoostingClassifier())])}

In [180]:
for model in pipe_2.values():
    print(model.get_params().keys())
    print("="*100)

dict_keys(['memory', 'steps', 'transform_input', 'verbose', 'scaler', 'classifier', 'scaler__copy', 'scaler__with_mean', 'scaler__with_std', 'classifier__C', 'classifier__class_weight', 'classifier__dual', 'classifier__fit_intercept', 'classifier__intercept_scaling', 'classifier__l1_ratio', 'classifier__max_iter', 'classifier__multi_class', 'classifier__n_jobs', 'classifier__penalty', 'classifier__random_state', 'classifier__solver', 'classifier__tol', 'classifier__verbose', 'classifier__warm_start'])
dict_keys(['memory', 'steps', 'transform_input', 'verbose', 'scaler', 'classifier', 'scaler__copy', 'scaler__with_mean', 'scaler__with_std', 'classifier__bootstrap', 'classifier__ccp_alpha', 'classifier__class_weight', 'classifier__criterion', 'classifier__max_depth', 'classifier__max_features', 'classifier__max_leaf_nodes', 'classifier__max_samples', 'classifier__min_impurity_decrease', 'classifier__min_samples_leaf', 'classifier__min_samples_split', 'classifier__min_weight_fraction_leaf

# 3. Using a complex param_grid along with complex Pipeline

In [9]:
param_grid_2 = [
    {
        "scaler" : [StandardScaler(), MinMaxScaler()],
        "classifier": [LogisticRegression()],
        "classifier__penalty" : ["l2"],
        "classifier__solver": ["liblinear", "saga", "lbfgs"],
        "classifier__verbose" : [0 , 1, 2],
        "classifier__warm_start" : [True, False]
    },
    {
        "scaler" : [StandardScaler(), MinMaxScaler()],
        "classifier": [RandomForestClassifier()],
        "classifier__n_estimators" : [100, 200, 300, 400, 500],
        "classifier__criterion" : ["gini", "entropy"],
        "classifier__max_depth" : [i for i in range(2,10,2)],
        "classifier__n_jobs" : [-1],
        "classifier__oob_score" : [True, False],
        "classifier__warm_start" : [True, False]
    },
    {
        "scaler" : [StandardScaler(), MinMaxScaler()],
        "classifier": [SVC()],
        "classifier__kernel" : ["poly", "rbf", "sigmoid"],
        "classifier__gamma" : ["scale", "auto"]
    },
    {
        "scaler" : [StandardScaler(), MinMaxScaler()],
        "classifier": [GaussianNB()]
    },
    {
        "scaler" : [StandardScaler(), MinMaxScaler()],
        "classifier": [KNeighborsClassifier()],
        "classifier__n_neighbors" : [i for i in range(5,20,5)],
        "classifier__weights" : ["uniform", "distance"],
        "classifier__n_jobs" : [-1]
    },
    {
        "scaler" : [StandardScaler(), MinMaxScaler()],
        "classifier": [GradientBoostingClassifier()],
        "classifier__n_estimators" : [100, 200, 300, 400, 500],
        "classifier__criterion" : ["friedman_mse", "squared_error"],
        "classifier__max_depth" : [i for i in range(2,10,2)],
    }
]

In [10]:
param_grid_2

[{'scaler': [StandardScaler(), MinMaxScaler()],
  'classifier': [LogisticRegression()],
  'classifier__penalty': ['l2'],
  'classifier__solver': ['liblinear', 'saga', 'lbfgs'],
  'classifier__verbose': [0, 1, 2],
  'classifier__warm_start': [True, False]},
 {'scaler': [StandardScaler(), MinMaxScaler()],
  'classifier': [RandomForestClassifier()],
  'classifier__n_estimators': [100, 200, 300, 400, 500],
  'classifier__criterion': ['gini', 'entropy'],
  'classifier__max_depth': [2, 4, 6, 8],
  'classifier__n_jobs': [-1],
  'classifier__oob_score': [True, False],
  'classifier__warm_start': [True, False]},
 {'scaler': [StandardScaler(), MinMaxScaler()],
  'classifier': [SVC()],
  'classifier__kernel': ['poly', 'rbf', 'sigmoid'],
  'classifier__gamma': ['scale', 'auto']},
 {'scaler': [StandardScaler(), MinMaxScaler()], 'classifier': [GaussianNB()]},
 {'scaler': [StandardScaler(), MinMaxScaler()],
  'classifier': [KNeighborsClassifier()],
  'classifier__n_neighbors': [5, 10, 15],
  'classif

In [213]:
#this will create an error as estimator parameter accepts only on object and not dictionary of models
#clf_2 = GridSearchCV(estimator=pipe_2, param_grid=param_grid_2, cv = 10, scoring="accuracy", return_train_score=True,verbose=1)

In [214]:
#clf_2.fit(x_train, y_train) -> will create an error as mentioned above

## 1st method: Looping through the Pipeline and its corresponding param_grid to compare different models and also Training the model

In [217]:
all_models = {} #storing all the models to access globally
for (model_name, model_pipeline), param_grid in zip(pipe_2.items(), param_grid_2):
    print(f"Model name : {model_name}")
    clf_2 = GridSearchCV(estimator=model_pipeline, param_grid=param_grid, cv = 10, scoring="accuracy", return_train_score=True,verbose=1,error_score='raise')
    clf_2.fit(x_train, y_train)
    all_models[model_name] = clf_2
    print(clf_2.best_estimator_)
    print(clf_2.best_params_)
    print(clf_2.best_score_)

Model name : logistic_regression
Fitting 10 folds for each of 36 candidates, totalling 360 fits
[LibLinear]96578944
Epoch 9, change: 0.0052673359
Epoch 10, change: 0.0023754662
Epoch 11, change: 0.0017535281
Epoch 12, change: 0.001661653
Epoch 13, change: 0.000780769
Epoch 14, change: 0.0017951236
Epoch 15, change: 0.00036104627
Epoch 16, change: 0.00014526985
Epoch 1, change: 1
Epoch 2, change: 0.18937842
Epoch 3, change: 0.14353771
Epoch 4, change: 0.062090211
Epoch 5, change: 0.022004635
Epoch 6, change: 0.020013529
Epoch 7, change: 0.013806836
Epoch 8, change: 0.0067135462
Epoch 9, change: 0.003781081
Epoch 10, change: 0.0021937976
Epoch 11, change: 0.0015404056
Epoch 12, change: 0.0012471712
Epoch 13, change: 0.00085970337
Epoch 14, change: 0.00041750033
Epoch 15, change: 0.00030713643
Epoch 16, change: 0.00013173656
Epoch 17, change: 0.00012152642
Epoch 1, change: 1
Epoch 2, change: 0.21781727
Epoch 3, change: 0.098235322
Epoch 4, change: 0.060264473
Epoch 5, change: 0.023017908


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

convergence after 14 epochs took 0 seconds
convergence after 14 epochs took 0 seconds
convergence after 13 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 19 epochs took 0 seconds
846559
Epoch 3, change: 0.081705152
Epoch 4, change: 0.066254972
Epoch 5, change: 0.038067153
Epoch 6, change: 0.013852203
Epoch 7, change: 0.017829149
Epoch 8, change: 0.009720434
Epoch 9, change: 0.0057057532
Epoch 10, change: 0.0016036092
Epoch 11, change: 0.0021778074
Epoch 12, change: 0.0010023231
Epoch 13, change: 0.00062085497
Epoch 14, change: 0.00040171751
Epoch 15, change: 0.00024222666
Epoch 16, change: 0.00027505894
Epoch 1, change: 1
Epoch 2, change: 0.19749858
Epoch 3, change: 0.10055547
Epoch 4, change: 0.052981197
Epoch 5, change: 0.042175076
Epoch 6, change: 0.018857923
Epoch 7, change: 0.012157312
Epoch 8, change: 0.006273855
Epoch 9, change: 0.005545467
Epoch 10, change: 0.0033645924
Epoch 11, change: 0.0019351797

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 LogisticRegression(solver='liblinear', warm_start=True))])
{'classifier': LogisticRegression(), 'classifier__penalty': 'l2', 'classifier__solver': 'liblinear', 'classifier__verbose': 0, 'classifier__warm_start': True, 'scaler': StandardScaler()}
0.78783712321523
Model name : random_forest
Fitting 10 folds for each of 320 candidates, totalling 3200 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 RandomForestClassifier(criterion='entropy', max_depth=8,
                                        n_estimators=200, n_jobs=-1,
                                        oob_score=True))])
{'classifier': RandomForestClassifier(), 'classifier__criterion': 'entropy', 'classifier__max_depth': 8, 'classifier__n_estimators': 200, 'classifier__n_jobs': -1, 'classifier__oob_score': True, 'classifier__warm_start': False, 'scaler': StandardScaler()}
0.79444738233738

In [None]:
#list(zip(pipe_2.items(), param_grid_2))

In [None]:
#print(list(zip(pipe_2.items(), param_grid_2))[0])

## Finding the model information through looping

In [219]:
for model, clf_final in all_models.items():
    print(f"Model Name: {model}")
    print(f"Accuracy : {clf_final.best_score_}")
    print(f"Parameters : {clf_final.best_params_}")
    print(f"Pipeline : {clf_final.best_estimator_}")
    print("="*100)

Model Name: logistic_regression
Accuracy : 0.78783712321523
Parameters : {'classifier': LogisticRegression(), 'classifier__penalty': 'l2', 'classifier__solver': 'liblinear', 'classifier__verbose': 0, 'classifier__warm_start': True, 'scaler': StandardScaler()}
Pipeline : Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 LogisticRegression(solver='liblinear', warm_start=True))])
Model Name: random_forest
Accuracy : 0.7944473823373877
Parameters : {'classifier': RandomForestClassifier(), 'classifier__criterion': 'entropy', 'classifier__max_depth': 8, 'classifier__n_estimators': 200, 'classifier__n_jobs': -1, 'classifier__oob_score': True, 'classifier__warm_start': False, 'scaler': StandardScaler()}
Pipeline : Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 RandomForestClassifier(criterion='entropy', max_depth=8,
                                        n_estimators=200, n_jobs=-1,
                    

## Finding the best model

In [224]:
best_model = max(all_models, key=lambda name: all_models[name].best_score_)
best_model

'random_forest'

In [244]:
best_model_clf_2 = all_models[best_model]
best_model_clf_2

0,1,2
,estimator,Pipeline(step...lassifier())])
,param_grid,"{'classifier': [RandomForestClassifier()], 'classifier__criterion': ['gini', 'entropy'], 'classifier__max_depth': [2, 4, ...], 'classifier__n_estimators': [100, 200, ...], ...}"
,scoring,'accuracy'
,n_jobs,
,refit,True
,cv,10
,verbose,1
,pre_dispatch,'2*n_jobs'
,error_score,'raise'
,return_train_score,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,n_estimators,200
,criterion,'entropy'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


#### Testing the model

In [245]:
x_train_pred_clf_2 = best_model_clf_2.predict(x_train)
print(f"Train accuracy is : {accuracy_score(x_train_pred_clf_2, y_train)}")
x_test_pred_clf_2 = best_model_clf_2.predict(x_test)
print(f"Test accuracy is : {accuracy_score(x_test_pred_clf_2, y_test)}")

Train accuracy is : 0.964110929853181
Test accuracy is : 0.7077922077922078


## Pickling and unpickling the model

In [246]:
joblib.dump(best_model_clf_2.best_estimator_, "rf_clf_2.pkl")

['rf_clf_2.pkl']

In [247]:
rf_clf_2 = joblib.load("rf_clf_2.pkl")
rf_clf_2

0,1,2
,steps,"[('scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,n_estimators,200
,criterion,'entropy'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


## Testing after Unpickling the model

In [249]:
test = np.array([[0,141,0,0,0,42.4,0.205,29]])
res = rf_clf_2.predict(test)
res[0]



np.int64(1)

# 4. 2nd way: Using the simple Pipeline and the complex param_grid

In [232]:
pipe_1

0,1,2
,steps,"[('scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [233]:
clf_3 = GridSearchCV(estimator=pipe_1, param_grid=param_grid_2, cv = 10, scoring="accuracy", return_train_score=True,verbose=1)

#### Training the model

In [234]:
clf_3.fit(x_train, y_train)

Fitting 10 folds for each of 462 candidates, totalling 4620 fits
[LibLinear]h 16, change: 0.00022252024
Epoch 1, change: 1
Epoch 2, change: 0.18223585
Epoch 3, change: 0.1077345
Epoch 4, change: 0.052506473
Epoch 5, change: 0.037551624
Epoch 6, change: 0.012059118
Epoch 7, change: 0.015919008
Epoch 8, change: 0.0032635861
Epoch 9, change: 0.0028189031
Epoch 10, change: 0.0017201005
Epoch 11, change: 0.00077718525
Epoch 12, change: 0.00065633612
Epoch 13, change: 0.00041049249
Epoch 14, change: 0.00028054137
Epoch 15, change: 0.00020193067
Epoch 16, change: 0.00013550219
Epoch 1, change: 1
Epoch 2, change: 0.25208274
Epoch 3, change: 0.14490346
Epoch 4, change: 0.027515083
Epoch 5, change: 0.022765791
Epoch 6, change: 0.014429262
Epoch 7, change: 0.012567691
Epoch 8, change: 0.0073718902
Epoch 9, change: 0.0043215901
Epoch 10, change: 0.0018839545
Epoch 11, change: 0.00069598064
Epoch 12, change: 0.0008486144
Epoch 13, change: 0.00041494668
Epoch 14, change: 0.00034458355
Epoch 15, chan

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

convergence after 13 epochs took 0 seconds
convergence after 18 epochs took 0 seconds
convergence after 17 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
ange: 0.00078122859
Epoch 14, change: 0.0011554615
Epoch 15, change: 0.00012314807
Epoch 16, change: 0.00012746478
Epoch 1, change: 1
Epoch 2, change: 0.20399368
Epoch 3, change: 0.1125744
Epoch 4, change: 0.048871574
Epoch 5, change: 0.025519171
Epoch 6, change: 0.013641144
Epoch 7, change: 0.0080758472
Epoch 8, change: 0.0072232885
Epoch 9, change: 0.0032664643
Epoch 10, change: 0.0021205636
Epoch 11, change: 0.0013508798
Epoch 12, change: 0.0003405133
Epoch 13, change: 0.00017954286
Epoch 1, change: 1
Epoch 2, change: 0.20880797
Epoch 3, change: 0.12349473
Epoch 4, change: 0.070249302
Epoch 5, change: 0.028017468
Epoch 6, change: 0.020921196
Epoch 7, change: 0.012538454
Epoch 8, change: 0.0072971992
Epoch 9, change: 0.0061704719
Epoch 10, change: 0.0033887045
Epoch 11, ch

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

0,1,2
,estimator,Pipeline(step...egression())])
,param_grid,"[{'classifier': [LogisticRegression()], 'classifier__penalty': ['l2'], 'classifier__solver': ['liblinear', 'saga', ...], 'classifier__verbose': [0, 1, ...], ...}, {'classifier': [RandomForestClassifier()], 'classifier__criterion': ['gini', 'entropy'], 'classifier__max_depth': [2, 4, ...], 'classifier__n_estimators': [100, 200, ...], ...}, ...]"
,scoring,'accuracy'
,n_jobs,
,refit,True
,cv,10
,verbose,1
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,True

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [236]:
print(f"Accuracy : {clf_3.best_score_}")
print(f"Parameters : {clf_3.best_params_}")
print(f"Pipeline : {clf_3.best_estimator_}")

Accuracy : 0.7911951348492862
Parameters : {'classifier': RandomForestClassifier(), 'classifier__criterion': 'gini', 'classifier__max_depth': 8, 'classifier__n_estimators': 200, 'classifier__n_jobs': -1, 'classifier__oob_score': True, 'classifier__warm_start': True, 'scaler': MinMaxScaler()}
Pipeline : Pipeline(steps=[('scaler', MinMaxScaler()),
                ('classifier',
                 RandomForestClassifier(max_depth=8, n_estimators=200,
                                        n_jobs=-1, oob_score=True,
                                        warm_start=True))])


#### Testing the model

In [238]:
x_train_pred_clf_3 = clf_3.predict(x_train)
print(f"Train accuracy is : {accuracy_score(x_train_pred_clf_3, y_train)}")
x_test_pred_clf_3 = clf_3.predict(x_test)
print(f"Test accuracy is : {accuracy_score(x_test_pred_clf_3, y_test)}")

Train accuracy is : 0.965742251223491
Test accuracy is : 0.7012987012987013


## Pickling and unpickling the model

In [239]:
joblib.dump(clf_3.best_estimator_, "rf_clf_3.pkl")

['rf_clf_3.pkl']

In [240]:
rf_clf_3 = joblib.load("rf_clf_3.pkl")
rf_clf_3

0,1,2
,steps,"[('scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


## Testing after Unpickling the model

In [241]:
test = np.array([[0,141,0,0,0,42.4,0.205,29]])
res = rf_clf_3.predict(test)
res[0]



np.int64(1)

# Applying SMOTE technique and using it in Imblearn Pipeline, else apply SMOTE on training data and then use sklearn Pipeline but best is using the Imblearn Pipeline

In [12]:
from imblearn.pipeline import Pipeline  # different from sklearn.pipeline
from imblearn.over_sampling import SMOTE

In [13]:
pipe_3 = Pipeline(
    [
        ("smote", SMOTE(random_state=42)),
        ("scaler", StandardScaler()),
        ("classifier", LogisticRegression())
    ]
)

pipe_3

In [14]:
param_grid_2

[{'scaler': [StandardScaler(), MinMaxScaler()],
  'classifier': [LogisticRegression()],
  'classifier__penalty': ['l2'],
  'classifier__solver': ['liblinear', 'saga', 'lbfgs'],
  'classifier__verbose': [0, 1, 2],
  'classifier__warm_start': [True, False]},
 {'scaler': [StandardScaler(), MinMaxScaler()],
  'classifier': [RandomForestClassifier()],
  'classifier__n_estimators': [100, 200, 300, 400, 500],
  'classifier__criterion': ['gini', 'entropy'],
  'classifier__max_depth': [2, 4, 6, 8],
  'classifier__n_jobs': [-1],
  'classifier__oob_score': [True, False],
  'classifier__warm_start': [True, False]},
 {'scaler': [StandardScaler(), MinMaxScaler()],
  'classifier': [SVC()],
  'classifier__kernel': ['poly', 'rbf', 'sigmoid'],
  'classifier__gamma': ['scale', 'auto']},
 {'scaler': [StandardScaler(), MinMaxScaler()], 'classifier': [GaussianNB()]},
 {'scaler': [StandardScaler(), MinMaxScaler()],
  'classifier': [KNeighborsClassifier()],
  'classifier__n_neighbors': [5, 10, 15],
  'classif

In [15]:
from sklearn.model_selection import GridSearchCV

In [18]:
clf_4 = GridSearchCV(estimator=pipe_3, param_grid=param_grid_2, cv=10, scoring='f1', return_train_score=True, verbose=1)

In [31]:
clf_4.fit(x_train, y_train)

Fitting 10 folds for each of 462 candidates, totalling 4620 fits
[LibLinear]iter  1 act 1.249e+02 pre 1.133e+02 delta 1.082e+00 f 4.991e+02 |g| 2.482e+02 CG   2
iter  2 act 9.740e+00 pre 8.770e+00 delta 1.082e+00 f 3.741e+02 |g| 4.866e+01 CG   3
iter  3 act 4.166e-01 pre 4.047e-01 delta 1.082e+00 f 3.644e+02 |g| 8.455e+00 CG   3
iter  4 act 3.184e-03 pre 3.183e-03 delta 1.082e+00 f 3.640e+02 |g| 7.695e-01 CG   3
iter  5 act 1.983e-05 pre 1.982e-05 delta 1.082e+00 f 3.640e+02 |g| 5.211e-02 CG   3
[LibLinear]iter  1 act 1.309e+02 pre 1.178e+02 delta 1.113e+00 f 4.991e+02 |g| 2.563e+02 CG   2
iter  2 act 1.353e+01 pre 1.198e+01 delta 1.113e+00 f 3.681e+02 |g| 5.575e+01 CG   3
iter  3 act 7.854e-01 pre 7.528e-01 delta 1.113e+00 f 3.546e+02 |g| 1.103e+01 CG   3
iter  4 act 7.804e-03 pre 7.785e-03 delta 1.113e+00 f 3.538e+02 |g| 9.798e-01 CG   3
iter  5 act 1.194e-05 pre 1.194e-05 delta 1.113e+00 f 3.538e+02 |g| 5.883e-02 CG   2
[LibLinear]iter  1 act 1.269e+02 pre 1.146e+02 delta 1.064e+00 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

convergence after 17 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
Epoch 1, change: 1
Epoch 2, change: 0.10732978
Epoch 3, change: 0.073803619
Epoch 4, change: 0.045503008
Epoch 5, change: 0.028597612
Epoch 6, change: 0.0088727279
Epoch 7, change: 0.0032125168
Epoch 8, change: 0.00229394
Epoch 9, change: 0.0035332707
Epoch 10, change: 0.00093163508
Epoch 11, change: 0.00059174661
Epoch 12, change: 0.00018125433
Epoch 13, change: 0.00026697434
Epoch 14, change: 0.00011528266
Epoch 1, change: 1
Epoch 2, change: 0.1926749
Epoch 3, change: 0.093343577
Epoch 4, change: 0.082973173
Epoch 5, change: 0.041924679
Epoch 6, change: 0.012439807
Epoch 7, change: 0.0058544291
Epoch 8, change: 0.0034585331
Epoch 9, change: 0.0020605673
Epoch 10, change: 0.0012535619
Epoch 11, change: 0.0019453044
Epoch 12, change: 0.00029219713
Epoch 13, change: 0.00010549192
Epoch 14, change: 0.00012789266
Epoch 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

convergence after 12 epochs took 0 seconds
convergence after 15 epochs took 0 seconds
convergence after 14 epochs took 0 seconds
convergence after 17 epochs took 0 seconds
convergence after 15 epochs took 0 seconds
convergence after 16 epochs took 0 seconds
convergence after 15 epochs took 0 seconds
convergence after 17 epochs took 0 seconds
convergence after 17 epochs took 0 seconds
convergence after 14 epochs took 0 seconds
7848
Epoch 14, change: 0.00016112967
Epoch 1, change: 1
Epoch 2, change: 0.18403194
Epoch 3, change: 0.03953069
Epoch 4, change: 0.030647831
Epoch 5, change: 0.02671522
Epoch 6, change: 0.011295177
Epoch 7, change: 0.007325098
Epoch 8, change: 0.0038817619
Epoch 9, change: 0.012216608
Epoch 10, change: 0.0035336528
Epoch 11, change: 0.0014329338
Epoch 12, change: 0.00049737614
Epoch 13, change: 0.00034630473
Epoch 14, change: 0.00027827268
Epoch 15, change: 0.00053803119
Epoch 16, change: 0.00020574941
Epoch 1, change: 1
Epoch 2, change: 0.21554334
Epoch 3, change

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]

In [32]:
print(f"Accuracy : {clf_4.best_score_}")
print(f"Parameters : {clf_4.best_params_}")
print(f"Pipeline : {clf_4.best_estimator_}")

Accuracy : 0.7148211091797226
Parameters : {'classifier': RandomForestClassifier(), 'classifier__criterion': 'entropy', 'classifier__max_depth': 4, 'classifier__n_estimators': 300, 'classifier__n_jobs': -1, 'classifier__oob_score': True, 'classifier__warm_start': False, 'scaler': MinMaxScaler()}
Pipeline : Pipeline(steps=[('smote', SMOTE(random_state=42)), ('scaler', MinMaxScaler()),
                ('classifier',
                 RandomForestClassifier(criterion='entropy', max_depth=4,
                                        n_estimators=300, n_jobs=-1,
                                        oob_score=True))])


#### Testing the model

In [34]:
x_train_pred_clf_4 = clf_4.predict(x_train)
print(f"Train accuracy is : {accuracy_score(x_train_pred_clf_4, y_train)}")
x_test_pred_clf_4 = clf_4.predict(x_test)
print(f"Test accuracy is : {accuracy_score(x_test_pred_clf_4, y_test)}")

Train accuracy is : 0.8189233278955954
Test accuracy is : 0.7532467532467533


## Pickling and unpickling the model

In [35]:
joblib.dump(clf_4.best_estimator_, "rf_clf_4.pkl")

['rf_clf_4.pkl']

In [36]:
rf_clf_4 = joblib.load("rf_clf_4.pkl")
rf_clf_4

## Testing after Unpickling the model

In [39]:
test = np.array([[4,125,70,18,122,28.9,1.144,45]])
res = rf_clf_4.predict(test)
res[0]



np.int64(1)