In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import auc, roc_curve, make_scorer, f1_score, precision_score, recall_score, confusion_matrix 
from sklearn.model_selection import cross_val_score, learning_curve, validation_curve, StratifiedKFold, train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

  return f(*args, **kwds)


#### Iris DataSet 

In [31]:
from sklearn.datasets import load_diabetes
iris = load_iris()

In [32]:
X, y = iris.data, iris.target

In [33]:
X.shape

(150, 4)

In [34]:
X_train, X_test ,y_train, y_test = train_test_split(X, y, test_size = 0.20)

In [35]:
X_train.shape

(120, 4)

#### Pipeline 

In [20]:
from sklearn.pipeline import Pipeline, make_pipeline

In [63]:
pipe_lr = make_pipeline(StandardScaler(),
                       PCA(n_components=2),
                       LogisticRegression())

pipe_svc = make_pipeline(StandardScaler(),
                        PCA(n_components=2),
                        SVC())

pipe_dt = make_pipeline(PCA(n_components=2),
                       DecisionTreeClassifier())

pipe_rf = make_pipeline(PCA(n_components=2),
                       RandomForestClassifier())

pipe_knn = make_pipeline(StandardScaler(),
                        PCA(n_components=2),
                        KNeighborsClassifier())

pipe_gb = make_pipeline(PCA(n_components=2),
                       GradientBoostingClassifier())

pipe_ada = make_pipeline(PCA(n_components=2),
                        AdaBoostClassifier())

#### K-Fold Cross Validation

In [36]:
kfold = StratifiedKFold().split(X_train, y_train)

scores_lr= []

for k,(train,test) in enumerate(kfold):
    
    pipe_lr.fit(X_train[train],y_train[train])
    
    score_lr = pipe_lr.score(X_train[test], y_train[test])
    scores_lr.append(score_lr)
    print(f'Fold: {k+1}, Lr Accuracy: {score_lr}')

Fold: 1, Lr Accuracy: 0.8333333333333334
Fold: 2, Lr Accuracy: 0.9583333333333334
Fold: 3, Lr Accuracy: 0.875
Fold: 4, Lr Accuracy: 1.0
Fold: 5, Lr Accuracy: 0.875


In [58]:
def cross_k_fold(X_train, y_train, pipe, model):
    
    scores= []
    kfold = StratifiedKFold().split(X_train, y_train)

    for k,(train,test) in enumerate(kfold):

        pipe_knn.fit(X_train[train],y_train[train])

        score = pipe_knn.score(X_train[test], y_train[test])
        scores.append(score)
        print(f'Fold: {k+1}, {model.upper()} Accuracy: {score}')

In [70]:
print(cross_k_fold(X_train, y_train,pipe_lr, 'Lr'))
print(cross_k_fold(X_train, y_train,pipe_svc, 'svc'))
print(cross_k_fold(X_train, y_train,pipe_knn, 'knn'))
print(cross_k_fold(X_train, y_train,pipe_dt, 'Decision Tree'))
print(cross_k_fold(X_train, y_train,pipe_gb, 'Gradient Boosting'))
print(cross_k_fold(X_train, y_train,pipe_rf, 'Random Forest'))
print(cross_k_fold(X_train, y_train,pipe_ada, 'Ada Boost'))

Fold: 1, LR Accuracy: 0.8333333333333334
Fold: 2, LR Accuracy: 0.9583333333333334
Fold: 3, LR Accuracy: 0.875
Fold: 4, LR Accuracy: 1.0
Fold: 5, LR Accuracy: 0.875
None
Fold: 1, SVC Accuracy: 0.8333333333333334
Fold: 2, SVC Accuracy: 0.9583333333333334
Fold: 3, SVC Accuracy: 0.8333333333333334
Fold: 4, SVC Accuracy: 1.0
Fold: 5, SVC Accuracy: 0.875
None
Fold: 1, KNN Accuracy: 0.8333333333333334
Fold: 2, KNN Accuracy: 0.9583333333333334
Fold: 3, KNN Accuracy: 0.8333333333333334
Fold: 4, KNN Accuracy: 1.0
Fold: 5, KNN Accuracy: 0.9166666666666666
None
Fold: 1, DECISION TREE Accuracy: 0.9166666666666666
Fold: 2, DECISION TREE Accuracy: 0.8333333333333334
Fold: 3, DECISION TREE Accuracy: 0.9583333333333334
Fold: 4, DECISION TREE Accuracy: 0.9583333333333334
Fold: 5, DECISION TREE Accuracy: 0.875
None
Fold: 1, GRADIENT BOOSTING Accuracy: 0.9166666666666666
Fold: 2, GRADIENT BOOSTING Accuracy: 0.8333333333333334
Fold: 3, GRADIENT BOOSTING Accuracy: 0.9583333333333334
Fold: 4, GRADIENT BOOSTI

In [71]:
from Python.cross_val_score import cross_val_score

In [72]:
cross_val_score(pipe_lr, X_train, y_train,cv =10)

CV Accuracy: 0.9: +/- 0.06236095644623234


In [73]:
cross_val_score(pipe_knn, X_train, y_train, cv=10)

CV Accuracy: 0.9083333333333332: +/- 0.05833333333333332


In [74]:
cross_val_score(pipe_svc, X_train, y_train, cv=10)

CV Accuracy: 0.9083333333333332: +/- 0.06922186552431728


In [75]:
cross_val_score(pipe_ada, X_train, y_train, cv=10)

CV Accuracy: 0.8666666666666666: +/- 0.1130388330520878


In [76]:
cross_val_score(pipe_gb, X_train, y_train, cv=10)

CV Accuracy: 0.9249999999999998: +/- 0.05833333333333333


In [77]:
cross_val_score(pipe_rf, X_train, y_train, cv=10)

CV Accuracy: 0.9083333333333334: +/- 0.06922186552431728


In [78]:
cross_val_score(pipe_dt, X_train, y_train, cv=10)

CV Accuracy: 0.9166666666666666: +/- 0.06454972243679027


In [102]:
from Python.learning_curve import learning_curve

In [103]:
learning_curve(pipe_lr, X_train, y_train, 'Lr', cv = 10)

train_mean: [0.9        0.87619048 0.934375   0.91395349 0.88518519 0.8953125
 0.89066667 0.90465116 0.9185567  0.91944444]
 train_std: [1.11022302e-16 3.80952381e-02 1.68286400e-02 2.09302326e-02
 1.38579903e-02 1.22035151e-02 9.97775303e-03 8.70152881e-03
 1.34020619e-02 8.33333333e-03]
test_mean: [0.81666667 0.89166667 0.89166667 0.88333333 0.89166667 0.89166667
 0.88333333 0.89166667 0.89166667 0.9       ]
test_std: [0.05       0.06508541 0.06508541 0.05527708 0.075      0.075
 0.07637626 0.075      0.075      0.06236096]


In [106]:
from Python.lc import learning_curve

In [107]:
learning_curve(pipe_lr, X_train, y_train, 'Lr', cv = 10)

	LR
train_mean: 
[0.9        0.87619048 0.934375   0.91395349 0.88518519 0.8953125
 0.89066667 0.90465116 0.9185567  0.91944444]

 train_std: 
[1.11022302e-16 3.80952381e-02 1.68286400e-02 2.09302326e-02
 1.38579903e-02 1.22035151e-02 9.97775303e-03 8.70152881e-03
 1.34020619e-02 8.33333333e-03]

test_mean: 
[0.81666667 0.89166667 0.89166667 0.88333333 0.89166667 0.89166667
 0.88333333 0.89166667 0.89166667 0.9       ]

test_std: 
[0.05       0.06508541 0.06508541 0.05527708 0.075      0.075
 0.07637626 0.075      0.075      0.06236096]


(array([0.9       , 0.87619048, 0.934375  , 0.91395349, 0.88518519,
        0.8953125 , 0.89066667, 0.90465116, 0.9185567 , 0.91944444]),
 array([1.11022302e-16, 3.80952381e-02, 1.68286400e-02, 2.09302326e-02,
        1.38579903e-02, 1.22035151e-02, 9.97775303e-03, 8.70152881e-03,
        1.34020619e-02, 8.33333333e-03]),
 array([0.81666667, 0.89166667, 0.89166667, 0.88333333, 0.89166667,
        0.89166667, 0.88333333, 0.89166667, 0.89166667, 0.9       ]),
 array([1.11022302e-16, 3.80952381e-02, 1.68286400e-02, 2.09302326e-02,
        1.38579903e-02, 1.22035151e-02, 9.97775303e-03, 8.70152881e-03,
        1.34020619e-02, 8.33333333e-03]))

In [108]:
from Python.plotting import learning_curve

In [109]:
learning_curve([0.9       , 0.87619048, 0.934375  , 0.91395349, 0.88518519,
        0.8953125 , 0.89066667, 0.90465116, 0.9185567 , 0.91944444],[1.11022302e-16, 3.80952381e-02, 1.68286400e-02, 2.09302326e-02,
        1.38579903e-02, 1.22035151e-02, 9.97775303e-03, 8.70152881e-03,
        1.34020619e-02, 8.33333333e-03],[0.81666667, 0.89166667, 0.89166667, 0.88333333, 0.89166667,
        0.89166667, 0.88333333, 0.89166667, 0.89166667, 0.9       ],[1.11022302e-16, 3.80952381e-02, 1.68286400e-02, 2.09302326e-02,
        1.38579903e-02, 1.22035151e-02, 9.97775303e-03, 8.70152881e-03,
        1.34020619e-02, 8.33333333e-03] )

TypeError: learning_curve() missing 1 required positional argument: 'test_std'

In [110]:
from Python.lcp import learning_curve

In [111]:
learning_curve(pipe_lr, X_train, y_train, 'Lr', cv = 10)

	LR
train_mean: 
[0.9        0.87619048 0.934375   0.91395349 0.88518519 0.8953125
 0.89066667 0.90465116 0.9185567  0.91944444]

 train_std: 
[1.11022302e-16 3.80952381e-02 1.68286400e-02 2.09302326e-02
 1.38579903e-02 1.22035151e-02 9.97775303e-03 8.70152881e-03
 1.34020619e-02 8.33333333e-03]

test_mean: 
[0.81666667 0.89166667 0.89166667 0.88333333 0.89166667 0.89166667
 0.88333333 0.89166667 0.89166667 0.9       ]

test_std: 
[0.05       0.06508541 0.06508541 0.05527708 0.075      0.075
 0.07637626 0.075      0.075      0.06236096]


(array([0.9       , 0.87619048, 0.934375  , 0.91395349, 0.88518519,
        0.8953125 , 0.89066667, 0.90465116, 0.9185567 , 0.91944444]),
 array([1.11022302e-16, 3.80952381e-02, 1.68286400e-02, 2.09302326e-02,
        1.38579903e-02, 1.22035151e-02, 9.97775303e-03, 8.70152881e-03,
        1.34020619e-02, 8.33333333e-03]),
 array([0.81666667, 0.89166667, 0.89166667, 0.88333333, 0.89166667,
        0.89166667, 0.88333333, 0.89166667, 0.89166667, 0.9       ]),
 array([0.05      , 0.06508541, 0.06508541, 0.05527708, 0.075     ,
        0.075     , 0.07637626, 0.075     , 0.075     , 0.06236096]),
 array([ 10,  21,  32,  43,  54,  64,  75,  86,  97, 108]))

In [113]:
from Python.lcpp import learning_curve

In [114]:
learning_curve(pipe_lr, X_train, y_train, 'Lr', cv = 10)

	LR
train_mean: 
[0.9        0.87619048 0.934375   0.91395349 0.88518519 0.8953125
 0.89066667 0.90465116 0.9185567  0.91944444]

 train_std: 
[1.11022302e-16 3.80952381e-02 1.68286400e-02 2.09302326e-02
 1.38579903e-02 1.22035151e-02 9.97775303e-03 8.70152881e-03
 1.34020619e-02 8.33333333e-03]

test_mean: 
[0.81666667 0.89166667 0.89166667 0.88333333 0.89166667 0.89166667
 0.88333333 0.89166667 0.89166667 0.9       ]

test_std: 
[0.05       0.06508541 0.06508541 0.05527708 0.075      0.075
 0.07637626 0.075      0.075      0.06236096]


(array([0.9       , 0.87619048, 0.934375  , 0.91395349, 0.88518519,
        0.8953125 , 0.89066667, 0.90465116, 0.9185567 , 0.91944444]),
 array([1.11022302e-16, 3.80952381e-02, 1.68286400e-02, 2.09302326e-02,
        1.38579903e-02, 1.22035151e-02, 9.97775303e-03, 8.70152881e-03,
        1.34020619e-02, 8.33333333e-03]),
 array([0.81666667, 0.89166667, 0.89166667, 0.88333333, 0.89166667,
        0.89166667, 0.88333333, 0.89166667, 0.89166667, 0.9       ]),
 array([0.05      , 0.06508541, 0.06508541, 0.05527708, 0.075     ,
        0.075     , 0.07637626, 0.075     , 0.075     , 0.06236096]),
 array([ 10,  21,  32,  43,  54,  64,  75,  86,  97, 108]))