## Step 0: Setup

In [2]:
from sklearn.datasets import load_iris                 # dataset
from sklearn.preprocessing import StandardScaler       # converts the distribution to mean = 0, std.dev = 1
from sklearn.linear_model import LogisticRegression    # classification model
from sklearn.pipeline import Pipeline                  # pipeline for scaling --> model
from sklearn.model_selection import cross_val_score    # cross validation

## Step 1: Load dataset

In [3]:
# Load the Iris dataset; the returned object can be iterated by key (see the next cell).
iris = load_iris()

In [4]:
# List every field bundled in the loaded dataset object, one per line.
for key in iris.keys():
    print(key)

data
target
frame
target_names
DESCR
feature_names
filename
data_module


In [5]:
# Split the dataset into the model inputs (X) and the target to predict (y).
X, y = iris.data, iris.target

## Step 2: make a pipeline
1. Scale each feature with standard scaling: mean = 0, standard deviation = 1
2. Apply the classification model to the scaled features

In [6]:
# Chain preprocessing and the classifier into a single estimator.
# Each step is a (name, estimator) pair, listed in the order it is applied.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),                  # 1) standardize the features
        ("model", LogisticRegression(max_iter=1000)),  # 2) classify the scaled features
    ]
)

In [7]:
# Print the pipeline's text representation to confirm the step names and their order.
print(pipe)

Pipeline(steps=[('scaler', StandardScaler()),
                ('model', LogisticRegression(max_iter=1000))])


#### Note:
`scaler` and `model` are not keywords; they are just the names I'm giving to the first and second steps, respectively.

## Step 3: apply cross validation using this pipeline

In [8]:
# Evaluate the whole pipeline (scaling + classifier) with 5-fold cross-validation,
# using accuracy as the metric. The result holds one score per fold.
scores = cross_val_score(pipe, X, y, cv=5, scoring="accuracy")

## Step 4: Results

In [9]:
# Show the individual accuracy score obtained on each cross-validation fold.
print("Cross Validation Scores : ", scores)

Cross Validation Scores :  [0.96666667 1.         0.93333333 0.9        1.        ]


In [10]:
# Average the per-fold accuracies into a single summary figure.
print("Average Mean Accuracy : ", scores.mean())

Average Mean Accuracy :  0.9600000000000002
