In [1]:
# Author: Roi Yehoshua <roiyeho@gmail.com>
# Date: January 2014
# License: MIT

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Seed NumPy's global random generator so NumPy-based randomness is repeatable
# across runs (the train/test split further down passes its own random_state).
np.random.seed(seed=42)

In [3]:
from sklearn.datasets import load_iris

# Load the Iris dataset as pandas objects and unpack it directly into
# the feature table (X) and the target labels (y).
X, y = load_iris(
    return_X_y=True,
    as_frame=True,
)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out a test set. Stratifying on y keeps the class proportions the same in
# the training and test parts; the fixed random_state makes the split repeatable.
# No test_size is passed, so the library's default split ratio applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [5]:
from sklearn.pipeline import Pipeline

# Two-step pipeline: standardize the features, then fit a logistic-regression
# classifier with its default settings. Keeping the scaler inside the pipeline
# means it is fitted together with the classifier whenever the pipeline is fitted.
model = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression()),
])

In [6]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the pipeline on the training split only (the test
# split is not touched). No `scoring` is given, so the estimator's default
# score is reported (accuracy for scikit-learn classifiers).
scores = cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    cv=5,
)

# Per-fold scores, then their mean and standard deviation across the folds.
print('CV scores:', scores.round(4))
print(f'Average score: {scores.mean():.4f} ± {scores.std():.4f}')

CV scores: [1.     0.9565 0.9545 1.     0.9091]
Average score: 0.9640 ± 0.0339


In [7]:
# Same 5-fold cross-validation on the training split, but scored with
# macro-averaged F1 ('f1_macro') instead of the estimator's default score.
# Note: this overwrites the `scores` array from the previous cell.
scores = cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    cv=5,
    scoring='f1_macro',
)

# Per-fold scores, then their mean and standard deviation across the folds.
print('CV scores:', scores.round(4))
print(f'Average score: {scores.mean():.4f} ± {scores.std():.4f}')

CV scores: [1.     0.9556 0.9556 1.     0.9048]
Average score: 0.9632 ± 0.0353
