In [1]:
# Author: Roi Yehoshua <roiyeho@gmail.com>
# Date: January 2024
# License: MIT

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Seed NumPy's global random number generator so repeated runs are reproducible.
np.random.seed(42)

In [3]:
from sklearn.datasets import load_iris

# Load the Iris dataset as pandas objects: with as_frame=True and return_X_y=True,
# X is the feature DataFrame and y is the target Series.
X, y = load_iris(as_frame=True, return_X_y=True)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out a test set (default split size), stratified on the labels so both
# splits keep the same class proportions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

Defining a pipeline

In [5]:
from sklearn.pipeline import Pipeline

# Chain feature standardization with a logistic-regression classifier.
# Each step is a (name, estimator) pair; the names are used later to
# look up steps and to address their parameters.
model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('clf', LogisticRegression()),
    ]
)

Using the pipeline

In [6]:
# Fit every step in order on the training data only: the scaler learns its
# statistics from X_train, then the classifier is trained on the scaled features.
model.fit(X_train, y_train)

Pipeline(steps=[('scaler', StandardScaler()), ('clf', LogisticRegression())])

In [7]:
# Score the fitted pipeline on both splits; the gap between the two
# numbers shows how well the model generalizes to unseen data.
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)

print(f'Train accuracy: {train_accuracy:.4f}')
print(f'Test accuracy: {test_accuracy:.4f}')

Train accuracy: 0.9732
Test accuracy: 0.9211


In [8]:
# Predict the class of a single test sample. Selecting with a list of positions
# (iloc[[5]]) keeps a one-row DataFrame, so the pipeline receives the same
# feature names it was fitted with. Wrapping a Series in a plain list instead
# drops the names and makes scikit-learn warn about invalid feature names.
y_pred = model.predict(X_test.iloc[[5]])
print(y_pred)

[1]




Modifying steps in the pipeline

In [9]:
# Accessing by index: `steps` is a list, and each entry is a (name, estimator) tuple
model.steps[1]

('clf', LogisticRegression())

In [10]:
# Accessing by name: `named_steps` maps a step name directly to its estimator
model.named_steps['clf']

LogisticRegression()

In [11]:
# Nested parameters are addressed as <step name>__<parameter name>.
# NOTE: this only updates the setting; the pipeline is not refitted here.
# The default 'lbfgs' solver does not support an L1 penalty, so before calling
# fit() again also set clf__solver to one that does (e.g. 'liblinear' or 'saga').
model.set_params(clf__penalty='l1')

Pipeline(steps=[('scaler', StandardScaler()),
                ('clf', LogisticRegression(penalty='l1'))])

In [12]:
# Show the pipeline's text representation, including its non-default parameters.
print(model)

Pipeline(steps=[('scaler', StandardScaler()),
                ('clf', LogisticRegression(penalty='l1'))])


In [13]:
from sklearn.pipeline import make_pipeline

# make_pipeline builds the same kind of Pipeline without explicit step names:
# each step is named automatically after its lowercased class name.
# This rebinds `model` to a new, unfitted pipeline.
model = make_pipeline(StandardScaler(), LogisticRegression())
model

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('logisticregression', LogisticRegression())])