In [25]:
import pandas as pd

from sklearn.datasets import fetch_california_housing

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, Normalizer

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor

from sklearn import set_config
set_config(display='diagram')

from joblib import load, dump

In [3]:
X, y = fetch_california_housing(return_X_y=True, as_frame=True)

In [5]:
X.sample(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
15488,3.8967,12.0,5.967172,1.030303,1211.0,3.058081,33.14,-117.12
4916,1.75,47.0,3.627907,1.102326,888.0,4.130233,34.0,-118.27
4418,3.1864,52.0,4.607038,1.121701,1728.0,2.533724,34.09,-118.25
7019,3.7326,35.0,5.425662,1.101833,2033.0,4.14053,33.97,-118.09
2404,1.7292,33.0,4.513678,1.136778,1544.0,4.693009,36.62,-119.31


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [7]:
steps = [
    ( 'scaler', MinMaxScaler() ),
    ( 'normalizer', Normalizer() ),
    ( 'model', KNeighborsRegressor() )
]

In [8]:
pipeline = Pipeline(steps=steps)

In [9]:
pipeline

In [10]:
type(pipeline)

sklearn.pipeline.Pipeline

In [11]:
pipeline.named_steps

{'scaler': MinMaxScaler(),
 'normalizer': Normalizer(),
 'model': KNeighborsRegressor()}

In [12]:
pipeline.steps

[('scaler', MinMaxScaler()),
 ('normalizer', Normalizer()),
 ('model', KNeighborsRegressor())]

In [17]:
pipeline.fit(X_train, y_train)

In [18]:
pipeline['model']

In [19]:
pipeline['model'].get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [21]:
pipeline['scaler'].get_params()

{'copy': True, 'feature_range': (0, 1)}

In [22]:
pipeline['model'].predict(X_test)

array([1.5438, 1.6272, 1.6272, ..., 1.1934, 1.6272, 1.6272])

In [23]:
pipeline.predict(X_test)

array([2.3254  , 1.9138  , 0.9158  , ..., 5.00001 , 4.609206, 2.438   ])

In [24]:
pipeline.score(X_test, y_test)

0.6393229150380757

In [26]:
dump(pipeline, 'pipeline.pkl')

['pipeline.pkl']

In [27]:
p = load('pipeline.pkl')

In [28]:
p

In [29]:
p.predict(X)

array([5.00001, 4.1406 , 3.8468 , ..., 0.8112 , 0.866  , 1.0674 ])