In [1]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns

In [6]:
from sklearn.datasets import load_iris
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import KBinsDiscretizer, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Load iris as a (DataFrame, Series) pair so columns can be selected by name.
X, y = load_iris(as_frame=True, return_X_y=True)

# Train/test split, seeded so the reported score is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Column-wise preprocessing: one transformer per named column.
# Columns not listed below (here: petal width) are forwarded unchanged
# because of remainder='passthrough'.
trans = ColumnTransformer(
    transformers=[
        ('impute', SimpleImputer(), ['sepal length (cm)']),
        ('scale', StandardScaler(), ['sepal width (cm)']),
        ('discrete', KBinsDiscretizer(n_bins=3, encode='ordinal'), ['petal length (cm)']),
    ],
    remainder='passthrough',
)

# Classifier, chained behind the preprocessing step.
# NOTE: the pipeline holds a reference to `trans` itself, so fitting `p`
# also fits `trans`.
lr = LogisticRegression()
p = Pipeline(steps=[('preprocess', trans), ('model', lr)])

# Fit preprocessing and model together on the training split.
p.fit(X_train, y_train)

# Predict on the held-out split and report the pipeline's score on it.
y_pred = p.predict(X_test)
print(p.score(X_test, y_test))


0.8947368421052632


In [8]:
# Re-wrap the training features as a DataFrame labelled with the columns of X,
# then preview the first rows. This relies on `X` still being the iris frame
# (it must expose `.columns`), so run it before the regression cell rebinds X.
X_train = pd.DataFrame(data=X_train, columns=X.columns)
X_train.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
61,5.9,3.0,4.2,1.5
92,5.8,2.6,4.0,1.2
112,6.8,3.0,5.5,2.1
2,4.7,3.2,1.3,0.2
141,6.9,3.1,5.1,2.3


In [7]:
# Fit the column transformer on its own and inspect what it produces.
trans.fit(X_train, y_train)

# The transformer returns a plain array, so the labels are re-attached by hand.
# NOTE: these hard-coded labels line up only because the transformers are
# declared in the same order as the original columns and the passthrough
# column comes last; the original row index is not carried over.
res = pd.DataFrame(
    data=trans.transform(X_train),
    columns=[
        'sepal length (cm)',
        'sepal width (cm)',
        'petal length (cm)',
        'petal width (cm)',
    ],
)
res.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.9,-0.119255,1.0,1.5
1,5.8,-1.040395,1.0,1.2
2,6.8,-0.119255,2.0,2.1
3,4.7,0.341315,0.0,0.2
4,6.9,0.11103,2.0,2.3


In [16]:
from sklearn.compose import TransformedTargetRegressor
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

# Synthetic regression data: 100 samples, 4 features, Gaussian noise, fixed seed.
# NOTE: this rebinds X, y and the train/test splits that previously held iris data.
X, y = make_regression(
    n_samples=100,
    n_features=4,
    noise=20,
    random_state=0,
)

# Seeded train/test split; show a few true targets for comparison.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print(y_test[:5])

# Linear model wrapped so that the *target* is standardised before fitting;
# predictions come back on the original target scale.
lr = LinearRegression()
trans1 = TransformedTargetRegressor(regressor=lr, transformer=StandardScaler())
trans1.fit(X_train, y_train)

# Predictions for the held-out rows, comparable with the targets printed above.
yhat = trans1.predict(X_test)
print(yhat[:5])

# Score on the held-out split.
print(trans1.score(X_test, y_test))

[-33.99090845 -51.27969551 109.68742613 -34.01817192 142.26248124]
[-13.17071994 -50.96360562 118.24382486 -31.21578889 135.52505508]
0.9235860879265834


In [18]:
import joblib

# Persist the fitted estimators to the working directory.
# The last call is left as the cell's final expression so its return value
# (the list of written file names) is displayed.
joblib.dump(value=trans, filename='ColumnTransformer.pkl')
joblib.dump(value=trans1, filename='TransformedTargetRegressor.pkl')
joblib.dump(value=p, filename='Pipeline_of_iris.pkl')

['Pipeline_of_iris.pkl']

In [19]:
# Restore the persisted estimators under new names.
# SECURITY: joblib.load unpickles its input and can execute arbitrary code;
# only load files you trust (these are the ones written by the dump cell).
col_trans = joblib.load(filename='ColumnTransformer.pkl')
tar_trans = joblib.load(filename='TransformedTargetRegressor.pkl')
pipe = joblib.load(filename='Pipeline_of_iris.pkl')

In [23]:
# Persist the current training features. This must run after the regression
# cell so that X_train holds the make_regression split, as the file name implies.
joblib.dump(value=X_train, filename='X_train_regression.pkl')

['X_train_regression.pkl']

In [20]:
# Show the reloaded column transformer's configuration as plain text.
print(col_trans)

ColumnTransformer(remainder='passthrough',
                  transformers=[('impute', SimpleImputer(),
                                 ['sepal length (cm)']),
                                ('scale', StandardScaler(),
                                 ['sepal width (cm)']),
                                ('discrete',
                                 KBinsDiscretizer(encode='ordinal', n_bins=3),
                                 ['petal length (cm)'])])


In [21]:
# Show the reloaded target-transforming regressor's configuration as plain text.
print(tar_trans)

TransformedTargetRegressor(regressor=LinearRegression(),
                           transformer=StandardScaler())


In [22]:
# Show the reloaded iris pipeline (preprocess + model steps) as plain text.
print(pipe)

Pipeline(steps=[('preprocess',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('impute', SimpleImputer(),
                                                  ['sepal length (cm)']),
                                                 ('scale', StandardScaler(),
                                                  ['sepal width (cm)']),
                                                 ('discrete',
                                                  KBinsDiscretizer(encode='ordinal',
                                                                   n_bins=3),
                                                  ['petal length (cm)'])])),
                ('model', LogisticRegression())])


ThE ENd