In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [3]:
steps=[("standard_scaler",StandardScaler()),
       ("classifier",LogisticRegression())]

In [12]:
steps

[('standard_scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [8]:
pipe=Pipeline(steps)

In [9]:
from sklearn import set_config

In [10]:
set_config(display="diagram")

In [11]:
pipe

In [15]:
# First, define X and y with your data
# For example:
import numpy as np
import pandas as pd

# Option 1: If you have a DataFrame
# df = pd.read_csv('your_data.csv')
# X = df.drop('target_column', axis=1)
# y = df['target_column']

# Option 2: For demonstration with random data
X = np.random.rand(100, 4)  # 100 samples, 4 features
y = np.random.randint(0, 2, 100)  # Binary target variable

# Now split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [16]:
X_train

array([[4.02913273e-01, 2.70217462e-01, 1.03895133e-01, 9.59372013e-01],
       [7.36438561e-01, 9.19686774e-01, 9.81500322e-01, 4.47798039e-01],
       [1.36907462e-01, 1.27066733e-01, 9.49639794e-01, 5.62286058e-01],
       [8.08070984e-01, 3.64938132e-01, 8.38181530e-01, 7.54193451e-01],
       [2.02953208e-01, 1.41435291e-01, 2.49295892e-01, 7.62536074e-02],
       [4.18552779e-01, 6.00404554e-01, 6.49857894e-01, 3.29857161e-01],
       [6.22450647e-01, 8.43168347e-01, 4.91658435e-02, 3.50214673e-01],
       [6.38684355e-01, 4.24772027e-02, 8.87002541e-02, 9.84994009e-01],
       [9.38312709e-01, 1.83795508e-01, 6.99406759e-01, 6.09118078e-01],
       [8.73654929e-01, 1.85646723e-01, 1.76723330e-01, 9.39600412e-01],
       [4.26038023e-01, 8.42066194e-01, 3.92643962e-01, 4.14243554e-01],
       [5.83921309e-02, 7.16170054e-01, 6.92870774e-02, 2.38411303e-01],
       [6.50852998e-01, 7.90261558e-01, 9.34794298e-01, 4.48526261e-01],
       [5.82081140e-01, 2.79411540e-01, 3.65097881e

In [17]:
pipe.fit(X_train,y_train)

In [18]:
y_pred=pipe.predict(X_test)

In [20]:
y_pred

array([1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1])

In [21]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [22]:
steps = [('scaling', StandardScaler()),
         ('PCA', PCA(n_components = 3)),
         ('SVC', SVC())]

In [23]:
pipe2 = Pipeline(steps)
pipe2.fit(X_train, y_train)

In [24]:
pipe2.predict(X_test)

array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1])

In [25]:
from sklearn.impute import SimpleImputer

In [26]:
import numpy as np
numeric_processor=Pipeline(
    steps=[("imputation_mean",SimpleImputer(missing_values=np.nan,strategy="mean")),("scaler",StandardScaler())]

)

In [27]:
numeric_processor

In [28]:
from sklearn.preprocessing import OneHotEncoder
categorical_processor = Pipeline(
    steps=[("imputation_constant",SimpleImputer(fill_value="missing",strategy="constant")),
           ("onehot",OneHotEncoder(handle_unknown="ignore"))]

)

In [29]:
categorical_processor

In [30]:
from sklearn.compose import ColumnTransformer

In [33]:
preprocessor=ColumnTransformer(
    [("categorical",categorical_processor,["gender","city"]),
     ("numerical",numeric_processor,["age","height"])]
)

In [34]:
preprocessor

In [35]:
from sklearn.pipeline import make_pipeline

In [36]:
pipe=make_pipeline(preprocessor,LogisticRegression())

In [37]:
pipe