In [1]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [2]:
steps=[("standard_scaler",StandardScaler()),
       ("classifier",LogisticRegression())]

In [3]:
steps

[('standard_scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [4]:
pipe=Pipeline(steps)

In [5]:
from sklearn import set_config

In [6]:
set_config(display="diagram")

In [7]:
pipe

In [8]:
# First, define X and y with your data
# For example:
import numpy as np
import pandas as pd

# Option 1: If you have a DataFrame
# df = pd.read_csv('your_data.csv')
# X = df.drop('target_column', axis=1)
# y = df['target_column']

# Option 2: For demonstration with random data
X = np.random.rand(100, 4)  # 100 samples, 4 features
y = np.random.randint(0, 2, 100)  # Binary target variable

# Now split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [9]:
X_train

array([[0.92840337, 0.17054247, 0.57848839, 0.66252521],
       [0.93503943, 0.26818716, 0.75279355, 0.14692529],
       [0.6095987 , 0.49041276, 0.48452194, 0.0047242 ],
       [0.92649389, 0.34379907, 0.03075987, 0.46626546],
       [0.48676529, 0.92922511, 0.34862538, 0.69030128],
       [0.67802889, 0.90160819, 0.11166041, 0.35601428],
       [0.44300732, 0.51786866, 0.70916903, 0.54406198],
       [0.34096111, 0.2922914 , 0.32378477, 0.29297556],
       [0.48983282, 0.3749238 , 0.45036819, 0.18393234],
       [0.98234105, 0.31512499, 0.43775186, 0.4476034 ],
       [0.29482855, 0.90972932, 0.22474311, 0.15164712],
       [0.80944634, 0.0266574 , 0.69625672, 0.95694618],
       [0.56777017, 0.55800939, 0.71177284, 0.05208073],
       [0.39964208, 0.88170837, 0.53157255, 0.01280531],
       [0.61225973, 0.89929016, 0.45525726, 0.60762976],
       [0.66438207, 0.84271138, 0.23452071, 0.61575516],
       [0.37656565, 0.91790078, 0.61205124, 0.98477823],
       [0.23531395, 0.68776946,

In [10]:
pipe.fit(X_train,y_train)

In [11]:
y_pred=pipe.predict(X_test)

In [12]:
y_pred

array([0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0])

In [13]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [14]:
steps = [('scaling', StandardScaler()),
         ('PCA', PCA(n_components = 3)),
         ('SVC', SVC())]

In [15]:
pipe2 = Pipeline(steps)
pipe2.fit(X_train, y_train)

In [16]:
pipe2.predict(X_test)

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0])

In [18]:
from sklearn.impute import SimpleImputer

In [19]:
import numpy as np
numeric_processor=Pipeline(
    steps=[("imputation_mean",SimpleImputer(missing_values=np.nan,strategy="mean")),("scaler",StandardScaler())]

)

In [20]:
numeric_processor

In [22]:
from sklearn.preprocessing import OneHotEncoder
categorical_processor = Pipeline(
    steps=[("imputation_constant",SimpleImputer(fill_value="missing",strategy="constant")),
           ("onehot",OneHotEncoder(handle_unknown="ignore"))]

)

In [23]:
categorical_processor

In [24]:
from sklearn.compose import ColumnTransformer

In [25]:
preprocessor=ColumnTransformer(
    [("categorical",categorical_processor,["gender","city"]),
     ("numerical",numeric_processor,["age","height"])]
)

In [26]:
preprocessor

In [27]:
from sklearn.pipeline import make_pipeline

In [28]:
pipe=make_pipeline(preprocessor,LogisticRegression())

In [29]:
pipe