# Pipeline Stages

##### Pipelines are mainly used to automate workflows, for example data preprocessing, model training, and model deployment.

## Simple Pipeline

In [1]:
from sklearn.pipeline import Pipeline
## feature scaling
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [49]:
# Ordered (name, estimator) pairs: features are standardized first, then
# handed to the logistic-regression classifier.
steps = [
    ("standard_scaler", StandardScaler()),
    ("Logistic Regression", LogisticRegression()),
]

In [50]:
# Echo the step list to confirm what will go into the pipeline.
steps

[('standard_scaler', StandardScaler()),
 ('Logistic Regression', LogisticRegression())]

In [52]:
# Chain the steps into a single estimator exposing one fit/predict interface.
pipe = Pipeline(steps=steps)

In [53]:
## Visualize the pipeline

In [54]:
from sklearn import set_config
# Show estimators as HTML diagrams (rather than plain text) when displayed.
set_config(display="diagram")

In [55]:
# Evaluating the pipeline as the cell's last expression renders its diagram.
pipe

In [56]:
## creating the dataset

In [57]:
from sklearn.datasets import make_classification

# Synthetic classification data: 10,000 samples with the default 20 features.
# The fixed seed makes the dataset, and every fit/prediction derived from it,
# reproducible across runs, consistent with the seeded train/test split.
X, y = make_classification(n_samples=10000, random_state=42)

In [58]:
# Feature-matrix and label-vector dimensions.
(X.shape, y.shape)

((10000, 20), (10000,))

In [59]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [60]:
# Training split dimensions (80% of the samples).
X_train.shape, y_train.shape

((8000, 20), (8000,))

In [61]:
# Test split dimensions (the held-out 20%).
X_test.shape, y_test.shape

((2000, 20), (2000,))

In [62]:
# Fit each step in order on the training data only: the scaler learns its
# statistics, then the classifier is trained on the scaled features.
pipe.fit(X_train,y_train)

In [63]:
# The fitted scaler is applied to X_test before the classifier predicts.
predictions = pipe.predict(X_test)
predictions

array([0, 0, 1, ..., 0, 0, 1])

In [64]:
## Example 2 

In [65]:
from sklearn.svm import SVC
from sklearn.decomposition import PCA

In [66]:
# Second example: standardize, reduce to 3 principal components, then
# classify with a support-vector classifier.
steps = [
    ("scaling", StandardScaler()),
    ("pca", PCA(n_components=3)),
    ("svc_classifier", SVC()),
]

In [67]:
# Build the scaling -> PCA -> SVC pipeline from the step list.
pipe2 = Pipeline(steps=steps)

In [68]:
# The same import and setting already appear earlier in this notebook;
# repeating them changes nothing when the cells are run in order.
from sklearn import set_config
set_config(display="diagram")

In [69]:
# Render the three-step pipeline as a diagram.
pipe2

In [70]:
# Fit the scaler, PCA, and SVC in sequence on the training data.
pipe2.fit(X_train,y_train)

In [71]:
# Test features go through the fitted scaler and PCA before SVC predicts.
# Note: this rebinds `predictions`, replacing the first pipeline's output.
predictions = pipe2.predict(X_test)
predictions

array([0, 0, 1, ..., 1, 0, 0])

In [72]:
from sklearn.impute import SimpleImputer
import numpy as np
from sklearn.preprocessing import OneHotEncoder

In [73]:
# Numeric columns: replace NaN entries with the column mean, then standardize.
numerical_pipeline = Pipeline(
    steps=[
        ("imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)

In [74]:
# Display the numeric preprocessing pipeline.
numerical_pipeline

In [75]:
# Categorical columns: fill missing entries with the literal "missing"
# category, then one-hot encode. Categories unseen during fit are encoded as
# all zeros (handle_unknown="ignore") instead of raising at predict time.
#
# No StandardScaler here: it only accepts numeric input and would fail on
# string categories, and scaling values before one-hot encoding them is
# meaningless anyway.
categorical_pipeline = Pipeline(
    steps=[
        ("imputation_constant", SimpleImputer(fill_value="missing", strategy="constant")),
        ("one hot encoding", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [76]:
# Display the categorical preprocessing pipeline.
categorical_pipeline

## Complex pipeline

In [84]:
## combining processing techniques

In [85]:
from sklearn.compose import ColumnTransformer

In [86]:
# Route each column group through its own preprocessing pipeline and
# concatenate the results. Selecting columns by name requires the input to be
# a pandas DataFrame containing "gender", "city", "age" and "height"; columns
# not listed are dropped (ColumnTransformer's default remainder="drop").
transformers = ColumnTransformer(
    transformers=[
        ("categorical_processor", categorical_pipeline, ["gender", "city"]),
        ("numerical_processor", numerical_pipeline, ["age", "height"]),
    ]
)

In [87]:
#transformers

In [88]:
from sklearn.pipeline import make_pipeline
# make_pipeline names the steps automatically from the estimator class names,
# so no explicit (name, estimator) tuples are needed. The pipeline is only
# constructed and displayed here; it is not fitted in this notebook.
pipe3=make_pipeline(transformers, LogisticRegression())
pipe3