# Pipeline creation

## Importing Libraries

In [176]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [177]:
# Two-step pipeline: standardize the features, then fit a logistic regression.
pipeline = Pipeline(
    steps=[
        ('standard_scalar', StandardScaler()),
        ('model', LogisticRegression()),
    ]
)

In [178]:
# Show the pipeline's representation before the display setting is changed below.
pipeline

In [179]:
# Ask scikit-learn to render estimators as diagrams when they are displayed.
from sklearn import set_config
set_config(display='diagram')

In [180]:
# Show the pipeline again, now that display="diagram" has been set.
pipeline

## Creating a synthetic classification dataset

In [181]:
from sklearn.datasets import make_classification
# Generate a synthetic classification dataset with 10,000 samples.
# A fixed random_state makes the data -- and therefore every score computed
# from it further down -- reproducible across Restart & Run All.
X, y = make_classification(n_samples=10000, random_state=0)

In [182]:
X.shape

(10000, 20)

## Splitting dataset into Train set and test set

In [183]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=2
)

In [184]:
X_train.shape

(6700, 20)

In [185]:
# Fit every step of the pipeline on the training split only.
pipeline.fit(X_train, y_train)

In [186]:
# Predict labels for the held-out test split.
y_pred = pipeline.predict(X_test)

In [187]:
# Score the fitted pipeline on the held-out test split.
pipeline.score(X_test, y_test)

0.9190909090909091

## Sample 2
##### Displaying a pipeline with a standard scaler, dimensionality reduction, and then an estimator

In [188]:
# importing libraries
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [189]:
# Three-step pipeline: standardize, project onto 3 principal components,
# then classify with a support vector classifier.
pipeline_2 = Pipeline(
    steps=[
        ("StandardScaler", StandardScaler()),
        ('PCA', PCA(n_components=3)),
        ('SVC', SVC()),
    ]
)

In [190]:
pipeline_2

In [191]:
# Fit the scaler, PCA and SVC steps on the training split.
pipeline_2.fit(X_train, y_train)

In [192]:
# Predict test labels with the second pipeline (this rebinds y_pred from the first pipeline).
y_pred = pipeline_2.predict(X_test)

In [193]:
y_pred

array([0, 1, 0, ..., 1, 1, 1])

In [194]:
# Score the second pipeline on the held-out test split.
pipeline_2.score(X_test, y_test)

0.9236363636363636

## Complex example using a column transformer

In [195]:
from sklearn.impute import SimpleImputer

In [196]:
# Numerical preprocessing pipeline: replace NaNs with the column mean, then standardize.
import numpy as np
numerical_processor = Pipeline(
    steps=[
        ("imputation_mean", SimpleImputer(missing_values=np.nan, strategy='mean')),
        ("Scaler", StandardScaler()),
    ]
)

In [197]:
numerical_processor

## categorical preprocessing pipeline

In [198]:
# Categorical preprocessing: fill missing entries with the string "missing",
# then one-hot encode. The previous second step was StandardScaler, which
# cannot process the string values produced by the imputer, so fitting the
# ColumnTransformer that uses this pipeline would fail.
# OneHotEncoder is imported here so this cell does not depend on a later cell.
from sklearn.preprocessing import OneHotEncoder
categorical_processor = Pipeline(
    steps=[
        ("imputation_constant", SimpleImputer(fill_value="missing", strategy="constant")),
        # handle_unknown="ignore": categories unseen during fit do not raise at transform time.
        ("oneHot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

## One-hot encoder for preprocessing

In [199]:
from sklearn.preprocessing import OneHotEncoder
# Fill missing entries with the constant "missing", then one-hot encode the result.
transformer_pipeline = Pipeline(
    steps=[
        ("imputation_constant", SimpleImputer(fill_value="missing", strategy="constant")),
        ("oneHot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [200]:
transformer_pipeline

In [201]:
# combine processing technique
from sklearn.compose import ColumnTransformer

In [202]:
# Route each group of columns through its own preprocessing pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", categorical_processor, ['gender', 'city']),
        ("numerical", numerical_processor, ["age", 'height']),
    ]
)

In [203]:
preprocessor

In [204]:
from sklearn.pipeline import make_pipeline

In [205]:
# Chain the column preprocessing with a logistic-regression classifier;
# make_pipeline assigns the step names automatically.
pipeline_final = make_pipeline(preprocessor, LogisticRegression())

In [206]:
pipeline_final

In [207]:
# Using one of the pipeline steps that we have created (a step can be accessed by its name)
# pipeline_2['StandardScaler'].fit_transform(X_train)

In [208]:
# from sklearn.svm import SVC
# from sklearn.preprocessing import StandardScaler
# from sklearn.datasets import make_classification
# from sklearn.model_selection import train_test_split
# from sklearn.pipeline import Pipeline

# X, y = make_classification(random_state=0,n_samples=10000)
# print(X.shape)
# X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=0)

# pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])
# # The pipeline can be used as any other estimator
# # and avoids leaking the test set into the train set

# pipe.fit(X_train, y_train)

# Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])
# pipe.score(X_test, y_test)