In [26]:
# Loading libraries

import pandas as pd
import numpy as np

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


In [2]:
iris = load_iris()

In [4]:
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [None]:
# Missing Value
# Oultier Treatment
# Encoding

In [5]:
X=iris.data
y=iris.target

In [7]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=10)

In [8]:
# Now we are going to create ML piplines
# 1. Data preprocressing using Standard Scaler
# 2. Redeuce Dimension using PCA
# 3. Apply Classifier Algo

In [11]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
pca = PCA(n_components=2)
pca_scaled = pca.fit_transform(X_train_scaled)

In [24]:
random_clf = RandomForestClassifier()
random_clf.fit(pca_scaled,y_train)

RandomForestClassifier()

In [25]:
random_clf.score(pca_scaled,y_train)

1.0

## Pipeline Method

In [28]:
pipline_lr = Pipeline([('scaler1',StandardScaler()),
                      ('pca1',PCA(n_components=2)),
                      ('lr_clf',LogisticRegression())])

pipline_dt = Pipeline([('scaler2',StandardScaler()),
                      ('pca2',PCA(n_components=2)),
                      ('dt_clf',DecisionTreeClassifier())])

pipline_rf = Pipeline([('scaler3',StandardScaler()),
                      ('pca3',PCA(n_components=2)),
                      ('rf_clf',RandomForestClassifier())])

In [29]:
pipelines=[pipline_lr,pipline_dt,pipline_rf]
for pipe in pipelines:
  pipe.fit(X_train,y_train)

In [35]:
pipe_dict = {0:'Logistic Regression',1:'Decision Tree',2:'Random Forest'}

In [38]:
for i,model in enumerate(pipelines):
  print(f" The test Accuracy for the {pipe_dict[i]} is {model.score(X_test,y_test)}")

 The test Accuracy for the Logistic Regression is 0.9333333333333333
 The test Accuracy for the Decision Tree is 0.9555555555555556
 The test Accuracy for the Random Forest is 0.9333333333333333


# Pipeline and Grid Search CV 

In [41]:
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

In [58]:
clf = make_pipeline(StandardScaler(),GridSearchCV(LogisticRegression(),param_grid={'solver':['liblinear'],'penalty':['l1','l2']},cv=10))


In [59]:
clf.fit(X_train,y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('gridsearchcv',
                 GridSearchCV(cv=10, estimator=LogisticRegression(),
                              param_grid={'penalty': ['l1', 'l2'],
                                          'solver': ['liblinear']}))])

In [60]:
clf.score(X_test,y_test)

0.9333333333333333

In [None]:
param_grid_list = {'lr_params':[{'penalty': ['l1', 'l2'],'solver': ['liblinear']}], 'dt_params':[{}],'rf_params':[{}]}