In [19]:
from sklearn.datasets import load_iris

df = load_iris()

In [18]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(df.data, df.target, test_size=0.3, random_state=46)

#### pipeline creation
- 1.data preprocessing by using standard scaler
- 2.reduce dimension using PCA
- 3.apply classifier

In [30]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [35]:
lr_pipeline = Pipeline([('scaler1', StandardScaler()),
                      ('pca1',PCA(n_components=2)),
                      ('lr_classifier',LogisticRegression())])
lr_pipeline

Pipeline(steps=[('scaler1', StandardScaler()), ('pca1', PCA(n_components=2)),
                ('lr_classifier', LogisticRegression())])

In [34]:
dt_pipeline = Pipeline([('scaler2',StandardScaler()),
                        ('pca2',PCA(n_components=2)),
                        ('dt_classifier', DecisionTreeClassifier())])
dt_pipeline

Pipeline(steps=[('scaler2', StandardScaler()), ('pca2', PCA(n_components=2)),
                ('dt_classifier', DecisionTreeClassifier())])

In [36]:
rf_pipeline = Pipeline([('scaler3',StandardScaler()),
                        ('pca3',PCA(n_components=2)),
                        ('rf_classifier', RandomForestClassifier())])
rf_pipeline

Pipeline(steps=[('scaler3', StandardScaler()), ('pca3', PCA(n_components=2)),
                ('rf_classifier', RandomForestClassifier())])

In [37]:
#list of pipelines
pipelines = [lr_pipeline, dt_pipeline, rf_pipeline]

In [40]:
pipe_dict = {0:"Logistic Regression", 1:"Decision TreeClassifier", 2:"RandomForest Classifier" }

for pipe in pipelines:
    pipe.fit(X_train, y_train)

In [44]:
for i,model in enumerate(pipelines):
    print("{} Accuracy is {}".format(pipe_dict[i], model.score(X_test,y_test)))

Logistic Regression Accuracy is 0.8666666666666667
Decision TreeClassifier Accuracy is 0.8888888888888888
RandomForest Classifier Accuracy is 0.9111111111111111


In [46]:
best_accuracy =0.0
best_classifier =0
best_pipeline = ""

for i,model in enumerate(pipelines):
    if model.score(X_test, y_test)>best_accuracy:
        best_accuracy = model.score(X_test, y_test)
        best_pipeline = model
        best_classifier = i
print("Classifier with best accuracy:{}".format(pipe_dict[best_classifier]))

Classifier with best accuracy:RandomForest Classifier


(for more)["https://www.youtube.com/watch?v=w9IGkBfOoic&ab_channel=ITkFundeITkFunde"]