# Selecting The Best Model For Machine Learnig Task
To select best model when using multiple models in a pipeline you can use techniques like cross-validation and evluation metrics to compare their performance.Here's an example of how to accomplish this on titanic dataset.

In [8]:
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 
import numpy as np 
from sklearn.model_selection import train_test_split,cross_val_score, GridSearchCV
from sklearn. preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score 
# load the dataset 
df = sns.load_dataset('titanic')
# select features and target variable 
x = df[['pclass','sex','fare','age','embarked',]]
y = df['survived']
# split the data into traning and testing data 
x_train, x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
# create a list of model to test 
models = [('Random Forest',RandomForestClassifier(random_state=42)),
          ("Gradient Boosting",GradientBoostingClassifier(random_state=42)),
          ("xgboost", XGBClassifier(random_state=42))]

best_model= None
best_accuracy= 0.0
# iterate over the models and save their performance 
for name ,model in models:
# create a pipeline for each model 
    pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ("encoder", OneHotEncoder(handle_unknown='ignore')),
        ("model", model)
    ])
# perform a cross validation 
    scores = cross_val_score(pipeline,x_train,y_train,cv=5)
# calcualte mean accuracy 
    mean_accuracy= scores.mean()
# fit the pipeline on the training data 
    pipeline.fit(x_train,y_train)
# make predication on the test data 
    y_pred =pipeline.predict(x_test)
    accuracy= accuracy_score(y_test,y_pred)
# print the performance matrics
    print('model:', name)
    print('cross_validation_accuracy:',mean_accuracy)
    print('Test_accuracy:',accuracy)
# check if the current model has the best accuracy 
    if accuracy > best_accuracy:
        best_accuracy=accuracy
        best_model=pipeline
# print the best model
print("Best model:",best_model)

model: Random Forest
cross_validation_accuracy: 0.7991726583275879
Test_accuracy: 0.8156424581005587
model: Gradient Boosting
cross_validation_accuracy: 0.8062050625430907
Test_accuracy: 0.8044692737430168
model: xgboost
cross_validation_accuracy: 0.8090318132571653
Test_accuracy: 0.7932960893854749
Best model: Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')),
                ('encoder', OneHotEncoder(handle_unknown='ignore')),
                ('model', RandomForestClassifier(random_state=42))])
