<a href="https://colab.research.google.com/github/plaban1981/sklearn_pipeline_examples/blob/main/Sklearn_Pipeline_with_Custom_transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
#
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
#
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier,GradientBoostingClassifier
#
from IPython.display import display
#
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.impute._base import SimpleImputer as Imputer
#
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
# 
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
#
import sklearn.metrics as metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
#
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
url = "https://raw.githubusercontent.com/abhi-rawat1/machine_learning_projects/master/Sklearn_Pipeline_Custom_transformer/train.csv"
df = pd.read_csv(url)
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


* 'PassengerId' column is dropped as it wont be used in model training.

In [5]:
df = df.drop('PassengerId', axis=1)
df.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [7]:
df.dtypes

Survived      int64
Pclass        int64
Name         object
Sex          object
Age         float64
SibSp         int64
Parch         int64
Ticket       object
Fare        float64
Cabin        object
Embarked     object
dtype: object

## Split the data into train and test.

In [8]:
X = df.drop('Survived', axis=1)
y = df['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [22]:
X_train.head()

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
801,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31.0,1,1,C.A. 31921,26.25,,S
859,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
855,3,"Aks, Mrs. Sam (Leah Rosen)",female,18.0,0,1,392091,9.35,,S
777,3,"Emanuel, Miss. Virginia Ethel",female,5.0,0,0,364516,12.475,,S
797,3,"Osman, Mrs. Mara",female,31.0,0,0,349244,8.6833,,S


In [23]:
y_train.head()

801    1
859    0
855    1
777    1
797    1
Name: Survived, dtype: int64

* Pclass, Sex, SibSp, Parch and Embarked are Categorical features. We will apply Standard transformers to handle empty values and to encode them into Continuous values.

* Age and Fare are Continuous features. We will apply Standard transformers to handle empty values and to perform feature scaling
* Name and Cabin are Free-Text features and can not be directly used in model training so we will write custom transformation to transform them into some useful data

In [9]:
numeric_features = ['Age', 'Fare']
categorical_features = ['Pclass', 'Sex', 'SibSp', 'Parch', 'Embarked']
name_feature = ['Name']
cabin_feature = ['Cabin']

## Transformers (known as Data pre-processor)


#### Standard Transformers

* numeric_transformer for processing continuous values and 
* categorical_transformer  for processing categorical features values

#### numeric_transformer, there are two steps; 
 * first is to replace empty (NaN) values with median of respective column. 
 * Second step is to apply scaling on continuous features. 
 
#### There are two steps in categorical_transformer for 
* imputing and 
* applying One Hot Encoding

In [10]:
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

In [11]:
cabin_categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='U')),
    ('labelEncoder', LabelEncoder()),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

## Custom Transformers: 

* Now to process Cabin and Name input features, we are going to write Custom Transformers as these two features can’t be directly transformed using Standard transformer (you will understand it better in example put down below). 
* Every Custom Transformer has to define at least two methods ‘fit’ & ‘transform’.

## In the ‘fit’ method

* For ‘Cabin’ feature, replacing all empty (na) values with ‘U’
Replacing cabin values with first char of theirs respective values
* In next lines, we are determining unique values of ‘Cabin’ feature (via get_dummies method) and saving it in ‘self.cabin_columns’. It will be used in ‘transform’ method.

In [12]:
from sklearn.base import BaseEstimator, TransformerMixin

In [13]:
class CabinFeatureTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        print('in the CabinFeatureTransformer init method: ')
        
    def fit(self, x, y=None):
        x.Cabin.fillna('U', inplace=True)
        x['Cabin'] = x['Cabin'].map(lambda c: c[0])
        
        cabin_dummies = pd.get_dummies(x['Cabin'], prefix='Cabin')    
        self.cabin_columns=  cabin_dummies.columns
        return self

    def transform(self, x):
        # replacing missing cabins with U (for Uknown)
        x.Cabin.fillna('U', inplace=True)
    
        # mapping each Cabin value with the cabin letter
        x['Cabin'] = x['Cabin'].map(lambda c: c[0])
        
        cabin_dummies = pd.get_dummies(x['Cabin'], prefix='Cabin') 
        cabin_dummies = cabin_dummies.reindex(columns = self.cabin_columns, fill_value=0)
        
        x = pd.concat([x, cabin_dummies], axis=1)

        x.drop('Cabin', axis=1, inplace=True)
    
        return x

In [14]:
class NameFeatureTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        print('in the NameFeatureTransformer Init method: ')
        
    def fit(self, x, y=None):
        return self

    def transform(self, x):
        Title_Dictionary = {
                "Capt": "Officer", "Col": "Officer", "Major": "Officer","Jonkheer": "Royalty",
                "Don": "Royalty","Sir" : "Royalty","Dr": "Officer","Rev": "Officer","the Countess":"Royalty",
                "Mme": "Mrs", "Mlle": "Miss", "Ms": "Mrs", "Mr" : "Mr", "Mrs" : "Mrs", "Miss" : "Miss",
                "Master" : "Master", "Lady" : "Royalty"}
        
        x['Title'] = x['Name'].map(lambda name:name.split(',')[1].split('.')[0].strip())
        x['Title'] = x.Title.map(Title_Dictionary)
        
        x.drop('Name', axis=1, inplace=True)
    
        titles_dummies = pd.get_dummies(x['Title'], prefix='Title')
        x = pd.concat([x, titles_dummies], axis=1)
    
        x.drop('Title', axis=1, inplace=True)
        return x.values

## ColumnTransformer to combine all transformers definition

In [15]:
transformer = ColumnTransformer(
    transformers=[
        ('numeric_data_preprocessing', numeric_transformer, numeric_features), #standard transformer
        ('categorical_data_preprocessing', categorical_transformer, categorical_features),
        ('cabin_data_preprocessing', CabinFeatureTransformer(), cabin_feature),#
        ('name_data_preprocessing', NameFeatureTransformer(), name_feature)
    ])

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


In [43]:
final_pipeline = Pipeline(steps=[('transformer', transformer),
                      ('rf_estimator', RandomForestClassifier())])

In [33]:
final_pipeline.fit(X_train, y_train)

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Pipeline(steps=[('transformer',
                 ColumnTransformer(transformers=[('numeric_data_preprocessing',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  ['Age', 'Fare']),
                                                 ('categorical_data_preprocessing',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value='missing',
                                                                                 strategy='constant')),
                                                                  ('onehot',
                             

In [34]:
y_pred = final_pipeline.predict(X_test)

In [35]:
print("Accuracy Score: ", accuracy_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred, average='weighted'))
print("Precision Score: ", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score: ", recall_score(y_test, y_pred, average='weighted'))

Accuracy Score:  0.8044692737430168
F1 Score:  0.8025751860989428
Precision Score:  0.8020231669980749
Recall Score:  0.8044692737430168


## Save the model

In [36]:
import joblib
joblib.dump(final_pipeline,"Transformer_pipeline.pkl")

['Transformer_pipeline.pkl']

## Load the model

In [37]:
rf_pickle = joblib.load('/content/Transformer_pipeline.pkl')
y_pred = rf_pickle.predict(X_test)

print("Accuracy Score: ", accuracy_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred, average='weighted'))
print("Precision Score: ", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score: ", recall_score(y_test, y_pred, average='weighted'))

Accuracy Score:  0.8044692737430168
F1 Score:  0.8025751860989428
Precision Score:  0.8020231669980749
Recall Score:  0.8044692737430168


In [29]:
X_train.shape

(712, 10)

## Hyperparameter tuning

In [27]:
final_pipeline.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'transformer', 'rf_estimator', 'transformer__n_jobs', 'transformer__remainder', 'transformer__sparse_threshold', 'transformer__transformer_weights', 'transformer__transformers', 'transformer__verbose', 'transformer__verbose_feature_names_out', 'transformer__numeric_data_preprocessing', 'transformer__categorical_data_preprocessing', 'transformer__cabin_data_preprocessing', 'transformer__name_data_preprocessing', 'transformer__numeric_data_preprocessing__memory', 'transformer__numeric_data_preprocessing__steps', 'transformer__numeric_data_preprocessing__verbose', 'transformer__numeric_data_preprocessing__imputer', 'transformer__numeric_data_preprocessing__scaler', 'transformer__numeric_data_preprocessing__imputer__add_indicator', 'transformer__numeric_data_preprocessing__imputer__copy', 'transformer__numeric_data_preprocessing__imputer__fill_value', 'transformer__numeric_data_preprocessing__imputer__missing_values', 'transformer__numeric_data_prep

In [40]:
from sklearn.model_selection import GridSearchCV
from sklearn import *
grid_params ={'rf_estimator__max_depth':[2,4],
              'rf_estimator__n_estimators':[100,200,300],
              'rf_estimator__class_weight':[{0:1,1:5},'balanced']}

clf = GridSearchCV(final_pipeline, grid_params)
clf.fit(X_train, y_train)
print("Best Score: ", clf.best_score_)
print("Best Params: ", clf.best_params_)

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/metaestimators.py", line 113, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)  # noqa
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 711, in score
    return self.steps[-1][1].score(Xt, y, **score_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_forest.py", line 808, in predict
    proba = self.predict_proba(X)
  File "/usr/local/lib/python3.7/dist-package

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 
Best Score:  nan
Best Params:  {'rf_estimator__class_weight': {0: 1, 1: 5}, 'rf_estimator__max_depth': 2, 'rf_estimator__n_estimators': 100}




In [51]:
final_pipeline = Pipeline(steps=[('transformer', transformer),
                      ('rf_estimator', RandomForestClassifier(max_depth=2,n_estimators=200))])
final_pipeline.fit(X_train, y_train)

in the CabinFeatureTransformer init method: 
in the NameFeatureTransformer Init method: 


Pipeline(steps=[('transformer',
                 ColumnTransformer(transformers=[('numeric_data_preprocessing',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  ['Age', 'Fare']),
                                                 ('categorical_data_preprocessing',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value='missing',
                                                                                 strategy='constant')),
                                                                  ('onehot',
                             

In [52]:
y_pred = final_pipeline.predict(X_test)
print("Accuracy Score: ", accuracy_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred, average='weighted'))
print("Precision Score: ", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score: ", recall_score(y_test, y_pred, average='weighted'))

Accuracy Score:  0.8100558659217877
F1 Score:  0.8086157049934307
Precision Score:  0.8080371813529882
Recall Score:  0.8100558659217877
