In [31]:
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

data = {
    'Name': ['Anna', 'Bob', 'Charlie', 'Diana', 'Eric'],
    'Age': [20, 34, 23, None, 33],
    'Gender': ['f', 'm', 'm', 'f', 'm'],
    'Job': ['Programmer', 'Writer', 'Cook', 'Programmer', 'Teacher']
}

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Gender,Job
0,Anna,20.0,f,Programmer
1,Bob,34.0,m,Writer
2,Charlie,23.0,m,Cook
3,Diana,,f,Programmer
4,Eric,33.0,m,Teacher


In [34]:
class NameDropper (BaseEstimator, TransformerMixin):
    '''
    Transformer Class to drop the Name Column of the DataFrame.
    '''
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        return X.drop(['Name'], axis=1)
    
class AgeImputer(BaseEstimator, TransformerMixin):
    '''
    Transformer Class to replace all None values in the Age Column with the mean value.
    '''
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        imputer = SimpleImputer(strategy='mean')
        X['Age'] = imputer.fit_transform(X[['Age']])
        return X
    
class FeatureEncoder(BaseEstimator, TransformerMixin):
    '''
    Transformer Class to transform the gender column into numerical values
    and apply OneHot Encoding to the Jobs column.
    '''
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        # Numeric Gender
        gender_dct = {'m': 0, 'f': 1}
        X['Gender'] = [gender_dct[g] for g in X['Gender']]

        # OneHotEncoder Jobs
        encoder = OneHotEncoder()
        matrix = encoder.fit_transform(X[['Job']]).toarray()

        column_names = ['Programmer', 'Writer', 'Cook', 'Teacher']

        for i in range(len(matrix.T)):
            X[column_names[i]] = matrix.T[i]

        X = X.drop(['Job'], axis=1)
        return X

In [33]:
from sklearn.pipeline import Pipeline

pipe = Pipeline([
    ("dropper", NameDropper()),
    ('imputer', AgeImputer()),
    ('encoder', FeatureEncoder())
])

pipe.fit_transform(df)

Unnamed: 0,Age,Gender,Programmer,Writer,Cook,Teacher
0,20.0,1,0.0,1.0,0.0,0.0
1,34.0,0,0.0,0.0,0.0,1.0
2,23.0,0,1.0,0.0,0.0,0.0
3,27.5,1,0.0,1.0,0.0,0.0
4,33.0,0,0.0,0.0,1.0,0.0
