In [101]:
from data_module import load_data

In [103]:
# Load the example dataset through the helper imported from data_module.
# The printed output below shows a table with Name, Age, Gender and Job
# columns, including one missing Age value.
df = load_data()
print(df)

      Name   Age Gender         Job
0     Anna  20.0      f  Programmer
1      Bob  34.0      m      Writer
2  Charlie  23.0      m        Cook
3    Diana   NaN      f  Programmer
4     Eric  33.0      m     Teacher


In [104]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
import pandas as pd


class NameDropper(BaseEstimator, TransformerMixin):
    """Pipeline step that removes the ``Name`` column from a DataFrame."""

    def fit(self, X, y=None):
        """Nothing to learn; returns ``self`` so the step can be chained."""
        return self

    def transform(self, X):
        """Return a new frame without the ``Name`` column.

        Raises ``KeyError`` (from pandas) when ``Name`` is not present.
        """
        without_name = X.drop(columns=['Name'])
        return without_name
    
class AgeImputer(BaseEstimator, TransformerMixin):
    """Fill missing ``Age`` values with the mean age learned during ``fit``.

    The mean is computed once, in ``fit``, and reused by every later
    ``transform`` call, so held-out data is imputed with the training mean
    instead of its own. ``transform`` works on a copy and never modifies the
    frame it is given.
    """

    def fit(self, X, y=None):
        """Learn the mean of ``X['Age']`` (missing values are ignored).

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing an ``Age`` column.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        # Trailing underscore marks state learned from data (sklearn convention).
        self.imputer_ = SimpleImputer(strategy='mean').fit(X[['Age']])
        return self

    def transform(self, X):
        """Return a copy of ``X`` with missing ``Age`` values filled in.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing an ``Age`` column.

        Returns
        -------
        pandas.DataFrame
            Copy of ``X`` whose ``Age`` column has no missing values.
        """
        imputer = getattr(self, 'imputer_', None)
        if imputer is None:
            # Backward compatibility: this transformer used to be stateless,
            # so an unfitted instance still imputes with the mean of the
            # frame it is handed.
            imputer = SimpleImputer(strategy='mean').fit(X[['Age']])

        # Copy first so the caller's DataFrame is left untouched.
        X = X.copy()
        X['Age'] = imputer.transform(X[['Age']])
        return X
    
class GenderTransformer(BaseEstimator, TransformerMixin):
    """Pipeline step that recodes ``Gender``: ``'f'`` becomes 0, ``'m'`` becomes 1."""

    def fit(self, X, y=None):
        """Nothing to learn; returns ``self`` so the step can be chained."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the ``Gender`` column recoded.

        Values other than ``'f'`` and ``'m'`` are passed through unchanged.
        """
        gender_codes = {'f':0, 'm':1}
        # assign() builds a new frame, so the caller's DataFrame is not touched.
        return X.assign(Gender=X['Gender'].replace(gender_codes))
        
class JobEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode the ``Job`` column into ``Job_<value>`` indicator columns.

    The set of job values is learned in ``fit`` and reused by ``transform``,
    so every transformed frame has the same indicator columns in the same
    order, even when some jobs are absent from (or new in) the frame being
    transformed. A job not seen during ``fit`` produces a row with all
    indicators set to zero.
    """

    def fit(self, X, y=None):
        """Record the distinct non-missing values of ``X['Job']``.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing a ``Job`` column.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        # Sorted so the indicator columns come out in the same order that
        # pd.get_dummies produces when applied to the raw values.
        self.categories_ = sorted(X['Job'].dropna().unique())
        return self

    def transform(self, X):
        """Return a new frame with ``Job`` replaced by indicator columns.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing a ``Job`` column.

        Returns
        -------
        pandas.DataFrame
            The other columns of ``X`` followed by one ``Job_<value>`` column
            per job value learned in ``fit``.
        """
        categories = getattr(self, 'categories_', None)
        if categories is not None:
            # A fixed categorical dtype makes get_dummies emit one column per
            # learned category regardless of which values occur in X.
            job_dtype = pd.CategoricalDtype(categories=categories)
            X = X.assign(Job=X['Job'].astype(job_dtype))
        # Unfitted instances fall through to plain get_dummies, matching the
        # earlier stateless behaviour.
        return pd.get_dummies(X, columns=['Job'])

In [105]:
from sklearn.pipeline import Pipeline

# Preprocessing chain; steps run in the order listed:
# drop Name, impute Age, recode Gender, one-hot encode Job.
pipe = Pipeline(
    steps=[
        ("dropper", NameDropper()),
        ("imputer", AgeImputer()),
        ("transformer", GenderTransformer()),
        ("encoder", JobEncoder()),
    ]
)

In [106]:
# Fit every pipeline step on df and keep the fully transformed frame.
z = pipe.fit_transform(df)

In [107]:
# Plain-text dump of the transformed frame (Name dropped, Age imputed,
# Gender recoded, Job expanded into indicator columns).
print(z)

    Age  Gender  Job_Cook  Job_Programmer  Job_Teacher  Job_Writer
0  20.0       0         0               1            0           0
1  34.0       1         0               0            0           1
2  23.0       1         1               0            0           0
3  27.5       0         0               1            0           0
4  33.0       1         0               0            1           0
