In [1]:
import pandas as pd

# Toy roster of four people; one age is missing (None).
data = dict(
    name=['khaoula', 'siham', 'sohaib', 'houda'],
    age=[20, 21, None, 22],
    gender=['f', 'f', 'm', 'f'],
    job=['programmer', 'cloud', 'programmer', 'secure'],
)

# Column-oriented dict -> DataFrame (each key becomes a column).
df = pd.DataFrame.from_dict(data)

In [2]:
# Display the raw frame; the None age in row 2 shows up as a missing value.
df

Unnamed: 0,name,age,gender,job
0,khaoula,20.0,f,programmer
1,siham,21.0,f,cloud
2,sohaib,,m,programmer
3,houda,22.0,f,secure


In [6]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Build the processed frame under its own name from a copy of `df`.
# Keeping `df` untouched makes this cell safe to re-run: the original version
# overwrote `df`, so a second run failed because the 'job' column was gone.
df_encoded = df.copy()

# Impute missing ages with the mean of the observed ages.
imputer = SimpleImputer(strategy='mean')
df_encoded['age'] = imputer.fit_transform(df_encoded[['age']])

# Turn 'gender' into a numeric code; values not in the mapping become NaN.
gender_dict = {'m': 0, 'f': 1}
df_encoded['gender'] = df_encoded['gender'].map(gender_dict)

# One-hot encode 'job'. get_dummies joins prefix and category with '_' on its
# own, so the prefix is 'job' (not 'job_', which produced 'job__<category>').
df_encoded = pd.get_dummies(df_encoded, columns=['job'], prefix='job')

# Display the processed frame.
df_encoded


Unnamed: 0,age,gender,job__Doctor,job__Engineer,job__Teacher
0,25.0,0,False,True,False
1,30.0,1,True,False,False
2,30.0,1,False,False,True
3,35.0,0,False,True,False


In [13]:
from sklearn.base import BaseEstimator, TransformerMixin

# Fresh raw roster so the transformers defined below start from unprocessed data.
data = dict(
    name=['khaoula', 'siham', 'sohaib', 'houda'],
    age=[20, 21, None, 22],
    gender=['f', 'f', 'm', 'f'],
    job=['programmer', 'cloud', 'programmer', 'secure'],
)

# Column-oriented dict -> DataFrame (each key becomes a column).
df = pd.DataFrame.from_dict(data)
class NameDropper(BaseEstimator, TransformerMixin):
    """Transformer that removes the ``name`` column from a DataFrame."""

    def fit(self, x, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def transform(self, x):
        """Return a new frame equal to ``x`` minus its ``name`` column.

        Raises ``KeyError`` (from ``DataFrame.drop``) if ``name`` is absent.
        """
        return x.drop(columns=['name'])
        
class AgeImputer(BaseEstimator, TransformerMixin):
    """Fill missing ``age`` values with the mean age learned during ``fit``.

    The mean is computed once, on the frame passed to ``fit``, and reused by
    every later ``transform`` call. This keeps new data (e.g. a test split)
    from being imputed with its own statistics.
    """

    def fit(self, x, y=None):
        """Learn the mean of ``x['age']``.

        Parameters
        ----------
        x : DataFrame with an ``age`` column.
        y : ignored; accepted for compatibility with the sklearn API.

        Returns
        -------
        self
        """
        # Trailing underscore marks state learned from data (sklearn convention).
        self.imputer_ = SimpleImputer(strategy='mean').fit(x[['age']])
        return self

    def transform(self, x):
        """Return a copy of ``x`` whose missing ages are filled with the fitted mean.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        # Local import: the helper is only needed here.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, 'imputer_')
        # Copy first so the caller's frame is never modified in place.
        filled = x.copy()
        filled['age'] = self.imputer_.transform(filled[['age']])
        return filled

class FeatureEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode the ``job`` column using the categories seen during ``fit``.

    A plain ``pd.get_dummies`` call derives its columns from whatever values
    are present in the frame it is given, so two frames with different job
    values would come out with different columns. Fixing the category list at
    fit time makes every ``transform`` call emit the same ``job_<category>``
    columns; job values not seen during ``fit`` yield an all-False row.
    """

    def fit(self, x, y=None):
        """Record the distinct job categories of ``x``.

        Parameters
        ----------
        x : DataFrame with a ``job`` column.
        y : ignored; accepted for compatibility with the sklearn API.

        Returns
        -------
        self
        """
        # pd.Categorical infers the category list from the non-null values,
        # which is the list get_dummies would build for this column itself,
        # so fit_transform output matches a direct get_dummies call.
        self.categories_ = pd.Categorical(x['job']).categories
        return self

    def transform(self, x):
        """Return a new frame with ``job`` replaced by its one-hot columns.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        # Local import: the helper is only needed here.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, 'categories_')
        # Pin the categories so get_dummies emits one column per fitted
        # category, including categories absent from this particular frame.
        job = pd.Categorical(x['job'], categories=self.categories_)
        return pd.get_dummies(x.assign(job=job), columns=['job'], prefix='job')

In [14]:
# One instance of each custom transformer, created in pipeline order.
dropper, imputer, encoder = NameDropper(), AgeImputer(), FeatureEncoder()

In [15]:
# Step 1 on its own: remove the 'name' column from the raw frame.
dropper.fit_transform(df)

Unnamed: 0,age,gender,job
0,20.0,f,programmer
1,21.0,f,cloud
2,,m,programmer
3,22.0,f,secure


In [16]:
# Steps 1-2 chained: drop 'name', then fill the missing age with the mean age.
imputer.fit_transform(dropper.fit_transform(df))

Unnamed: 0,age,gender,job
0,20.0,f,programmer
1,21.0,f,cloud
2,21.0,m,programmer
3,22.0,f,secure


In [17]:
# Steps 1-3 chained: drop 'name', impute age, then one-hot encode 'job'.
encoder.fit_transform(imputer.fit_transform(dropper.fit_transform(df)))

Unnamed: 0,age,gender,job_cloud,job_programmer,job_secure
0,20.0,f,False,True,False
1,21.0,f,True,False,False
2,21.0,m,False,True,False
3,22.0,f,False,False,True


In [18]:
#set pipeline
from sklearn.pipeline import Pipeline

# Wire the three custom transformers into a single sklearn Pipeline:
# drop 'name' -> impute 'age' -> one-hot encode 'job'.
pipe = Pipeline(
    steps=[
        ('dropper', NameDropper()),
        ('imputer', AgeImputer()),
        ('encoder', FeatureEncoder()),
    ]
)

# Fit every step in order on the raw frame and display the transformed result.
pipe.fit_transform(df)

Unnamed: 0,age,gender,job_cloud,job_programmer,job_secure
0,20.0,f,False,True,False
1,21.0,f,True,False,False
2,21.0,m,False,True,False
3,22.0,f,False,False,True
