# Custom Transformers

Here are some examples of how to create custom scikit-learn transformers that apply a transformation to a column, select columns, or drop columns.

In [54]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.base import TransformerMixin, BaseEstimator

## Uppercase

In [55]:
class UppercaseColumnTransformer(TransformerMixin, BaseEstimator):
    """Upper-case the string values of a single DataFrame column.

    ``BaseEstimator`` is inherited (after the mixin) so the transformer
    exposes ``get_params``/``set_params``, which scikit-learn utilities such
    as ``clone`` and grid search rely on.

    Parameters
    ----------
    column : hashable, default=None
        Label of the column to upper-case. It must name an existing column
        holding string values by the time ``transform`` is called.
    """

    def __init__(self, column=None):
        # This is the column to apply the transformation to
        self.column = column

    def fit(self, X, y=None, **fit_params):
        """Learn nothing and return ``self``; the transformer is stateless."""
        return self

    def transform(self, X, **transform_params):
        """Return a copy of ``X`` whose ``self.column`` values are upper-cased.

        The input frame is never modified: the work is done on a copy, so
        the caller's data keeps its original casing.
        """
        transformed = X.copy()
        # Vectorized pandas string operation applied to the whole column
        transformed[self.column] = X[self.column].str.upper()
        return transformed

In [56]:
# Sample frame: one text column with mixed casing plus two integer columns
df = pd.DataFrame(
    dict(
        id=[1, 2, 3, 4],
        text=["foo", "Bar", "BAz", "quux"],
        number=[111, 121, 13, 114],
    )
)
df

Unnamed: 0,id,text,number
0,1,foo,111
1,2,Bar,121
2,3,BAz,13
3,4,quux,114


In [57]:
# Single-step pipeline that upper-cases the "text" column
pipe = Pipeline(steps=[('uppercase', UppercaseColumnTransformer(column='text'))])

In [58]:
# Fit the pipeline on df and transform it in one call; as the last
# expression of the cell, the returned frame is what gets displayed.
pipe.fit_transform(df)

Unnamed: 0,id,text,number
0,1,FOO,111
1,2,BAR,121
2,3,BAZ,13
3,4,QUUX,114


## Select Columns

In [59]:
class SelectColumnsTransformer(TransformerMixin, BaseEstimator):
    """Keep only the requested columns of a DataFrame.

    ``BaseEstimator`` is inherited (after the mixin) so the transformer
    exposes ``get_params``/``set_params``, which scikit-learn utilities such
    as ``clone`` and grid search rely on.

    Parameters
    ----------
    columns : list of column labels, default=None
        Columns to keep, in the order they should appear in the output.
        ``None`` keeps every column.
    """

    def __init__(self, columns=None):
        self.columns = columns

    def fit(self, X, y=None, **fit_params):
        """Learn nothing and return ``self``; the transformer is stateless."""
        return self

    def transform(self, X, **transform_params):
        """Return a copy of ``X`` restricted to ``self.columns``.

        The result is always a copy, so later changes to it do not affect
        the input frame.
        """
        if self.columns is None:
            # The default means "no selection": indexing with None would
            # fail with a KeyError, so pass every column through instead.
            return X.copy()
        return X[self.columns].copy()

In [60]:
# Example frame for column selection; "age" contains one missing value
df = pd.DataFrame(
    dict(
        name=['alice', 'bob', 'charlie', 'david', 'edward'],
        age=[24, 32, np.nan, 38, 20],
        number=[111, 121, 13, 114, 123],
    )
)
df

Unnamed: 0,name,age,number
0,alice,24.0,111
1,bob,32.0,121
2,charlie,,13
3,david,38.0,114
4,edward,20.0,123


In [61]:
# Single-step pipeline that keeps only the "name" and "age" columns
pipe = Pipeline(steps=[('selector', SelectColumnsTransformer(columns=['name', 'age']))])

In [62]:
# Fit the pipeline on df and transform it in one call; as the last
# expression of the cell, the returned frame is what gets displayed.
pipe.fit_transform(df)

Unnamed: 0,name,age
0,alice,24.0
1,bob,32.0
2,charlie,
3,david,38.0
4,edward,20.0


## Drop Columns

In [63]:
class DropColumnsTransformer(TransformerMixin, BaseEstimator):
    """Remove the requested columns from a DataFrame.

    ``BaseEstimator`` is inherited (after the mixin) so the transformer
    exposes ``get_params``/``set_params``, which scikit-learn utilities such
    as ``clone`` and grid search rely on.

    Parameters
    ----------
    columns : list of column labels, default=None
        Columns to remove. ``None`` removes nothing.
    """

    def __init__(self, columns=None):
        self.columns = columns

    def fit(self, X, y=None, **fit_params):
        """Learn nothing and return ``self``; the transformer is stateless."""
        return self

    def transform(self, X, **transform_params):
        """Return a copy of ``X`` without the columns in ``self.columns``.

        The result is always a copy, so later changes to it do not affect
        the input frame.
        """
        if self.columns is None:
            # The default means "nothing to drop": DataFrame.drop rejects a
            # call with no labels, so return the frame unchanged (as a copy).
            return X.copy()
        return X.drop(columns=self.columns).copy()

In [64]:
# Example frame for column dropping; "age" contains one missing value
df = pd.DataFrame(
    dict(
        name=['alice', 'bob', 'charlie', 'david', 'edward'],
        age=[24, 32, np.nan, 38, 20],
        number=[111, 121, 13, 114, 123],
    )
)

In [65]:
# Create a pipeline with a single transformer that removes the "age" and
# "name" columns. The step is named after what it does (it drops columns,
# it does not select them).
pipe = Pipeline([
    ('dropper', DropColumnsTransformer(['age', 'name']))
])

In [66]:
# Fit the pipeline on df and transform it in one call; as the last
# expression of the cell, the returned frame is what gets displayed.
pipe.fit_transform(df)

Unnamed: 0,number
0,111
1,121
2,13
3,114
4,123
