In [36]:
import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, FeatureUnion

In [49]:
# Synthetic demo data: two integer-coded categorical features and a
# continuous target. A seeded generator is used so that "Restart & Run All"
# reproduces exactly the same data (and therefore the same encodings below).
SEED = 42
N_SAMPLES = 100
N_CATEGORIES = 4  # category codes are drawn from 0 .. N_CATEGORIES - 1

rng = np.random.default_rng(SEED)

X = pd.DataFrame(
    data={
        'category_1': rng.integers(low=0, high=N_CATEGORIES, size=N_SAMPLES),
        'category_2': rng.integers(low=0, high=N_CATEGORIES, size=N_SAMPLES),
    }
)

# Target values drawn uniformly from [0, 1).
y = pd.Series(
    data=rng.random(size=N_SAMPLES)
)

In [54]:
from sklearn.base import BaseEstimator, TransformerMixin

class TargetEncoder(BaseEstimator, TransformerMixin):
    """Replace categorical columns with per-category statistics of the target.

    ``fit`` groups the target by the values of each input column and stores
    the mean, standard deviation and skewness of every group. ``transform``
    looks those statistics up and emits three columns per input column:
    ``<col>_mean``, ``<col>_std`` and ``<col>_skew``.

    Fitted state (all dicts keyed by input column name, each value a Series
    indexed by category):
        map_dfs_mean_ -- per-category mean of the target
        map_dfs_std_  -- per-category standard deviation of the target
        map_dfs_skew_ -- per-category skewness of the target
    """

    def fit(
        self,
        X: pd.DataFrame,
        y: pd.Series
    ) -> "TargetEncoder":
        """Learn the per-category target statistics for every column of ``X``.

        Parameters:
            X: frame whose columns are the categorical features to encode.
            y: target values; grouped by each column of ``X``.

        Returns:
            The fitted encoder itself, so calls can be chained
            (``TargetEncoder().fit(X, y).transform(X)``).
        """
        self.map_dfs_mean_ = {}
        self.map_dfs_std_ = {}
        self.map_dfs_skew_ = {}

        for col in X.columns:
            # Group once and reuse the grouping for all three statistics.
            target_by_category = y.groupby(X[col])
            self.map_dfs_mean_[col] = target_by_category.mean()
            self.map_dfs_std_[col] = target_by_category.std()
            self.map_dfs_skew_[col] = target_by_category.skew()

        # scikit-learn's estimator contract: fit returns the estimator.
        return self

    def transform(
        self,
        X: pd.DataFrame,
        y: pd.Series=None
    ) -> pd.DataFrame:
        """Map every column of ``X`` to its fitted target statistics.

        Parameters:
            X: frame with the same columns that were seen in ``fit``.
            y: unused by this method.

        Returns:
            A frame on ``X``'s index with columns ``<col>_mean``,
            ``<col>_std`` and ``<col>_skew`` for each input column, in input
            column order. Categories that were not present during ``fit``
            have no entry in the lookup tables and come out as NaN.
        """
        encoded = {}

        for col in X.columns:
            # f-strings (rather than ``col + "_mean"``) also accept
            # non-string column labels.
            encoded[f"{col}_mean"] = X[col].map(self.map_dfs_mean_[col])
            encoded[f"{col}_std"] = X[col].map(self.map_dfs_std_[col])
            encoded[f"{col}_skew"] = X[col].map(self.map_dfs_skew_[col])

        return pd.DataFrame(encoded, index=X.index)

    def fit_transform(
        self,
        X: pd.DataFrame,
        y: pd.Series
    ) -> pd.DataFrame:
        """Fit on ``(X, y)`` and return the encoding of the same ``X``."""
        return self.fit(X, y).transform(X, y)

In [55]:
# Fit the encoder on (X, y) directly; the encoded frame is the cell's output.
target_encoder = TargetEncoder()
target_encoder.fit_transform(X=X, y=y)

Unnamed: 0,category_1_mean,category_1_std,category_1_skew,category_2_mean,category_2_std,category_2_skew
0,0.601451,0.319873,-0.432545,0.467593,0.319688,0.285702
1,0.503497,0.315911,-0.246697,0.473541,0.318379,-0.261890
2,0.419280,0.312586,0.298602,0.522293,0.253797,-0.253063
3,0.485177,0.267580,0.251365,0.572059,0.332541,-0.230531
4,0.485177,0.267580,0.251365,0.522293,0.253797,-0.253063
...,...,...,...,...,...,...
95,0.601451,0.319873,-0.432545,0.467593,0.319688,0.285702
96,0.503497,0.315911,-0.246697,0.572059,0.332541,-0.230531
97,0.601451,0.319873,-0.432545,0.572059,0.332541,-0.230531
98,0.485177,0.267580,0.251365,0.473541,0.318379,-0.261890


In [56]:
# Route both categorical columns through a TargetEncoder inside a
# ColumnTransformer; the transformed array is the cell's output.
categorical_transformer = ColumnTransformer(
    transformers=[
        ("target_encoding", TargetEncoder(), ["category_1", "category_2"]),
    ]
)
categorical_transformer.fit_transform(X=X, y=y)

array([[ 0.6014512 ,  0.31987313, -0.43254477,  0.46759327,  0.3196881 ,
         0.28570166],
       [ 0.50349697,  0.31591091, -0.24669744,  0.47354101,  0.31837913,
        -0.26189019],
       [ 0.41927966,  0.31258568,  0.29860154,  0.52229292,  0.25379674,
        -0.25306313],
       [ 0.48517733,  0.26758008,  0.25136543,  0.57205934,  0.33254121,
        -0.23053133],
       [ 0.48517733,  0.26758008,  0.25136543,  0.52229292,  0.25379674,
        -0.25306313],
       [ 0.41927966,  0.31258568,  0.29860154,  0.46759327,  0.3196881 ,
         0.28570166],
       [ 0.50349697,  0.31591091, -0.24669744,  0.57205934,  0.33254121,
        -0.23053133],
       [ 0.6014512 ,  0.31987313, -0.43254477,  0.57205934,  0.33254121,
        -0.23053133],
       [ 0.41927966,  0.31258568,  0.29860154,  0.57205934,  0.33254121,
        -0.23053133],
       [ 0.48517733,  0.26758008,  0.25136543,  0.52229292,  0.25379674,
        -0.25306313],
       [ 0.48517733,  0.26758008,  0.25136543,  0.