In [1]:
import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler


class MyTransformer(BaseEstimator, TransformerMixin):

    def __init__(self): 
        self._mean_X = None
        self._std_X = None
        self._mean_y = None
        self._std_y = None

    def fit(self, X: np.array, y: np.array = None):
        if isinstance(X, pd.DataFrame):
            self._mean_X = X.values.mean(axis=0)
            self._std_X = X.values.std(axis=0)
        else:
            self._mean_X = X.mean(axis=0)
            self._std_X = X.std(axis=0)

        if y is None:
            return self

        if isinstance(X, pd.DataFrame):
            self._mean_y = y.values.mean()
            self._std_y = y.values.std()
        else:
            self._mean_y = y.mean()
            self._std_y = y.std()
        return self

    def transform(self, X: np.array, y: np.array = None):
        X_ = (X.copy() - self._mean_X) / self._std_X

        if y is None:
            return X_

        y_ = (y.copy() - self._mean_y) / self._std_y
        return X_, y_

In [2]:
df = pd.DataFrame({
    'a': range(2, 12, 2),
    'b': range(20, 120, 20),
    'c': np.linspace(0, 1, 5),
    'd': np.linspace(-1, 1, 5)
})
df

Unnamed: 0,a,b,c,d
0,2,20,0.0,-1.0
1,4,40,0.25,-0.5
2,6,60,0.5,0.0
3,8,80,0.75,0.5
4,10,100,1.0,1.0


In [3]:
MyTransformer().fit_transform(df)

Unnamed: 0,a,b,c,d
0,-1.414214,-1.414214,-1.414214,-1.414214
1,-0.707107,-0.707107,-0.707107,-0.707107
2,0.0,0.0,0.0,0.0
3,0.707107,0.707107,0.707107,0.707107
4,1.414214,1.414214,1.414214,1.414214


In [4]:
StandardScaler().fit_transform(df)

array([[-1.41421356, -1.41421356, -1.41421356, -1.41421356],
       [-0.70710678, -0.70710678, -0.70710678, -0.70710678],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.70710678,  0.70710678,  0.70710678,  0.70710678],
       [ 1.41421356,  1.41421356,  1.41421356,  1.41421356]])

In [5]:
X_train = np.array([[ 1., -1.,  3.],
                    [ 3.,  0.,  0.],
                    [ 0.,  1., -1.]])

In [6]:
StandardScaler().fit_transform(X_train)

array([[-0.26726124, -1.22474487,  1.37281295],
       [ 1.33630621,  0.        , -0.39223227],
       [-1.06904497,  1.22474487, -0.98058068]])

In [7]:
MyTransformer().fit_transform(X_train)

array([[-0.26726124, -1.22474487,  1.37281295],
       [ 1.33630621,  0.        , -0.39223227],
       [-1.06904497,  1.22474487, -0.98058068]])

In [8]:
N = 100
X0 = np.random.uniform(size=N).reshape(-1,1)
X1 = np.random.normal(size=N).reshape(-1,1)
X2 = np.random.binomial(n=10, p=0.2, size=N).reshape(-1,1)
X3 = np.random.exponential(size=N).reshape(-1,1)
X4 = np.random.poisson(lam=1.0, size=N).reshape(-1,1)
X5 = np.random.triangular(left=-1.0, mode=0.0, right=1.0, size=N).reshape(-1,1)
X6 = np.random.weibull(a=1.0, size=N).reshape(-1,1)
X7 = np.random.weibull(a=5.0, size=N).reshape(-1,1)

In [9]:
from sklearn.preprocessing import (MaxAbsScaler, MinMaxScaler, Normalizer, PowerTransformer,
                                   QuantileTransformer, StandardScaler)

def transform(X):
    return pd.DataFrame({
        'X': X.flatten(),
        'BoxCox': PowerTransformer(method='box-cox', standardize=False).fit_transform(X).flatten(),
        'BoxCox_Std': PowerTransformer(method='box-cox', standardize=True).fit_transform(X).flatten(),
        'MaxAbs': MaxAbsScaler().fit_transform(X).flatten(),
        'MinMax': MinMaxScaler().fit_transform(X).flatten(),
        'NormalizerL1': Normalizer(norm='l1').fit_transform(X).flatten(),  # Same as: preprocessing.normalize(df, norm='l1')
        'NormalizerL2': Normalizer(norm='l2').fit_transform(X).flatten(),  # Same as: preprocessing.normalize(df, norm='l2')
        'Quantile10': QuantileTransformer(n_quantiles=10).fit_transform(X).flatten(),
        'Standard': StandardScaler().fit_transform(X).flatten(),
        'YeoJohnson': PowerTransformer(method='yeo-johnson').fit_transform(X).flatten()
    }, index=np.arange(len(X)))

In [10]:
dfX = transform(X0)
dfX

Unnamed: 0,X,BoxCox,BoxCox_Std,MaxAbs,MinMax,NormalizerL1,NormalizerL2,Quantile10,Standard,YeoJohnson
0,0.144420,-1.034066,-1.168049,0.144851,0.133613,1.0,1.0,0.165031,-1.189005,-1.187620
1,0.530450,-0.507120,0.206663,0.532033,0.525884,1.0,1.0,0.512751,0.117557,0.112286
2,0.122155,-1.072224,-1.267598,0.122520,0.110988,1.0,1.0,0.149039,-1.264363,-1.261934
3,0.289276,-0.814725,-0.595827,0.290140,0.280811,1.0,1.0,0.279401,-0.698721,-0.702267
4,0.149916,-1.024896,-1.144125,0.150364,0.139198,1.0,1.0,0.168979,-1.170402,-1.169263
...,...,...,...,...,...,...,...,...,...,...
95,0.601572,-0.424301,0.422724,0.603368,0.598156,1.0,1.0,0.585384,0.358279,0.353891
96,0.562209,-0.469793,0.304043,0.563887,0.558156,1.0,1.0,0.540244,0.225050,0.220099
97,0.242531,-0.881345,-0.769626,0.243255,0.233311,1.0,1.0,0.238427,-0.856935,-0.859232
98,0.039605,-1.236476,-1.696103,0.039724,0.027104,1.0,1.0,0.052531,-1.543761,-1.536748
