In [1]:
import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler


class MyTransformer(BaseEstimator, TransformerMixin):
    """Minimal column-wise standardiser: ``(X - mean) / std`` per column.

    The statistics are computed along axis 0 with NumPy's default
    population standard deviation (``ddof=0``). Columns whose standard
    deviation is exactly zero are divided by 1 instead, so constant
    columns map to 0 rather than NaN/inf; this mirrors how scikit-learn's
    ``StandardScaler`` treats zero-variance features.
    """

    def __init__(self):
        # Learned statistics; both stay ``None`` until ``fit`` has run.
        self._mean_X = None
        self._std_X = None

    def fit(self, X: np.ndarray, y=None):
        """Learn the per-column mean and standard deviation of ``X``.

        Parameters
        ----------
        X : ndarray or pandas.DataFrame
            Training data; statistics are taken along axis 0.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        # np.asarray covers both DataFrame and ndarray input, so a single
        # code path replaces the former isinstance branch.
        values = np.asarray(X)
        self._mean_X = values.mean(axis=0)
        std = values.std(axis=0)
        # Avoid dividing by zero for constant columns: scale them by 1.
        self._std_X = np.where(std == 0, 1.0, std)
        return self

    def transform(self, X: np.ndarray, y=None):
        """Standardise ``X`` with the statistics learned by ``fit``.

        Parameters
        ----------
        X : ndarray or pandas.DataFrame
            Data with the same column layout as the data passed to ``fit``.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        Same container type as ``X`` (a DataFrame stays a DataFrame).

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        if self._mean_X is None or self._std_X is None:
            # Local import keeps this class self-contained in the notebook.
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This MyTransformer instance is not fitted yet. "
                "Call 'fit' before using 'transform'."
            )
        # The arithmetic already builds a new object, so no explicit copy
        # of X is needed to leave the caller's data untouched.
        return (X - self._mean_X) / self._std_X


In [2]:
# Five-row demo frame: two integer columns on different scales and two
# evenly spaced float columns.
df = pd.DataFrame(dict(
    a=range(2, 12, 2),
    b=range(20, 120, 20),
    c=np.linspace(0, 1, num=5),
    d=np.linspace(-1, 1, num=5),
))
df

Unnamed: 0,a,b,c,d
0,2,20,0.0,-1.0
1,4,40,0.25,-0.5
2,6,60,0.5,0.0
3,8,80,0.75,0.5
4,10,100,1.0,1.0


In [3]:
# Standardise the demo frame with the custom transformer; the result is still a DataFrame.
MyTransformer().fit_transform(df)

Unnamed: 0,a,b,c,d
0,-1.414214,-1.414214,-1.414214,-1.414214
1,-0.707107,-0.707107,-0.707107,-0.707107
2,0.0,0.0,0.0,0.0
3,0.707107,0.707107,0.707107,0.707107
4,1.414214,1.414214,1.414214,1.414214


In [4]:
# Reference result from scikit-learn on the same frame; it comes back as a plain ndarray.
StandardScaler().fit_transform(df)

array([[-1.41421356, -1.41421356, -1.41421356, -1.41421356],
       [-0.70710678, -0.70710678, -0.70710678, -0.70710678],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.70710678,  0.70710678,  0.70710678,  0.70710678],
       [ 1.41421356,  1.41421356,  1.41421356,  1.41421356]])

In [5]:
# Small 3x3 float array used to compare both scalers on plain NumPy input.
X_train = np.array([[ 1., -1.,  3.],
                    [ 3.,  0.,  0.],
                    [ 0.,  1., -1.]])

In [6]:
# scikit-learn reference on the ndarray input.
StandardScaler().fit_transform(X_train)

array([[-0.26726124, -1.22474487,  1.37281295],
       [ 1.33630621,  0.        , -0.39223227],
       [-1.06904497,  1.22474487, -0.98058068]])

In [7]:
# Custom transformer on the same ndarray; the output should match the StandardScaler result.
MyTransformer().fit_transform(X_train)

array([[-0.26726124, -1.22474487,  1.37281295],
       [ 1.33630621,  0.        , -0.39223227],
       [-1.06904497,  1.22474487, -0.98058068]])

In [8]:
# Eight synthetic single-column samples, each of shape (N, 1), drawn from
# different distributions.
N = 100
SEED = 42  # fixed seed so Restart & Run All reproduces the same samples
rng = np.random.default_rng(SEED)

# Drawing with size=(N, 1) yields the column shape directly, so no reshape
# is needed afterwards.
X0 = rng.uniform(size=(N, 1))
X1 = rng.normal(size=(N, 1))
X2 = rng.binomial(n=10, p=0.2, size=(N, 1))
X3 = rng.exponential(size=(N, 1))
X4 = rng.poisson(lam=1.0, size=(N, 1))
X5 = rng.triangular(left=-1.0, mode=0.0, right=1.0, size=(N, 1))
X6 = rng.weibull(a=1.0, size=(N, 1))
X7 = rng.weibull(a=5.0, size=(N, 1))

In [9]:
from sklearn.preprocessing import (MaxAbsScaler, MinMaxScaler, Normalizer, PowerTransformer,
                                   QuantileTransformer, StandardScaler)

def transform(X):
    """Apply several scikit-learn scalers to ``X`` and tabulate the results.

    Parameters
    ----------
    X : ndarray
        Two-dimensional array, as required by the scikit-learn transformers.
        The notebook passes single-column arrays of shape ``(n, 1)``.

    Returns
    -------
    pandas.DataFrame
        One column with the flattened input (``'X'``) followed by one column
        per transformer, indexed ``0 .. len(X) - 1``.

    Notes
    -----
    Box-Cox is only defined for strictly positive data. When ``X`` contains
    a value ``<= 0`` the ``'BoxCox'`` and ``'BoxCox_Std'`` columns are filled
    with NaN instead of letting scikit-learn raise ``ValueError``, so the
    remaining scalers can still be compared.
    """
    def _flat(transformer):
        # Fit on X, transform it, and collapse the result to one dimension.
        return transformer.fit_transform(X).flatten()

    if np.any(X <= 0):
        # Box-Cox is undefined here: keep the columns but mark them missing.
        boxcox = np.full(X.size, np.nan)
        boxcox_std = np.full(X.size, np.nan)
    else:
        boxcox = _flat(PowerTransformer(method='box-cox', standardize=False))
        boxcox_std = _flat(PowerTransformer(method='box-cox', standardize=True))

    return pd.DataFrame({
        'X': X.flatten(),
        'BoxCox': boxcox,
        'BoxCox_Std': boxcox_std,
        'MaxAbs': _flat(MaxAbsScaler()),
        'MinMax': _flat(MinMaxScaler()),
        # Normalizer rescales each row (sample), not each column. Same as
        # sklearn.preprocessing.normalize(X, norm='l1') / (X, norm='l2').
        'NormalizerL1': _flat(Normalizer(norm='l1')),
        'NormalizerL2': _flat(Normalizer(norm='l2')),
        'Quantile10': _flat(QuantileTransformer(n_quantiles=10)),
        'Standard': _flat(StandardScaler()),
        'YeoJohnson': _flat(PowerTransformer(method='yeo-johnson')),
    }, index=np.arange(len(X)))

In [10]:
# Run every scaler on the uniform sample X0 and display the side-by-side table.
dfX = transform(X0)
dfX

Unnamed: 0,X,BoxCox,BoxCox_Std,MaxAbs,MinMax,NormalizerL1,NormalizerL2,Quantile10,Standard,YeoJohnson
0,0.793056,-0.216534,1.138456,0.824168,0.819421,1.0,1.0,0.833488,1.204838,1.177046
1,0.678187,-0.346616,0.818730,0.704793,0.696823,1.0,1.0,0.730252,0.790996,0.825828
2,0.220117,-0.996600,-0.778847,0.228753,0.207932,1.0,1.0,0.238419,-0.859300,-0.840341
3,0.788387,-0.221667,1.125840,0.819316,0.814438,1.0,1.0,0.828420,1.188016,1.163180
4,0.204805,-1.025197,-0.849134,0.212840,0.191589,1.0,1.0,0.217297,-0.914464,-0.905514
...,...,...,...,...,...,...,...,...,...,...
95,0.204303,-1.026148,-0.851474,0.212318,0.191054,1.0,1.0,0.216858,-0.916273,-0.907664
96,0.293689,-0.869093,-0.465451,0.305211,0.286454,1.0,1.0,0.348779,-0.594239,-0.537355
97,0.741728,-0.273650,0.998073,0.770827,0.764640,1.0,1.0,0.777778,1.019919,1.022756
98,0.844099,-0.161196,1.274471,0.877213,0.873899,1.0,1.0,0.888889,1.388730,1.326488
