# 5.3.5 QuantileTransformer

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import seaborn as sns
from sklearn.preprocessing import QuantileTransformer

import warnings
# Install a global "ignore" filter with no category/module restriction, i.e. every
# warning is silenced for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used
# below — consider narrowing the filter if they become relevant.
warnings.filterwarnings("ignore")

Este método transforma las características para que sigan una distribución normal o uniforme.

Tiende a dispersar los valores más frecuentes.

Reduce el impacto de los outliers.

Se aplica a cada columna del dataset de forma independiente.

Cada columna es transformada usando la siguiente fórmula:

$$ G^{-1}(F(X)) $$

donde $F$ es la función de distribución acumulada de la característica y $G^{-1}$ es la función cuantil de la distribución deseada $G$.

In [None]:
# Fetch the "penguins" example dataset through seaborn.
penguins = sns.load_dataset("penguins")

# Keep a one-column DataFrame with the flipper length, relabelled as "original".
data = penguins[["flipper_length_mm"]].rename(
    columns={"flipper_length_mm": "original"}
)

In [None]:
# Quantile transformer whose output follows a uniform distribution.
quantileTransformer_uniform = QuantileTransformer(
    n_quantiles=100,  # number of quantiles to be computed
    output_distribution="uniform",  # accepted values: 'uniform' / 'normal'
    subsample=100000,  # max samples used to estimate the quantiles (efficiency cap)
    random_state=None,  # RNG control for subsampling and smoothing noise
)

# Fit on the "original" column, then store its transformed values as a new column.
data["transformed_uniform"] = quantileTransformer_uniform.fit(
    data[["original"]]
).transform(data[["original"]])

In [None]:
# Quantile transformer (100 quantiles) whose output follows a normal distribution;
# every other constructor argument is left at its default.
quantileTransformer_normal = QuantileTransformer(
    output_distribution="normal",
    n_quantiles=100,
)

# Fit on the "original" column, then store its transformed values as a new column.
data["transformed_normal"] = quantileTransformer_normal.fit(
    data[["original"]]
).transform(data[["original"]])

In [None]:
# Joint plot (scatter kind) of the original values against the uniform-transformed ones.
g = sns.jointplot(
    data=data,
    x="original",
    y="transformed_uniform",
    kind="scatter",
)
# Resize the resulting figure to 3 x 3 inches (width, height).
g.fig.set_size_inches(3, 3)

In [None]:
# Joint plot (scatter kind) of the original values against the normal-transformed ones.
g = sns.jointplot(
    data=data,
    x="original",
    y="transformed_normal",
    kind="scatter",
)
# Resize the resulting figure to 3 x 3 inches (width, height).
g.fig.set_size_inches(3, 3)

In [None]:
# Display the fitted transformer's `n_quantiles_` attribute
# (per the scikit-learn docs: the number of quantiles actually used).
quantileTransformer_normal.n_quantiles_

In [None]:
# Display the fitted transformer's `quantiles_` attribute
# (per the scikit-learn docs: the feature values at the reference quantiles).
quantileTransformer_normal.quantiles_

In [None]:
# Display the fitted transformer's `references_` attribute
# (per the scikit-learn docs: the reference quantile levels).
quantileTransformer_normal.references_

In [None]:
# Print a fixed marker string.
# NOTE(review): presumably a "notebook ran to the end" check — confirm it is still needed.
print('ok_')