In [10]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import Normalizer

class CustomColumnsNormalizer(BaseEstimator, TransformerMixin):
    
    """
    A transformer for normalizing selected columns of a DataFrame.

    Parameters:
    -----------
    columns_to_normalize: list
        A list of column names to be normalized.

    norm: str, default='l2'
        The norm to use for normalization. Options: 'l1', 'l2', 'max'.

    Attributes:
    -----------
    columns_to_normalize: list
        The list of column names to be normalized.

    norm: str
        The norm used for normalization.

    normalizer: Normalizer
        The scikit-learn Normalizer instance used for normalization.

    Methods:
    --------
    fit(X, y=None):
        Fit the transformer to the data. It initializes the Normalizer instance with the specified norm.

    transform(X):
        Normalize the selected columns of the input DataFrame X.

    Examples:
    ---------
    >>> import pandas as pd
    >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]})
    >>> columns_to_normalize = ['A', 'B']
    >>> transformer = CustomColumnNormalizer(columns_to_normalize=columns_to_normalize, norm='l2')
    >>> df_transformed = transformer.transform(df)
    >>> df_transformed
              A         B  C
    0  0.267261  0.801784  5
    1  0.447214  0.894427  6
    """
    def __init__(self, variables, norm='l2'):
        self.variables = variables
        self.norm = norm
        self.normalizer = Normalizer(norm=self.norm)

    def fit(self, X, y=None):
        self.normalizer.fit(X[self.variables])
        return self

    def transform(self, X):
        X_normalized = X.copy()
        X_normalized[self.variables] = self.normalizer.transform(X[self.variables])
        return X_normalized


In [21]:
from datetime import datetime, timedelta

# Número de filas
num_rows = 100

# Variables categóricas
categorias1 = np.random.choice(['A', 'B', 'C'], size=num_rows)
categorias2 = np.random.choice(['X', 'Y', 'Z'], size=num_rows)

# Variables numéricas
numericas1 = np.random.rand(num_rows)
numericas2 = np.random.randint(1, 100, size=num_rows)

# Fechas
fecha_inicial = datetime(2023, 1, 1)
fechas1 = [fecha_inicial + timedelta(days=np.random.randint(0, 365)) for _ in range(num_rows)]
fechas2 = [fecha_inicial + timedelta(days=np.random.randint(0, 365)) for _ in range(num_rows)]

# Crear el DataFrame
data = {
    'Categoria1': categorias1,
    'Categoria2': categorias2,
    'Numerica1': numericas1,
    'Numerica2': numericas2,
    'Fecha1': fechas1,
    'Fecha2': fechas2,
}

df = pd.DataFrame(data)

# Mostrar las primeras filas del DataFrame
df

Unnamed: 0,Categoria1,Categoria2,Numerica1,Numerica2,Fecha1,Fecha2
0,A,Y,0.610228,79,2023-05-22,2023-07-08
1,B,X,0.237976,93,2023-06-20,2023-04-15
2,B,Y,0.334432,6,2023-04-27,2023-11-29
3,A,Y,0.983904,79,2023-03-21,2023-06-02
4,C,X,0.358349,52,2023-11-23,2023-11-20
...,...,...,...,...,...,...
95,C,X,0.261394,74,2023-07-11,2023-01-01
96,B,X,0.552804,38,2023-08-21,2023-01-17
97,A,X,0.588749,92,2023-05-30,2023-05-24
98,A,X,0.543828,20,2023-12-10,2023-08-05


In [24]:
transfomer = CustomColumnsNormalizer(variables=df.select_dtypes(include=[float, int]).columns.tolist())
transfomer.fit(df)
trans = transfomer.transform(df)

In [26]:
trans

Unnamed: 0,Categoria1,Categoria2,Numerica1,Numerica2,Fecha1,Fecha2
0,A,Y,0.007724,0.999970,2023-05-22,2023-07-08
1,B,X,0.002559,0.999997,2023-06-20,2023-04-15
2,B,Y,0.055652,0.998450,2023-04-27,2023-11-29
3,A,Y,0.012454,0.999922,2023-03-21,2023-06-02
4,C,X,0.006891,0.999976,2023-11-23,2023-11-20
...,...,...,...,...,...,...
95,C,X,0.003532,0.999994,2023-07-11,2023-01-01
96,B,X,0.014546,0.999894,2023-08-21,2023-01-17
97,A,X,0.006399,0.999980,2023-05-30,2023-05-24
98,A,X,0.027181,0.999631,2023-12-10,2023-08-05
