In [7]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import LabelEncoder, QuantileTransformer, StandardScaler

In [3]:
class Padronizacao(BaseEstimator,TransformerMixin):
    """Standardize the selected DataFrame columns with ``StandardScaler``.

    Parameters
    ----------
    columns : list | str
        Column name, or list of column names, to standardize.

    Attributes
    ----------
    scalers_ : dict
        One fitted ``StandardScaler`` per column, learned in ``fit``.
    """

    def __init__(self,columns:list | str) -> None:
        # Stored untouched so that get_params/clone keep working.
        self.columns = columns

    def _column_list(self) -> list:
        """Return ``columns`` as a list, wrapping a single column name."""
        if isinstance(self.columns, str):
            # Iterating a bare string would yield its characters.
            return [self.columns]
        return list(self.columns)

    def fit(self,X,y=None):
        """Learn the mean and scale of every selected column of ``X``."""
        self.scalers_ = {
            column: StandardScaler().fit(X[column].values.reshape(-1, 1))
            for column in self._column_list()
        }
        return self

    def transform(self, X:pd.DataFrame):
        """Return a copy of ``X`` with the selected columns standardized.

        Statistics learned in ``fit`` are reused so new data is scaled
        consistently. A column with no fitted scaler (``transform`` called
        without ``fit``) is standardized with its own statistics.
        """
        fitted = getattr(self, "scalers_", {})
        out = X.copy()  # never modify the caller's DataFrame
        for column in self._column_list():
            values = out[column].values.reshape(-1, 1)
            scaler = fitted.get(column)
            if scaler is None:
                scaled = StandardScaler().fit_transform(values)
            else:
                scaled = scaler.transform(values)
            out[column] = scaled.ravel()
        return out

In [4]:
class Teto(BaseEstimator,TransformerMixin):
    """Round the selected DataFrame columns down with ``np.floor``.

    NOTE(review): the class name means "ceiling" in Portuguese, yet the
    code applies ``np.floor``; confirm which rounding is intended.

    Parameters
    ----------
    columns : list | str
        Column name, or list of column names, to round.
    """

    def __init__(self,columns:list | str) -> None:
        # Stored untouched so that get_params/clone keep working.
        self.columns = columns

    def fit(self,X,y=None):
        """Nothing to learn; return ``self``."""
        return self

    def transform(self, X:pd.DataFrame):
        """Return a copy of ``X`` with the selected columns floored."""
        # Iterating a bare string would yield its characters.
        columns = [self.columns] if isinstance(self.columns, str) else self.columns
        out = X.copy()  # never modify the caller's DataFrame
        for column in columns:
            out[column] = np.floor(out[column].values)
        return out

In [5]:
class Dummies(BaseEstimator,TransformerMixin):
    """One-hot encode the selected columns with ``pd.get_dummies``.

    Parameters
    ----------
    columns : list | str
        Column name, or list of column names, to encode.

    Notes
    -----
    The output columns depend on the categories present in the frame passed
    to ``transform``, so two datasets with different categories produce
    different columns.
    """

    def __init__(self,columns:list | str) -> None:
        # Stored untouched so that get_params/clone keep working.
        self.columns = columns

    def fit(self,X,y=None):
        """Nothing to learn; return ``self``."""
        return self

    def transform(self, X:pd.DataFrame):
        """Return a new DataFrame with integer (0/1) indicator columns."""
        # pd.get_dummies rejects a bare string for ``columns``; wrap it.
        columns = [self.columns] if isinstance(self.columns, str) else self.columns
        return pd.get_dummies(X, columns=columns, dtype=int)

In [8]:
class RemovendoAtributos(BaseEstimator, TransformerMixin):
    """Drop the configured columns from a DataFrame.

    Parameters
    ----------
    columns : list
        Labels of the columns to remove.
    """

    def __init__(self, columns: list) -> None:
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing to learn; return ``self``."""
        return self

    def transform(self, X, y=None):
        """Return a new DataFrame without the configured columns."""
        # ``columns=`` already selects the column axis, so no ``axis`` is needed.
        trimmed = X.drop(columns=self.columns)
        return trimmed

In [None]:
class AtributosEncoder(BaseEstimator,TransformerMixin):
    """Encode the selected columns as integer codes with ``LabelEncoder``.

    Parameters
    ----------
    columns : list | str
        Column name, or list of column names, to encode.

    Attributes
    ----------
    encoders_ : dict
        One fitted ``LabelEncoder`` per column, learned in ``fit``.
    """

    def __init__(self,columns:list|str):
        # Stored untouched so that get_params/clone keep working.
        self.columns = columns

    def _column_list(self) -> list:
        """Return ``columns`` as a list, wrapping a single column name."""
        if isinstance(self.columns, str):
            # Iterating a bare string would yield its characters.
            return [self.columns]
        return list(self.columns)

    def fit(self, X, y=None):
        """Learn the label-to-integer mapping of every selected column."""
        self.encoders_ = {
            column: LabelEncoder().fit(X[column].values)
            for column in self._column_list()
        }
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the selected columns label-encoded.

        Mappings learned in ``fit`` are reused so the same label always maps
        to the same integer. A column with no fitted encoder (``transform``
        called without ``fit``) is encoded from its own values.

        Raises
        ------
        ValueError
            Raised by ``LabelEncoder.transform`` when a fitted column
            contains a label that was not seen during ``fit``.
        """
        fitted = getattr(self, "encoders_", {})
        out = X.copy()  # never modify the caller's DataFrame
        for column in self._column_list():
            # LabelEncoder expects 1-D input; a column vector triggers a warning.
            values = out[column].values
            encoder = fitted.get(column)
            if encoder is None:
                out[column] = LabelEncoder().fit_transform(values)
            else:
                out[column] = encoder.transform(values)
        return out

In [None]:
class Discretização(BaseEstimator, TransformerMixin):
    """Discretize a column into intervals with ``pd.cut``.

    Parameters
    ----------
    column : str
        Column to discretize.
    name : str
        Name of the new column that receives the result.
    bins : list
        Bin specification forwarded to ``pd.cut``.
    labels : list, optional
        Categorical labels for the bins, forwarded to ``pd.cut``;
        ``None`` lets ``pd.cut`` use its default interval labels.
    """

    def __init__(self, column:str, name:str,bins:list, labels:list=None) -> None:
        self.column = column
        self.name   = name
        self.bins   = bins
        self.labels = labels

    def fit(self, X, y=None):
        """Nothing to learn; return ``self``."""
        return self

    def transform(self, X:pd.DataFrame, y=None):
        """Return a copy of ``X`` with the discretized column ``name`` added."""
        out = X.copy()  # never modify the caller's DataFrame
        # ``labels`` is forwarded as-is: comparing it with ``== None`` was a
        # no-op and raised for array-like labels (ambiguous truth value).
        out[self.name] = pd.cut(x=out[self.column], bins=self.bins, labels=self.labels)
        return out

In [None]:
class TransformancaoTarget(BaseEstimator,TransformerMixin):
    """Drop rows whose target is zero and write a transformed target to ``'price'``.

    Parameters
    ----------
    column : str
        Name of the column holding the raw target values.
    dtype : int, default=1
        Transformation selector: ``1`` applies ``np.log1p``; any other
        value applies a ``QuantileTransformer`` with a normal output
        distribution.
    """

    def __init__(self, column, dtype=1):
        self.column = column
        self.dtype  = dtype

    def fit(self,X,y=None):
        """Nothing to learn; return ``self``."""
        return self

    def transform(self,X:pd.DataFrame,y=None):
        """Return a new DataFrame without zero-target rows and with ``'price'`` set.

        The result is always stored in the ``'price'`` column, whatever
        ``column`` is. The quantile transformer is fitted on the data passed
        to this call.
        """
        # ``drop`` returns a new frame, so the caller's DataFrame is untouched.
        zero_rows = X.loc[X[self.column] == 0].index
        X = X.drop(index=zero_rows)

        if self.dtype == 1:
            X['price'] = np.log1p(X[self.column])
        else:
            transformation = QuantileTransformer(output_distribution='normal', random_state=42)
            # Read from the configured column, as the log branch does.
            values = X[self.column].values.reshape(-1, 1)
            X['price'] = transformation.fit_transform(values).ravel()

        return X