In [33]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from numpy import log10, power, sign, float64, abs, exp, log
from pandas import DataFrame


class FuncTransformer(FunctionTransformer):

    def __init__(
        self,
        func=None,
        inverse_func=None,
        validate=True,
        accept_sparse=False,
        check_inverse=True,
        kw_args=None,
        inv_kw_args=None,) -> None:

        super(FuncTransformer,self).__init__(
                func=None,
                inverse_func=None,
                validate=False,
                accept_sparse=False,
                check_inverse=True,
                kw_args=None,
                inv_kw_args=None)


    def fit(self, X, y=None):
        if type(X) == DataFrame:
            self._features = X.columns
        return super().fit(X, y=None)


    def transform(self, X):
        if type(X) == DataFrame:
            return DataFrame(super().transform(X), columns=self._features)
        else:
            return super().transform(X)         


    def get_feature_names_out(self, input_features=None):
        return self._features.to_numpy()


class LogTransformer(FuncTransformer):
    
    def __init__(
        self,
        func=10,
        validate=True,
        accept_sparse=False,
        check_inverse=True,
        kw_args=None,
        inv_kw_args=None,) -> None:

        log_trans = None
        log_trans_inv = None
        if func == 'e':
            log_trans = lambda X : (log(1+abs(X))*sign(X).astype(float64))
            log_trans_inv = lambda X: (exp(10,abs(X))-1)*sign(X).astype(float64)
        elif func == '10':
            log_trans = lambda X : (log10(1+abs(X))*sign(X).astype(float64))
            log_trans_inv = lambda X: (power(10,abs(X))-1)*sign(X).astype(float64)
                

        super(FuncTransformer,self).__init__(
                func=log_trans,
                inverse_func=log_trans_inv,
                validate=validate,
                accept_sparse=accept_sparse,
                check_inverse=check_inverse,
                kw_args=kw_args,
                inv_kw_args=inv_kw_args)

    

    



In [35]:
import pytest
# from data_prep.func_transformer import FuncTransformer, LogTransformer
from sklearn.pipeline import Pipeline
from pandas import DataFrame, read_csv
import os
import numpy as np

# currentDir = os.path.dirname(__file__)
currentDir = '.'

df = read_csv(f'{currentDir}/data/data_to_transform.csv')
def test_log10():
   log_transformer = LogTransformer(10)
   pipline2 = Pipeline(steps=[('logtransformer', log_transformer)])


   df_log10 = pipline2.fit_transform(df)
   assert type(df_log10) == DataFrame

   df_log10_inv = pipline2.inverse_transform(df_log10)

   df_diff = df_log10_inv - df
   diff_round = np.round(df_diff, 5)
   assert np.count_nonzero(diff_round, axis=None) == 0

test_log10()

In [22]:
log_transformer = LogTransformer()
log_transformer.fit_transform(df)



Unnamed: 0,Moderate Positive Skew,Highly Positive Skew,Moderate Negative Skew,Highly Negative Skew
0,0.278751,0.590516,1.085674,1.001192
1,0.325013,0.597957,1.073459,1.000424
2,0.333816,0.598394,1.072542,1.000266
3,0.354902,0.602095,1.070576,1.000005
4,0.366220,0.603373,1.070153,0.999187
...,...,...,...,...
9995,1.197254,1.237783,-0.599973,-0.628888
9996,1.200152,1.240456,-0.617789,-0.678731
9997,1.211177,1.257750,-0.654875,-0.757240
9998,1.211387,1.270177,-0.755100,-0.824158


In [2]:
class A:
    def __init__(self, attr1):
        self.attr1 = attr1

    def speak(self):
        print(f"this is A, self type is {type(self)}")

class B(A):
    def __init__(self, attr1):
        self.attr1 = attr1

    def speak(self):
        print(f"this is B, self type is {type(self)}")
        super().speak()

b = B(1)
b.speak()

this is A, self type is <class '__main__.B'>
