In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.datasets import fetch_california_housing
from sklearn.compose import TransformedTargetRegressor

In [2]:
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X = X.iloc[:, :-2]
X.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467


In [23]:
def convert_to_clipped(f):
    # print("-------------------")
    # print("1. run convert_to_clipped")
    print(f)
    def f_clipped(x):
        # print("-------------------")
        # print("2. inside f_clipped")
        y = f(x)
        y_clipped = np.clip(y, MIN_PRED, MAX_PRED)
        # print("-------------------")
        # print("3. return y_clipped")
        return y_clipped
    # print("-------------------")
    # print("4. return f_clipped in convert_to_clipped")
    return f_clipped

In [24]:
@convert_to_clipped
def exp_clipped(x):
    # print("******************")
    # print("1. run exp_clipped")
    return np.exp(x)

<function exp_clipped at 0x000002416712B7E0>


In [25]:
MIN_PRED, MAX_PRED = 0, 5

target_transformations = [
    # (None, None), 
    # (np.log, np.exp),
    # (np.log, convert_to_clipped(np.exp)),
    # (np.sqrt, np.square), 
    # (np.sqrt, convert_to_clipped(np.square)),  
    (np.log, exp_clipped), 
]

target_transformations = [{"func": func, "inverse_func": inverse_func} for func, inverse_func in target_transformations]
target_transformations

[{'func': <ufunc 'log'>,
  'inverse_func': <function __main__.convert_to_clipped.<locals>.f_clipped(x)>}]

In [26]:
def test_transformation(transformation, X_train, X_test, y_train, y_test):
    model = TransformedTargetRegressor(LinearRegression(), 
                                       func=transformation["func"], 
                                       inverse_func=transformation["inverse_func"])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    
    return {
        'transform_func': "identity" if  transformation["func"] is None else transformation["func"].__name__,
        'inverse_func': "identity" if transformation["inverse_func"] is None else transformation["inverse_func"].__name__,
        'r2': r2
    }
def test_transformations(transformations: list[dict], X_train, X_test, y_train, y_test):
    results = pd.DataFrame([test_transformation(transformation, X_train, X_test, y_train, y_test) for transformation in transformations])
    return results

In [27]:
# random_state = 123213
random_state = None

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)
test_transformations(target_transformations, X_train, X_test, y_train, y_test)



Unnamed: 0,transform_func,inverse_func,r2
0,log,f_clipped,0.537479
