# Transformacje zmiennej celu i automatyzacja eksperymentów
Zaimplementuj testy modelu regresji liniowej z różnymi transformacjami zmiennej celu (w zestawieniu uwzględnij też model bez transformacji). Użyj klasy `TransformedTargetRegressor` z modułu `sklearn.compose`. Nie używaj klasy `GridSearchCV`. Zaimplementuj funkcję, która zwróci tabelkę z wynikami testów – informacją, jakie przekształcenie dało jakie wyniki (metryka R-kwadrat), a argumentem będzie lista transformacji do rozważenia (+ inne konieczne zmienne).
Zaimplementuj testy tak, aby rozszerzanie ich o inne transformacje wymagało minimalnego nakładu pracy.

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.compose import TransformedTargetRegressor

In [2]:
# Load the California housing dataset as pandas objects (features X, target y).
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
# Keep every feature column except the last two.
X = X.iloc[:, :-2]
print(X)

       MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup
0      8.3252      41.0  6.984127   1.023810       322.0  2.555556
1      8.3014      21.0  6.238137   0.971880      2401.0  2.109842
2      7.2574      52.0  8.288136   1.073446       496.0  2.802260
3      5.6431      52.0  5.817352   1.073059       558.0  2.547945
4      3.8462      52.0  6.281853   1.081081       565.0  2.181467
...       ...       ...       ...        ...         ...       ...
20635  1.5603      25.0  5.045455   1.133333       845.0  2.560606
20636  2.5568      18.0  6.114035   1.315789       356.0  3.122807
20637  1.7000      17.0  5.205543   1.120092      1007.0  2.325635
20638  1.8672      18.0  5.329513   1.171920       741.0  2.123209
20639  2.3886      16.0  5.254717   1.162264      1387.0  2.616981

[20640 rows x 6 columns]


In [89]:
def transform_log(x):
    """Return ``log(1 + x)`` computed element-wise with ``np.log1p``.

    Forward half of the log target transformation; ``inverse_log`` undoes it.
    """
    shifted_log = np.log1p(x)
    return shifted_log

def inverse_log(x):
    """Return ``exp(x) - 1`` computed element-wise with ``np.expm1``.

    Maps values produced by ``log(1 + y)`` back to the original scale.
    """
    restored = np.expm1(x)
    return restored

def transform_sqrt(x):
    """Return the element-wise square root of *x* (via ``np.sqrt``).

    Forward half of the square-root target transformation.
    """
    root = np.sqrt(x)
    return root

def inverse_sqrt(x):
    """Return *x* raised to the second power, element-wise.

    Undoes a square-root transformation for non-negative original values.
    """
    exponent = 2
    return np.power(x, exponent)

In [90]:
# Sanity check: display the shapes of the feature matrix and the target.
X.shape, y.shape

((20640, 6), (20640,))

In [100]:
# Main entry point for comparing target transformations.
def test_regression_with_transformations(X, y, transformations, *, test_size=0.2,
                                         random_state=42, verbose=True):
    """Score a linear regression under several target transformations.

    The data is split once into train and test parts. For every entry of
    *transformations*, a ``TransformedTargetRegressor`` wrapping a fresh
    ``LinearRegression`` is fitted on the training part and scored with
    R^2 on the test part.

    Parameters
    ----------
    X, y
        Features and target, passed straight to ``train_test_split``.
    transformations : dict
        Mapping ``name -> (func, inverse_func)``. Both callables are
        forwarded to ``TransformedTargetRegressor``; ``(None, None)``
        gives the baseline without a target transformation. Adding a
        new experiment only requires adding one entry to this mapping.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int or None, default 42
        Forwarded to ``train_test_split``; a fixed value makes every
        transformation (and every run) use the same split.
    verbose : bool, default True
        When true, print each transformation name before fitting it.

    Returns
    -------
    pandas.DataFrame
        One row per transformation, in input order, with the columns
        ``'Transformation'`` and ``'R^2'``. The columns are present even
        when *transformations* is empty.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    results = []
    for name, (transform, inverse_transform) in transformations.items():
        if verbose:
            print(name)
        model = TransformedTargetRegressor(
            regressor=LinearRegression(),
            func=transform,
            inverse_func=inverse_transform,
        )
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        results.append({'Transformation': name, 'R^2': r2_score(y_test, y_pred)})

    # Naming the columns explicitly keeps the table schema stable for an
    # empty `transformations` mapping.
    return pd.DataFrame(results, columns=['Transformation', 'R^2'])

In [101]:
# Przykładowe dane
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X = X.iloc[:, :-2]
print(X)

       MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup
0      8.3252      41.0  6.984127   1.023810       322.0  2.555556
1      8.3014      21.0  6.238137   0.971880      2401.0  2.109842
2      7.2574      52.0  8.288136   1.073446       496.0  2.802260
3      5.6431      52.0  5.817352   1.073059       558.0  2.547945
4      3.8462      52.0  6.281853   1.081081       565.0  2.181467
...       ...       ...       ...        ...         ...       ...
20635  1.5603      25.0  5.045455   1.133333       845.0  2.560606
20636  2.5568      18.0  6.114035   1.315789       356.0  3.122807
20637  1.7000      17.0  5.205543   1.120092      1007.0  2.325635
20638  1.8672      18.0  5.329513   1.171920       741.0  2.123209
20639  2.3886      16.0  5.254717   1.162264      1387.0  2.616981

[20640 rows x 6 columns]


In [105]:
# Transformations to compare: display name -> (func, inverse_func).
# The (None, None) entry is the baseline without a target transformation.
transformations = {
    'None': (None, None),
    'Log': (transform_log, inverse_log),
    'Sqrt': (transform_sqrt, inverse_sqrt),
}

# Run the comparison
results_df = test_regression_with_transformations(X, y, transformations)

# Show the results table
print(results_df)

None
Log
Sqrt
  Transformation       R^2
0           None  0.509934
1            Log  0.153178
2           Sqrt  0.410320


### ---------------------------------------------------------------------------------

In [73]:
def test_transformation(transform_func, inverse_func, X_train, X_test, y_train, y_test):
    """Fit and score one linear regression with a transformed target.

    A ``TransformedTargetRegressor`` around ``LinearRegression`` is fitted on
    the training data with the given ``func`` / ``inverse_func`` pair, and
    its test-set predictions are scored with ``r2_score``.

    Returns a dict holding the two callables exactly as passed in
    (``'transform_func'``, ``'inverse_func'``) and the score (``'r2'``).
    """
    regressor = TransformedTargetRegressor(
        LinearRegression(),
        func=transform_func,
        inverse_func=inverse_func,
    )
    regressor.fit(X_train, y_train)
    score = r2_score(y_test, regressor.predict(X_test))
    outcome = {
        'transform_func': transform_func,
        'inverse_func': inverse_func,
        'r2': score,
    }
    return outcome
def test_transformations(transformations: list[tuple], X, y, test_size=0.2, random_state=None):
    """Evaluate every ``(func, inverse_func)`` pair on one shared split.

    Parameters
    ----------
    transformations
        Sequence of pairs; element 0 is the forward function and element 1
        its inverse. ``(None, None)`` requests no target transformation.
    X, y
        Features and target, passed to ``train_test_split``.
    test_size
        Forwarded to ``train_test_split`` (default 0.2, as before).
    random_state
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        previous behaviour of a different split on every call; pass an
        integer to make the results reproducible.

    Returns
    -------
    pandas.DataFrame
        One row per pair with the columns ``'transform_func'``,
        ``'inverse_func'`` and ``'r2'``. The columns are present even for
        an empty *transformations* list.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    rows = [
        test_transformation(pair[0], pair[1], X_train, X_test, y_train, y_test)
        for pair in transformations
    ]
    # Explicit columns keep the table schema stable when there are no rows.
    return pd.DataFrame(rows, columns=['transform_func', 'inverse_func', 'r2'])

In [74]:
# Reload the California housing data as pandas objects and drop the two
# geographic coordinate columns from the features.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X = X.drop(columns=['Latitude', 'Longitude'])

In [82]:
# Compare three (func, inverse_func) pairs: no transformation, log/exp and
# sqrt/square. The resulting table is displayed as the cell output.
results = test_transformations(
    [(None, None), (np.log, np.exp),(np.sqrt, np.square)],
     X,
     y
)
results

Unnamed: 0,transform_func,inverse_func,r2
0,,,0.527292
1,<ufunc 'log'>,<ufunc 'exp'>,-1.236635
2,<ufunc 'sqrt'>,<ufunc 'square'>,0.430965


### ---------------------------------------------------------------------------------

In [95]:
# Candidate target transformations in keyword form: each dict carries the
# forward function under "func" and its inverse under "inverse_func".
target_transformations = [
    {"func": func, "inverse_func": inverse_func}
    for func, inverse_func in (
        (None, None),
        (np.log, np.exp),
        (np.sqrt, np.square),
    )
]
target_transformations

[{'func': None, 'inverse_func': None},
 {'func': <ufunc 'log'>, 'inverse_func': <ufunc 'exp'>},
 {'func': <ufunc 'sqrt'>, 'inverse_func': <ufunc 'square'>}]

In [252]:
# Bounds applied by the clipping inverse functions below.
MIN_PRED, MAX_PRED = 0, 5


def identity(x):
    """Return *x* unchanged (explicit no-op forward transformation)."""
    return x


# Candidate (func, inverse_func) pairs. The lambdas read MIN_PRED / MAX_PRED
# when they are called, not when they are created.
target_transformations = [
    (None, None),
    (np.log, np.exp),
    (np.log, lambda x: np.clip(np.exp(x), MIN_PRED, MAX_PRED)),
    (np.sqrt, np.square),
    (identity, lambda y: np.clip(y, MIN_PRED, MAX_PRED)),
]
# Convert every pair into the keyword form {"func": ..., "inverse_func": ...}.
target_transformations = [{"func": func, "inverse_func": inverse_func} for func, inverse_func in target_transformations]
target_transformations

[{'func': None, 'inverse_func': None},
 {'func': <ufunc 'log'>, 'inverse_func': <ufunc 'exp'>},
 {'func': <ufunc 'log'>, 'inverse_func': <function __main__.<lambda>(x)>},
 {'func': <ufunc 'sqrt'>, 'inverse_func': <ufunc 'square'>},
 {'func': <function __main__.identity(x)>,
  'inverse_func': <function __main__.<lambda>(y)>}]

In [253]:
def _callable_label(func):
    """Return a short display name for a transformation callable."""
    if func is None:
        return "identity"
    # functools.partial objects and callable instances have no __name__;
    # fall back to repr() instead of raising AttributeError.
    return getattr(func, "__name__", repr(func))


def test_transformation(transformation, X_train, X_test, y_train, y_test):
    """Fit and score one linear regression with a transformed target.

    Parameters
    ----------
    transformation : dict
        Must provide the keys ``"func"`` and ``"inverse_func"``; both values
        are forwarded to ``TransformedTargetRegressor`` and may be ``None``.
    X_train, X_test, y_train, y_test
        An existing train/test split. The model is fitted on the training
        part, and R^2 is computed on the test part.

    Returns
    -------
    dict
        ``'transform_func'`` and ``'inverse_func'`` hold display names
        (``"identity"`` for ``None``, otherwise the callable's ``__name__``
        or, if it has none, its ``repr``); ``'r2'`` holds the test score.
    """
    model = TransformedTargetRegressor(LinearRegression(),
                                       func=transformation["func"],
                                       inverse_func=transformation["inverse_func"])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    return {
        'transform_func': _callable_label(transformation["func"]),
        'inverse_func': _callable_label(transformation["inverse_func"]),
        'r2': r2
    }
def test_transformations(transformations: list[dict], X_train, X_test, y_train, y_test):
    """Score every transformation on the given split and tabulate the results.

    Each element of *transformations* is passed to ``test_transformation``
    together with the split; the returned dicts become the rows of the
    resulting DataFrame, in input order.
    """
    rows = []
    for spec in transformations:
        rows.append(test_transformation(spec, X_train, X_test, y_train, y_test))
    return pd.DataFrame(rows)

# Load the California housing data and keep all feature columns but the last two.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X = X.iloc[:, :-2]

# Seed for the train/test split. With None the split differs between runs;
# re-enable the integer below to fix it.
# random_state = 123213
random_state = None
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)
# Score every candidate on the same split; the table is the cell output.
test_transformations(target_transformations, X_train, X_test, y_train, y_test)



Unnamed: 0,transform_func,inverse_func,r2
0,identity,identity,0.534294
1,log,exp,-0.223176
2,log,<lambda>,0.510607
3,sqrt,square,0.474427
4,identity,<lambda>,0.55332


## Można tu użyć dekoratora lub zdefiniować funkcję, która będzie zwracała funkcję przyciętą

In [257]:
def convert_to_clipped(f, min_value=None, max_value=None):
    """Wrap *f* so that its output is clipped to a fixed interval.

    Parameters
    ----------
    f : callable
        Function whose result should be clipped.
    min_value, max_value : optional
        Clip bounds. When a bound is left as ``None``, the module-level
        ``MIN_PRED`` / ``MAX_PRED`` is used instead and is looked up each
        time the wrapper is called (the original behaviour).

    Returns
    -------
    callable
        A one-argument function named ``f_clipped`` that returns
        ``np.clip(f(x), lower, upper)``.
    """
    def f_clipped(x):
        # Resolve the defaults lazily so later changes to the globals are
        # still picked up, exactly as before explicit bounds existed.
        lower = MIN_PRED if min_value is None else min_value
        upper = MAX_PRED if max_value is None else max_value
        return np.clip(f(x), lower, upper)
    return f_clipped

In [258]:
# Bounds used by the wrappers that convert_to_clipped() creates.
MIN_PRED = 0
MAX_PRED = 5

# Candidate target transformations in keyword form; the clipped variants
# reuse the plain inverse functions through convert_to_clipped().
target_transformations = [
    {"func": func, "inverse_func": inverse_func}
    for func, inverse_func in (
        (None, None),
        (np.log, np.exp),
        (np.log, convert_to_clipped(np.exp)),
        (np.sqrt, np.square),
        (np.sqrt, convert_to_clipped(np.square)),
    )
]
target_transformations

[{'func': None, 'inverse_func': None},
 {'func': <ufunc 'log'>, 'inverse_func': <ufunc 'exp'>},
 {'func': <ufunc 'log'>,
  'inverse_func': <function __main__.convert_to_clipped.<locals>.f_clipped(x)>},
 {'func': <ufunc 'sqrt'>, 'inverse_func': <ufunc 'square'>},
 {'func': <ufunc 'sqrt'>,
  'inverse_func': <function __main__.convert_to_clipped.<locals>.f_clipped(x)>}]

In [259]:
def _callable_label(func):
    """Return a short display name for a transformation callable."""
    if func is None:
        return "identity"
    # Not every callable has __name__ (e.g. functools.partial objects or
    # callable instances); use repr() for those instead of failing.
    return getattr(func, "__name__", repr(func))


def test_transformation(transformation, X_train, X_test, y_train, y_test):
    """Fit a target-transformed linear regression and report its test R^2.

    Parameters
    ----------
    transformation : dict
        Provides ``"func"`` and ``"inverse_func"`` (either may be ``None``);
        both are handed to ``TransformedTargetRegressor``.
    X_train, X_test, y_train, y_test
        A prepared train/test split; fitting uses the training part and
        scoring uses the test part.

    Returns
    -------
    dict
        Display names of the two callables under ``'transform_func'`` and
        ``'inverse_func'`` (``"identity"`` for ``None``; ``repr`` when a
        callable has no ``__name__``) and the score under ``'r2'``.
    """
    model = TransformedTargetRegressor(LinearRegression(),
                                       func=transformation["func"],
                                       inverse_func=transformation["inverse_func"])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    return {
        'transform_func': _callable_label(transformation["func"]),
        'inverse_func': _callable_label(transformation["inverse_func"]),
        'r2': r2
    }
def test_transformations(transformations: list[dict], X_train, X_test, y_train, y_test):
    """Run ``test_transformation`` for each entry and collect a results table.

    All entries are evaluated on the same split passed in by the caller;
    the DataFrame has one row per entry, in input order.
    """
    def score(spec):
        return test_transformation(spec, X_train, X_test, y_train, y_test)

    return pd.DataFrame([score(spec) for spec in transformations])

# Load the California housing data and keep all feature columns but the last two.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X = X.iloc[:, :-2]

# Seed for the train/test split. With None the split differs between runs;
# re-enable the integer below to fix it.
# random_state = 123213
random_state = None
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)
# Score every candidate on the same split; the table is the cell output.
test_transformations(target_transformations, X_train, X_test, y_train, y_test)



Unnamed: 0,transform_func,inverse_func,r2
0,identity,identity,0.528756
1,log,exp,-0.098357
2,log,f_clipped,0.518923
3,sqrt,square,0.486382
4,sqrt,f_clipped,0.554137
