Ptzetestować jedną pętlą:
- czystą regresję liniową
- regresję liniowąz zlogarytmowaną zmienną celu
- pipeline z przeksztalceniem potęgowym zmiennych objaśniających (X)
- połaczenie 2 i 3

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.datasets import fetch_california_housing
from sklearn.compose import TransformedTargetRegressor
from sklearn.preprocessing import PowerTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

In [2]:
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X = X.iloc[:, :-2]
X.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467


In [7]:
random_state = None

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

In [8]:
def evaluate_pipeline(X_train, X_test, y_train, y_test, pipeline_args):
    pipeline = Pipeline([
        ('transformer', pipeline_args.get('transformer')),
        ('regressor', pipeline_args.get('regressor'))
    ])
    pipeline.fit(X_train,y_train)
    return r2_score(y_test, pipeline.predict(X_test))

In [9]:
experiments = {
    'lr' : {
        'transformer' : None,
        'regressor' : LinearRegression()
    },
    'lr_log_y' : {
        'transformer' : None,
        'regressor' : TransformedTargetRegressor(
            LinearRegression(),
            func = np.log,
            inverse_func = np.exp
        )
    },
    'lr_pwr_transf_X' : {
        'transformer' : PowerTransformer(),
        'regressor' : LinearRegression()
    },
    'lr_log_y_pwr_transf_X' : {
        'transformer' : PowerTransformer(),
        'regressor' : TransformedTargetRegressor(
            LinearRegression(),
            func = np.log,
            inverse_func = np.exp
        )
    }
}

In [12]:
experiments_scores = {}
for experiment, pipeline_args in experiments.items():
    experiments_scores[experiment] = evaluate_pipeline(
        X_train,
        X_test,
        y_train,
        y_test,
        pipeline_args
    )

In [13]:
pd.DataFrame(experiments_scores,index = ['r2_score']).T

Unnamed: 0,r2_score
lr,0.538602
lr_log_y,-0.113824
lr_pwr_transf_X,0.568519
lr_log_y_pwr_transf_X,0.605712


### ***********************************************************

In [15]:
pipelines_to_test = [
    [('model', LinearRegression())],
    [('model', TransformedTargetRegressor(func=np.log1p, inverse_func=np.expm1))],
    [('exp_transform', PolynomialFeatures(2)), ('model', LinearRegression())],
    [('exp_transform', PolynomialFeatures(2)), ('model', TransformedTargetRegressor(func=np.log1p, inverse_func=np.expm1))]
]

for pipe in pipelines_to_test:
    pipeline = Pipeline(pipe)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print(f"{r2_score(y_true=y_test, y_pred=y_pred)=}")

r2_score(y_true=y_test, y_pred=y_pred)=0.5386023261548197
r2_score(y_true=y_test, y_pred=y_pred)=0.3605590719280388
r2_score(y_true=y_test, y_pred=y_pred)=0.6012032681228154
r2_score(y_true=y_test, y_pred=y_pred)=0.5973128759650113


### ***********************************************************

In [18]:
regressors = [
    LinearRegression(),
    TransformedTargetRegressor(LinearRegression(), func=np.log1p, inverse_func=np.expm1),
    Pipeline([
         ("power_transformer", PowerTransformer()),
         ("model", LinearRegression())
    ]),
    Pipeline([
         ("power_transformer", PowerTransformer()),
         ("model", TransformedTargetRegressor(LinearRegression(), func=np.log1p, inverse_func=np.expm1))
    ]),
]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
results = []
for regressor in regressors:
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    results.append({
        'model': regressor,
        'r2': r2_score(y_test, y_pred)
    }) 

pd.DataFrame(results)

Unnamed: 0,model,r2
0,LinearRegression(),0.546378
1,TransformedTargetRegressor(func=<ufunc 'log1p'...,0.28742
2,"(PowerTransformer(), LinearRegression())",0.578175
3,"(PowerTransformer(), TransformedTargetRegresso...",0.620399


### ***********************************************************