In [3]:
# Gerekli kütüphaneler
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PowerTransformer
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

In [4]:
# Load the California housing dataset as pandas objects:
# `X` holds the feature DataFrame and `y` the regression target Series.
data = fetch_california_housing(as_frame=True)
X, y = data.data, data.target

In [5]:
# Baseline: fit on the raw (untransformed) features.
# The 80/20 split uses a fixed seed so every experiment in this notebook
# is evaluated on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Seed the estimator as well: without `random_state` the reported baseline
# score is not guaranteed to be identical across runs.
# NOTE(review): the transform experiments in this notebook use different
# estimators (XGBRegressor, LGBMRegressor), so score differences against this
# baseline reflect the model change as well as the feature transform.
model = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)
print("Dönüşümsüz R2:", r2_score(y_test, model.predict(X_test)))

Dönüşümsüz R2: 0.7756446042829697


In [7]:
# Box-Cox is only defined for strictly positive inputs, so keep just the
# columns in which every value is greater than zero.
strictly_positive = X.gt(0).all()
X_positive = X.loc[:, strictly_positive]

In [8]:
# Fit a Box-Cox power transform on the strictly positive columns and wrap the
# resulting array back into a DataFrame with the original column names.
# The new frame gets a fresh default (0..n-1) index.
pt = PowerTransformer(method='box-cox')
boxcox_values = pt.fit_transform(X_positive)
X_boxcox_transformed = pd.DataFrame(boxcox_values, columns=X_positive.columns)

In [9]:
# Columns that were not Box-Cox transformed are carried over unchanged.
untransformed_cols = [col for col in X.columns if col not in X_boxcox_transformed.columns]
X_rest = X[untransformed_cols]
# Reset the index of the untouched columns so both parts line up row by row
# with the default index of the transformed frame before concatenating.
X_boxcox_final = pd.concat(
    [X_boxcox_transformed, X_rest.reset_index(drop=True)],
    axis=1,
)

In [10]:
# Box-Cox experiment.
# Split the raw features first and fit the transformer on the training rows
# only. The `X_boxcox_final` frame built earlier was fitted on *all* rows, so
# using it here would let test-set statistics (the estimated lambdas and the
# standardisation mean/variance) leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Only the strictly positive columns are eligible for Box-Cox; every other
# column is passed through unchanged.
boxcox_cols = list(X_positive.columns)
pt = PowerTransformer(method='box-cox').fit(X_train_raw[boxcox_cols])

# Work on copies so the raw split frames stay intact.
X_train = X_train_raw.copy()
X_train[boxcox_cols] = pt.transform(X_train_raw[boxcox_cols])
X_test = X_test_raw.copy()
X_test[boxcox_cols] = pt.transform(X_test_raw[boxcox_cols])

# NOTE(review): the baseline cell uses GradientBoostingRegressor while this
# one uses XGBRegressor, so the score difference is not attributable to the
# Box-Cox transform alone.
model = XGBRegressor().fit(X_train, y_train)
print("Box-Cox R2:", r2_score(y_test, model.predict(X_test)))

Box-Cox R2: 0.8301370561019205


In [11]:
# Yeo-Johnson experiment (applied to every feature column).
# Split first, then fit the transformer on the training rows only, so that no
# test-set statistics leak into the preprocessing step.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pt = PowerTransformer(method='yeo-johnson').fit(X_train_raw)

# Transform all rows with the train-fitted parameters. Keeping the original
# index lets the train/test partitions be selected by label below.
X_yeo = pd.DataFrame(pt.transform(X), columns=X.columns, index=X.index)
X_train = X_yeo.loc[X_train_raw.index]
X_test = X_yeo.loc[X_test_raw.index]

# NOTE(review): the baseline cell uses GradientBoostingRegressor while this
# one uses LGBMRegressor, so the score difference is not attributable to the
# Yeo-Johnson transform alone.
model = LGBMRegressor().fit(X_train, y_train)
print("Yeo-Johnson R2:", r2_score(y_test, model.predict(X_test)))

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1838
[LightGBM] [Info] Number of data points in the train set: 16512, number of used features: 8
[LightGBM] [Info] Start training from score 2.071947
Yeo-Johnson R2: 0.8359119945123601
