In [82]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn import metrics
import numpy as np



In [83]:
# Read the prepared dataset from the CSV file in the working directory.
final_df = pd.read_csv(filepath_or_buffer='df_final.csv')

In [84]:

# 10% subsample kept for quick tuning experiments. The seed is pinned so the
# same rows are drawn on every Restart & Run All; without it each run tunes on
# a different subset and results cannot be compared.
final_df_10 = final_df.sample(frac=0.10, random_state=42)

# Full feature matrix (every column except the target) and the target itself.
X = final_df.drop(columns=['log_price'])
y = final_df['log_price']

In [85]:
# Hold out 20% of the rows for testing; the seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [86]:
# Baseline gradient-boosting model trained on every feature.
# random_state is pinned because the estimator randomly permutes features at
# each split; unseeded, the metrics below drift slightly from run to run.
gb = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)

gb.fit(x_train, y_train)

y_pred_gb = gb.predict(x_test)

# Hold-out metrics; RMSE is derived from the MSE already computed instead of
# scoring the predictions a second time.
mae_gb  = metrics.mean_absolute_error(y_test, y_pred_gb)
mse_gb  = metrics.mean_squared_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mse_gb)
r2_gb   = metrics.r2_score(y_test, y_pred_gb)


print('\nMean Absolute Error of Gradient Boosting     : ', mae_gb)
print('\nMean Squarred Error of Gradient Boosting     : ', mse_gb)
print('\nRoot Mean Squarred Error of Gradient Boosting: ', rmse_gb)
print('\nR2 Score of Gradient Boosting                : ', r2_gb)


Mean Absolute Error of Gradient Boosting     :  0.3059253650749348

Mean Squarred Error of Gradient Boosting     :  0.1696451814795339

Root Mean Squarred Error of Gradient Boosting:  0.41188005715199894

R2 Score of Gradient Boosting                :  0.6613985941338612


In [87]:
# Find important columns: rank features by the importance the fitted baseline
# model assigned to them and keep those above the cut-off.
#
# Column names are taken from x_train (the frame `gb` was fitted on), not from
# the notebook-global X. A later cell rebinds X to the selected subset, and
# zip() silently truncates to the shorter input, so re-running this cell after
# that point would pair importances with the wrong feature names.
IMPORTANCE_THRESHOLD = 0.004  # minimum importance for a feature to be kept

feature_importances = gb.feature_importances_
sorted_feature_importances = sorted(zip(feature_importances, x_train.columns), reverse=True)
important_columns = [
    feature_name
    for importance, feature_name in sorted_feature_importances
    if importance > IMPORTANCE_THRESHOLD
]
len(important_columns)


18

In [64]:
# Tuning control

# X = final_df_10[important_columns]
# y = final_df_10.log_price

In [88]:
# Final tuning: narrow the feature matrix to the selected columns; the target
# remains the full log_price column.

X = final_df.loc[:, important_columns]
y = final_df['log_price']

In [89]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error


# Re-split using the current X / y (same 80/20 proportions and seed as before).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyper-parameters and the candidate values explored by the grid search.
param_grid = {
    'n_estimators': [900,1100,1200],  # number of trees
    'learning_rate': [0.01],  # learning rate
    'max_depth': [1,3,5],  # maximum depth of each tree
    'min_samples_split': [12],  # minimum number of samples needed to split a node
}

# Base Gradient Boosting Regressor; the seed makes every candidate fit repeatable.
gbr_model = GradientBoostingRegressor(random_state=42)

# Run the grid search with 3-fold cross-validation.
# scoring is spelled out so the "R^2" label printed below is guaranteed to
# match what best_score_ holds, and n_jobs=-1 spreads the independent
# candidate/fold fits over all available cores.
grid_search = GridSearchCV(
    estimator=gbr_model,
    param_grid=param_grid,
    cv=3,
    scoring='r2',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print("En iyi parametreler:", grid_search.best_params_)
print("En iyi R^2 skoru:", grid_search.best_score_)

# Best estimator, refitted on the whole training split by GridSearchCV.
best_model = grid_search.best_estimator_

# Evaluate the best model on the held-out test split.
y_pred = best_model.predict(X_test)

# RMSE is derived from the MSE already computed rather than recomputing it.
mae  = metrics.mean_absolute_error(y_test, y_pred)
mse  = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2   = metrics.r2_score(y_test, y_pred)

print('\nMean Absolute Error      : ', mae)
print('\nMean Squarred Error      : ', mse)
print('\nRoot Mean Squarred       : ', rmse)
print('\nR2 Score                 : ', r2)




En iyi parametreler: {'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 200}
En iyi R^2 skoru: 0.6453870379463805
Mean Squared Error (MSE): 0.14897454776957175

In [32]:
# Hand-tuned stochastic gradient-boosting model.
# subsample=0.5 fits every tree on a random half of the training rows, so the
# model is non-deterministic unless seeded; random_state is pinned so the
# metrics computed from y_pred can be reproduced.
gbm_tuned = GradientBoostingRegressor(
    learning_rate=0.01,
    max_depth=7,
    n_estimators=300,
    subsample=0.5,
    min_samples_split=12,
    random_state=42,
)

# fit() returns the estimator itself, so no reassignment is needed.
gbm_tuned.fit(X_train, y_train)
y_pred = gbm_tuned.predict(X_test)

In [34]:

# Score the current predictions against the held-out targets.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same value as taking the root of a freshly computed MSE
r2 = metrics.r2_score(y_test, y_pred)

# Emit each metric under its label, in the fixed report order.
metric_report = (
    ('\nMean Absolute Error of Gradient Boosting     : ', mae),
    ('\nMean Squarred Error of Gradient Boosting     : ', mse),
    ('\nRoot Mean Squarred Error of Gradient Boosting: ', rmse),
    ('\nR2 Score of Gradient Boosting                : ', r2),
)
for metric_label, metric_value in metric_report:
    print(metric_label, metric_value)


Mean Absolute Error of Gradient Boosting     :  0.28081734627831795

Mean Squarred Error of Gradient Boosting     :  0.1458273350655173

Root Mean Squarred Error of Gradient Boosting:  0.3818734542561414

R2 Score of Gradient Boosting                :  0.7089375587549267


In [None]:
# Scratch log of metric printouts pasted from earlier runs. It is a bare string
# literal (not assigned to any name), grouped under hand-written headings.
# NOTE(review): the unlabeled 0.001 / 0.005 / 0.004 headings are presumably the
# feature-importance cut-offs tried in the feature-selection cell — confirm.
''' Best param with important columns

Mean Absolute Error of Gradient Boosting     :  0.31347978774129803

Mean Squarred Error of Gradient Boosting     :  0.1799604177081631

Root Mean Squarred Error of Gradient Boosting:  0.42421741796885604

R2 Score of Gradient Boosting                :  0.640810013554126


    Best param all columns
    
Mean Absolute Error of Gradient Boosting     :  0.28081734627831795

Mean Squarred Error of Gradient Boosting     :  0.1458273350655173

Root Mean Squarred Error of Gradient Boosting:  0.3818734542561414

R2 Score of Gradient Boosting                :  0.7089375587549267


    default params all columns


Mean Absolute Error of Gradient Boosting     :  0.3059637993918744

Mean Squarred Error of Gradient Boosting     :  0.1696998722661488

Root Mean Squarred Error of Gradient Boosting:  0.41194644344398557

R2 Score of Gradient Boosting                :  0.6612894346689461


    default params important columns


Mean Absolute Error of Gradient Boosting     :  0.32492603663969094

Mean Squarred Error of Gradient Boosting     :  0.1918796071381001

Root Mean Squarred Error of Gradient Boosting:  0.43804064553201005

R2 Score of Gradient Boosting                :  0.6170200404905624



0.001
Mean Absolute Error of Gradient Boosting     :  0.30602696567315296

Mean Squarred Error of Gradient Boosting     :  0.17010975095859518

Root Mean Squarred Error of Gradient Boosting:  0.4124436336744637

R2 Score of Gradient Boosting                :  0.6604713418691004



0.005
Mean Absolute Error of Gradient Boosting     :  0.31335393396530126

Mean Squarred Error of Gradient Boosting     :  0.17809808695935264

Root Mean Squarred Error of Gradient Boosting:  0.42201669038007567

R2 Score of Gradient Boosting                :  0.6445271118190776



0.004
Mean Absolute Error of Gradient Boosting     :  0.3106074203513541

Mean Squarred Error of Gradient Boosting     :  0.17492027520908043

Root Mean Squarred Error of Gradient Boosting:  0.4182347130608367

R2 Score of Gradient Boosting                :  0.6508698297014002
'''
