In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor
import sklearn.metrics as mt


In [11]:
data = pd.read_csv("/Users/sevketugurel/Desktop/reklam.csv")
veri = data.copy()

y = veri["Sales"]
X = veri.drop(columns="Sales", axis=1)

X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# DecisionTreeRegressor kullanılıyor
dtModel = DecisionTreeRegressor(random_state=0)
dtModel.fit(X_train, y_train)
dtTahmin = dtModel.predict(x_test)

# BaggingRegressor kullanılıyor
bgModel = BaggingRegressor(random_state=0)
bgModel.fit(X_train, y_train)
bgTahmin = bgModel.predict(x_test)

# RandomForest kullanılıyor
rfModel = RandomForestRegressor(random_state=0)
rfModel.fit(X_train,y_train)
rfTahmin = rfModel.predict(x_test)

In [12]:
r2dt = mt.r2_score(y_test, dtTahmin)
r2bg = mt.r2_score(y_test, bgTahmin)
r2rf = mt.r2_score(y_test, rfTahmin)

rmseDT = mt.mean_squared_error(y_test, dtTahmin, squared=False)
rmseBG = mt.mean_squared_error(y_test, bgTahmin, squared=False)
rmseRF = mt.mean_squared_error(y_test, rfTahmin, squared=False)

print("Karar Ağacı Modeli Modeli R2: {:.3f}  RMSE: {:.3f}".format(r2dt, rmseDT))
print("Bag Modeli R2: {:.3f}  RMSE: {:.3f}".format(r2bg, rmseBG))
print("Random Forest Modeli R2: {:.3f}  RMSE: {:.3f}".format(r2rf, rmseRF))


Karar Ağacı Modeli Modeli R2: 0.879  RMSE: 1.931
Bag Modeli R2: 0.951  RMSE: 1.227
Random Forest Modeli R2: 0.954  RMSE: 1.193


In [14]:
dtParametreler = {"min_samples_split":range(2,20),"max_leaf_nodes":range(2,20)}
dtGrid = GridSearchCV(estimator=dtModel,param_grid=dtParametreler,cv = 10,n_jobs=-1)
dtGrid.fit(X_train,y_train)
print("Karar Ağacı Best Params: ",dtGrid.best_params_)

bgParametreler = {"n_estimators":range(2,20)}
bgGrid = GridSearchCV(estimator=bgModel,param_grid=bgParametreler,cv = 10,n_jobs=-1)
bgGrid.fit(X_train,y_train) 
print("Bagging Best Params: ",bgGrid.best_params_)

rfParametreler = {"max_depth":range(2,20),"max_features":range(2,20),"n_estimators":range(2,20)}
rfGrid = GridSearchCV(estimator=rfModel,param_grid=rfParametreler,cv = 10,n_jobs=-1)
rfGrid.fit(X_train,y_train)
print("Random Forest Best Params: ",rfGrid.best_params_)

Karar Ağacı Best Params:  {'max_leaf_nodes': 18, 'min_samples_split': 4}
Bagging Best Params:  {'n_estimators': 18}


KeyboardInterrupt: 

In [15]:
'''
Karar Ağacı Best Params:  {'max_leaf_nodes': 18, 'min_samples_split': 4}
Bagging Best Params:  {'n_estimators': 18}
Random Forest Params : {"max_depth":13,"max_features":2,"n_estimators":19)}
'''

'\nKarar Ağacı Best Params:  {\'max_leaf_nodes\': 18, \'min_samples_split\': 4}\nBagging Best Params:  {\'n_estimators\': 18}\nRandom Forest Params : {"max_depth":13,"max_features":2,"n_estimators":19)}\n'

In [16]:
# DecisionTreeRegressor kullanılıyor
dtModel = DecisionTreeRegressor(random_state=0,max_leaf_nodes=18,min_samples_leaf=4)
dtModel.fit(X_train, y_train)
dtTahmin = dtModel.predict(x_test)

# BaggingRegressor kullanılıyor
bgModel = BaggingRegressor(random_state=0,n_estimators=18)
bgModel.fit(X_train, y_train)
bgTahmin = bgModel.predict(x_test)

# RandomForest kullanılıyor
rfModel = RandomForestRegressor(random_state=0,max_depth=13,max_features=2,n_estimators=19)
rfModel.fit(X_train,y_train)
rfTahmin = rfModel.predict(x_test)

In [17]:
r2dt = mt.r2_score(y_test, dtTahmin)
r2bg = mt.r2_score(y_test, bgTahmin)
r2rf = mt.r2_score(y_test, rfTahmin)

rmseDT = mt.mean_squared_error(y_test, dtTahmin, squared=False)
rmseBG = mt.mean_squared_error(y_test, bgTahmin, squared=False)
rmseRF = mt.mean_squared_error(y_test, rfTahmin, squared=False)

print("Karar Ağacı Modeli Modeli R2: {:.3f}  RMSE: {:.3f}".format(r2dt, rmseDT))
print("Bag Modeli R2: {:.3f}  RMSE: {:.3f}".format(r2bg, rmseBG))
print("Random Forest Modeli R2: {:.3f}  RMSE: {:.3f}".format(r2rf, rmseRF))


Karar Ağacı Modeli Modeli R2: 0.944  RMSE: 1.316
Bag Modeli R2: 0.954  RMSE: 1.187
Random Forest Modeli R2: 0.956  RMSE: 1.165


In [18]:
## Random Forest Fonksiyonları en verimli olan algoritma olarak bilinir.