<a href="https://colab.research.google.com/github/shin0105/4YP/blob/master/RFregression_Boston.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install bayesian-optimization
!pip install GPyOpt

In [12]:
#import packages
from sklearn.model_selection import train_test_split,cross_val_score,StratifiedKFold
from sklearn import ensemble
from sklearn.metrics import mean_squared_error,accuracy_score
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import matplotlib.pyplot as plt
from bayes_opt import BayesianOptimization

In [3]:
#reading csv file for Boston Housing Dataset
from google.colab import files
uploaded = files.upload()
import io
df = pd.read_csv(io.BytesIO(uploaded['hou_all.csv']),\
                 names = ['CRIM','ZN','INDUS','CHAS','NOS','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT','MEDV'])
df.head()

Saving hou_all.csv to hou_all.csv


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOS,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [4]:
#parameters = CRIM , ZN, INDUS
X = df.iloc[:,0:3].to_numpy()

#target = MEDV
Y = df.iloc[:,-1].to_numpy()

#train test split
xTrain, xTest, yTrain, yTest = train_test_split(X, Y, test_size = 0.3, random_state = 10)

#building rf regression model
rf = RandomForestRegressor()
rf.fit(xTrain,yTrain)
yPred = rf.predict(xTest)

#calculating loss - MSE
MSE = mean_squared_error(yPred,yTest)

In [10]:
def bo_params_rf(max_samples,n_estimators,max_features):
    
    params = {
        'max_samples': max_samples,
        'max_features':max_features,
        'n_estimators':int(n_estimators)
    }
    clf = RandomForestRegressor(max_samples=params['max_samples'],max_features=params['max_features'],n_estimators=params['n_estimators'])
    clf.fit(xTrain,yTrain)
    yPred = clf.predict(xTest)
    
    #using negative MSE as score (loss function)
    score = -mean_squared_error(yPred,yTest)
    return score

In [33]:
#boundary for Bayesian optimization
bounds = {'max_samples':(0.5,1),
          'max_features':(0.5,1),
          'n_estimators':(100,200)}

rf_bo = BayesianOptimization(bo_params_rf, bounds)

results = rf_bo.maximize(n_iter=20, init_points=5)

|   iter    |  target   | max_fe... | max_sa... | n_esti... |
-------------------------------------------------------------
| [0m1        [0m | [0m-62.73   [0m | [0m0.9463   [0m | [0m0.7309   [0m | [0m139.8    [0m |
| [95m2        [0m | [95m-62.38   [0m | [95m0.8209   [0m | [95m0.6473   [0m | [95m186.6    [0m |
| [0m3        [0m | [0m-64.12   [0m | [0m0.8604   [0m | [0m0.9446   [0m | [0m163.9    [0m |
| [95m4        [0m | [95m-60.9    [0m | [95m0.5147   [0m | [95m0.5962   [0m | [95m129.4    [0m |
| [0m5        [0m | [0m-62.37   [0m | [0m0.5308   [0m | [0m0.795    [0m | [0m127.9    [0m |
| [0m6        [0m | [0m-65.68   [0m | [0m0.9592   [0m | [0m0.9663   [0m | [0m130.0    [0m |
| [0m7        [0m | [0m-63.52   [0m | [0m0.647    [0m | [0m0.8853   [0m | [0m132.7    [0m |
| [0m8        [0m | [0m-63.41   [0m | [0m0.731    [0m | [0m0.7409   [0m | [0m155.6    [0m |
| [0m9        [0m | [0m-64.01   [0m | [0m0.55

In [34]:
params = rf_bo.max['params']
params['n_estimators']= int(params['n_estimators'])
print(params)

{'max_features': 0.7203624219872533, 'max_samples': 0.5093120201090363, 'n_estimators': 114}


In [40]:
#using GPyOpt
import GPyOpt

bounds = [ {'name': 'max_samples', 'type': 'continuous', 'domain': (0.5, 1)},
        {'name': 'max_features', 'type': 'continuous', 'domain': (0.5, 1)},
        {'name': 'n_estimators', 'type': 'discrete', 'domain': (100, 200)}
      ]

def rf_score(parameters):
  parameters = parameters[0]
  params = {
        'max_samples': parameters[0],
        'max_features':parameters[1],
        'n_estimators':int(parameters[2])
    }
  
  clf = RandomForestRegressor(max_samples=params['max_samples'],max_features=params['max_features'],n_estimators=params['n_estimators'])
  clf.fit(xTrain,yTrain)
  yPred = clf.predict(xTest)
    
  #using negative MSE as score (loss function)
  score = -mean_squared_error(yPred,yTest)
  return score

In [41]:
optimizer = GPyOpt.methods.BayesianOptimization(f=rf_score, 
                                                   domain=bounds,
                                                   acquisition_type ='EI',
                                                   initial_design_numdata = 5,
                                                   model_type='GP',
                                                   acquisition_jitter = 0.05,
                                                   maximize=True
                                                   )

optimizer.run_optimization(max_iter=20)

#minimum MSE
MSE_min=-optimizer.fx_opt
#best parameters
params_min=optimizer.x_opt
print(MSE_min)
print(params_min)

-59.72452424342105
[  0.61149139   0.85901991 100.        ]
