# Read

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

train=pd.read_csv('data/train.csv')
test=pd.read_csv('data/test.csv')

# Scailing
scaler=MinMaxScaler()
scaler.fit(train[['fixed acidity']])
train['Scaled fixed acidity']=scaler.transform(train[['fixed acidity']])
test['Scaled fixed acidity']=scaler.transform(test[['fixed acidity']])

#Encoding
encoder=OneHotEncoder()
encoder.fit(train[['type']])
onehot=encoder.transform(train[['type']])
onehot=onehot.toarray()
onehot=pd.DataFrame(onehot)
onehot.columns=encoder.get_feature_names()
train=pd.concat([train,onehot],axis=1)
train=train.drop(columns=['type'])

onehot=encoder.transform(test[['type']])
onehot=onehot.toarray()
onehot=pd.DataFrame(onehot)
onehot.columns=encoder.get_feature_names()
test=pd.concat([test,onehot],axis=1)
test=test.drop(columns=['type'])

test.head()

# Bayesian Optimization

In [None]:
from bayes_opt import BayesianOptimization

In [None]:
X=train.drop(columns=['index','quality'])
y=train['quality']

In [None]:
rf_parameter_bounds={
    'max_depth':(1,3),
    'n_estimators':(30,100),
    }

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def rf_bo(max_depth,n_estimators):
    rf_params={
              'max_depth':int(round(max_depth)),
              'n_estimators':int(round(n_estimators)),
              }
    rf=RandomForestClassifier(**rf_params)
    
    X_train, X_valid, y_train, y_valid = train_test_split(X,y,test_size=0.2, )
    
    rf.fit(X_train,y_train)
    score=accuracy_score(y_valid,rf.predict(X_valid))
    return score

In [None]:
B0_rf=BayesianOptimization(f=rf_bo,pbounds=rf_parameter_bounds,random_state=0)

In [None]:
B0_rf.maximize(init_points=5,n_iter=5)

In [None]:
max_params=B0_rf.max['params']

max_params['max_depth']=int(max_params['max_depth'])
max_params['n_estimators']=int(max_params['n_estimators'])
print(max_params)

In [None]:
B0_tuned_rf=RandomForestClassifier(**max_params)

# XGBoost 

![image.png](attachment:image.png)

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from xgboost import XGBClassifier
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

train=pd.read_csv('data/train.csv')
test=pd.read_csv('data/test.csv')

# Scailing
scaler=MinMaxScaler()
scaler.fit(train[['fixed acidity']])
train['Scaled fixed acidity']=scaler.transform(train[['fixed acidity']])
test['Scaled fixed acidity']=scaler.transform(test[['fixed acidity']])

#Encoding
encoder=OneHotEncoder()
encoder.fit(train[['type']])
onehot=encoder.transform(train[['type']])
onehot=onehot.toarray()
onehot=pd.DataFrame(onehot)
onehot.columns=encoder.get_feature_names()
train=pd.concat([train,onehot],axis=1)
train=train.drop(columns=['type'])

onehot=encoder.transform(test[['type']])
onehot=onehot.toarray()
onehot=pd.DataFrame(onehot)
onehot.columns=encoder.get_feature_names()
test=pd.concat([test,onehot],axis=1)
test=test.drop(columns=['type'])

test.head()

Unnamed: 0,index,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,Scaled fixed acidity,x0_red,x0_white
0,0,9.0,0.31,0.48,6.6,0.043,11.0,73.0,0.9938,2.9,0.38,11.6,0.429752,0.0,1.0
1,1,13.3,0.43,0.58,1.9,0.07,15.0,40.0,1.0004,3.06,0.49,9.0,0.785124,1.0,0.0
2,2,6.5,0.28,0.27,5.2,0.04,44.0,179.0,0.9948,3.19,0.69,9.4,0.22314,0.0,1.0
3,3,7.2,0.15,0.39,1.8,0.043,21.0,159.0,0.9948,3.52,0.47,10.0,0.280992,0.0,1.0
4,4,6.8,0.26,0.26,2.0,0.019,23.5,72.0,0.99041,3.16,0.47,11.8,0.247934,0.0,1.0


In [2]:
from bayes_opt import BayesianOptimization

In [3]:
X=train.drop(columns=['index','quality'])
y=train['quality']

In [4]:
xgb_parameter_bounds={
                    'gamma' : (0,10),
                    'max_depth' : (1,3),
                    'subsample' : (0.5,1)
                    }

xgb_parameter_bounds = {
                      'gamma' : (0,10),
                      'max_depth' : (1,3), # 나무의 깊이
                      'subsample' : (0.5,1)
                      }

In [5]:
def xgb_bo(gamma,max_depth, subsample):
    xgb_params = {
              'gamma' : int(round(gamma)),
              'max_depth' : int(round(max_depth)),
               'subsample' : int(round(subsample)),      
              }
    xgb = XGBClassifier(**xgb_params)
    
    X_train, X_valid, y_train, y_valid = train_test_split(X,y,test_size = 0.2, )
    
    xgb.fit(X_train,y_train)
    score = accuracy_score(y_valid, xgb.predict(X_valid))
    return score

In [6]:
BO_xgb = BayesianOptimization(f = xgb_bo, pbounds = xgb_parameter_bounds,random_state = 0)

In [7]:
BO_xgb.maximize(init_points = 5, n_iter = 5)

|   iter    |  target   |   gamma   | max_depth | subsample |
-------------------------------------------------------------




| [0m 1       [0m | [0m 0.5409  [0m | [0m 5.488   [0m | [0m 2.43    [0m | [0m 0.8014  [0m |




| [95m 2       [0m | [95m 0.5591  [0m | [95m 5.449   [0m | [95m 1.847   [0m | [95m 0.8229  [0m |




| [95m 3       [0m | [95m 0.5645  [0m | [95m 4.376   [0m | [95m 2.784   [0m | [95m 0.9818  [0m |




| [95m 4       [0m | [95m 0.5818  [0m | [95m 3.834   [0m | [95m 2.583   [0m | [95m 0.7644  [0m |




| [0m 5       [0m | [0m 0.5373  [0m | [0m 5.68    [0m | [0m 2.851   [0m | [0m 0.5355  [0m |




| [0m 6       [0m | [0m 0.5391  [0m | [0m 3.521   [0m | [0m 1.754   [0m | [0m 0.832   [0m |




| [95m 7       [0m | [95m 0.6     [0m | [95m 2.303   [0m | [95m 2.915   [0m | [95m 0.6916  [0m |




| [0m 8       [0m | [0m 0.5836  [0m | [0m 2.981   [0m | [0m 2.984   [0m | [0m 0.5205  [0m |




| [0m 9       [0m | [0m 0.003636[0m | [0m 1.617   [0m | [0m 3.0     [0m | [0m 0.5     [0m |




| [0m 10      [0m | [0m 0.5682  [0m | [0m 3.72    [0m | [0m 1.231   [0m | [0m 0.9286  [0m |


In [8]:
xgb_tune=XGBClassifier(gamma=4.376, max_depth=3,subsample=0.9818)
xgb_tune.fit(X,y)

pred=xgb_tune.predict(test.drop(columns=['index']))

sub=pd.read_csv('data/sample_submission.csv')
sub['quality']=pred
sub.to_csv('tune_xgb.csv',index = False)



