In [1]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import datasets

In [2]:
# Load the bundled iris dataset and unpack its feature matrix and class labels.
data = datasets.load_iris()
features, targets = data.data, data.target

In [3]:
# Hold out 20% of the iris samples for evaluation. The fixed seed makes the
# split, and therefore the reported metrics, reproducible across runs.
feature_train, feature_test, target_train, target_test = train_test_split(
    features, targets, test_size=0.2, random_state=123
)

In [4]:
# AdaBoost ensemble of 100 boosting rounds with a fixed seed.
model = AdaBoostClassifier(
    n_estimators=100,
    learning_rate=1,
    random_state=123,
)

In [5]:
# Train on the training split, then predict labels for the held-out split.
modelfitted = model.fit(X=feature_train, y=target_train)
pred = modelfitted.predict(X=feature_test)

In [6]:
# Evaluate the iris predictions: per-class confusion counts, then overall accuracy.
iris_confusion = confusion_matrix(y_true=target_test, y_pred=pred)
iris_accuracy = accuracy_score(y_true=target_test, y_pred=pred)
print(iris_confusion)
print(iris_accuracy)

[[11  0  0]
 [ 0  8  1]
 [ 0  2  8]]
0.9


In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing

In [10]:
# Load the semicolon-delimited wine table. Its first column is an unnamed saved
# row index; reading it as the index keeps pandas from loading it as a spurious
# "Unnamed: 0" data column.
data = pd.read_csv("data/wine.csv", sep=";", index_col=0)
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [11]:
def is_tasty(q, threshold=7):
    """Return 1 when the quality score ``q`` is at least ``threshold``, else 0.

    Parameters
    ----------
    q : number
        Quality score of a single wine.
    threshold : number, default 7
        Minimum score that counts as tasty. The default keeps the original
        cut-off, so existing one-argument calls behave exactly as before.

    Returns
    -------
    int
        1 if ``q >= threshold``, otherwise 0.
    """
    return 1 if q >= threshold else 0

In [15]:
# The eleven measurement columns used as predictors.
feature_columns = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]
features = data[feature_columns]

# Binary label derived from the quality score via is_tasty; it is stored on the
# frame as the 'tasty' column and then used as the target.
data['tasty'] = data['quality'].apply(is_tasty)
targets = data['tasty']

In [16]:
# Convert to NumPy: X as an (n_samples, 11) matrix, y as an (n_samples, 1) column.
X = np.reshape(np.array(features), (-1, 11))
y = np.reshape(np.array(targets), (-1, 1))

In [17]:
# Preview the feature matrix before scaling (NumPy elides the middle rows/columns).
print(X)

[[ 7.    0.27  0.36 ...  3.    0.45  8.8 ]
 [ 6.3   0.3   0.34 ...  3.3   0.49  9.5 ]
 [ 8.1   0.28  0.4  ...  3.26  0.44 10.1 ]
 ...
 [ 6.5   0.24  0.19 ...  2.99  0.46  9.4 ]
 [ 5.5   0.29  0.3  ...  3.34  0.38 12.8 ]
 [ 6.    0.21  0.38 ...  3.26  0.32 11.8 ]]


In [18]:
# Rescale every feature column with min-max scaling.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before any
# train/test split, so test rows influence the scaling parameters.
scaler = preprocessing.MinMaxScaler()
X = scaler.fit_transform(X)

In [19]:
# Preview the feature matrix after min-max scaling.
print(X)

[[0.30769231 0.18627451 0.21686747 ... 0.25454545 0.26744186 0.12903226]
 [0.24038462 0.21568627 0.20481928 ... 0.52727273 0.31395349 0.24193548]
 [0.41346154 0.19607843 0.24096386 ... 0.49090909 0.25581395 0.33870968]
 ...
 [0.25961538 0.15686275 0.11445783 ... 0.24545455 0.27906977 0.22580645]
 [0.16346154 0.20588235 0.18072289 ... 0.56363636 0.18604651 0.77419355]
 [0.21153846 0.12745098 0.22891566 ... 0.49090909 0.11627907 0.61290323]]


In [20]:
# Split the min-max-scaled matrix X and its labels y. The original call split
# the raw `features`/`targets` frame, which silently discarded the scaling step.
# y is an (n, 1) column, so it is flattened to the 1-D shape scikit-learn
# expects for targets. The fixed seed makes the split reproducible.
feature_train, feature_test, target_train, target_test = train_test_split(
    X, y.ravel(), test_size=0.2, random_state=123
)

In [26]:
# Hyper-parameter grid for the search: 4 ensemble sizes x 4 learning rates.
params = dict(
    n_estimators=[10, 50, 100, 200],
    learning_rate=[0.01, 0.05, 0.3, 1],
)

In [27]:
# Exhaustive search over `params` using 10-fold cross-validation. The base
# estimator is seeded (same seed as the iris model) so repeated runs of the
# search select the same configuration.
estimator = AdaBoostClassifier(random_state=123)
grid_search = GridSearchCV(estimator=estimator, param_grid=params, cv=10)

In [28]:
# Run the cross-validated grid search on the training split.
grid_search.fit(X=feature_train, y=target_train)

In [29]:
# Predict held-out labels with the fitted grid search.
preds = grid_search.predict(X=feature_test)

In [30]:
# Evaluate the wine predictions: confusion counts first, then overall accuracy.
wine_confusion = confusion_matrix(y_true=target_test, y_pred=preds)
wine_accuracy = accuracy_score(y_true=target_test, y_pred=preds)
print(wine_confusion)
print(wine_accuracy)

[[761  16]
 [154  49]]
0.826530612244898
