## Lasso regularisation

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler

## Read Data

In [3]:
data = pd.read_csv('../Kyoto_Train.csv')
data.shape

(124055, 24)

In [4]:
data.head()

Unnamed: 0,Duration,Source,Destination_bytes,Count,Same_srv_rate,Serror_rate,Srv_serror_rate,Dst_host_count,Dst_host_srv_count,Dst_host_same_port_rate,...,Service_code,Flag_code,IDS_detection_code,Malware_detection_code,Ashula_detection_code,Source_IP_Address_code,Destination_IP_Address_code,Start_Time_code,Protocol_code,Label_code
0,2.863309,0,0,0,0.0,0.0,1.0,0,0,0.0,...,6.0,6.0,0.0,0.0,0.0,90847.0,14024.0,25836.0,1.0,0.0
1,0.0,0,0,0,0.0,0.0,1.0,0,0,0.0,...,6.0,6.0,0.0,0.0,0.0,218531.0,8968.0,45541.0,1.0,0.0
2,0.0,0,0,0,0.0,0.0,1.0,0,0,0.0,...,6.0,6.0,0.0,0.0,0.0,176665.0,15997.0,59860.0,1.0,0.0
3,0.0,0,0,0,0.0,0.0,0.67,49,100,0.02,...,6.0,6.0,0.0,0.0,0.0,52769.0,473.0,40649.0,1.0,0.0
4,0.0,0,0,1,1.0,0.0,0.36,0,2,0.0,...,6.0,0.0,0.0,0.0,0.0,65048.0,16609.0,39283.0,1.0,0.0


In [5]:
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
numerical_vars = list(data.select_dtypes(include=numerics).columns)
data = data[numerical_vars]
data.shape

(124055, 24)

### Train - Test Split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(labels=['Label_code'], axis=1),
    data['Label_code'],
    test_size=0.2,
    random_state=0)

X_train.shape, X_test.shape

((99244, 23), (24811, 23))

In [7]:
# linear models benefit from feature scaling

scaler = StandardScaler()
scaler.fit(X_train.fillna(0))

StandardScaler()

### Select features with Lasso

In [8]:
sel_ = SelectFromModel(
    LogisticRegression(C=0.001, penalty='l1', solver='liblinear', random_state=42))

sel_.fit(scaler.transform(X_train), y_train)

SelectFromModel(estimator=LogisticRegression(C=0.001, penalty='l1',
                                             random_state=42,
                                             solver='liblinear'))

In [9]:
# Visualise the index of the # features that were selected

sel_.get_support()

array([False, False, False, False,  True, False,  True,  True,  True,
       False, False, False,  True,  True, False,  True, False, False,
       False, False, False, False,  True])

In [10]:
selected_feat = X_train.columns[(sel_.get_support())]

print('total features: {}'.format((X_train.shape[1])))
print('selected features: {}'.format(len(selected_feat)))
print('features with coefficients shrank to zero: {}'.format(
    np.sum(sel_.estimator_.coef_ == 0)))

total features: 23
selected features: 8
features with coefficients shrank to zero: 15


### Examine coefficients that shrank to zero

In [11]:
# the number of features which coefficient was shrank to zero:
np.sum(sel_.estimator_.coef_ == 0)

15

In [12]:
# we can identify the removed features like this:

removed_feats = X_train.columns[(sel_.estimator_.coef_ == 0).ravel().tolist()]
removed_feats

Index(['Duration', 'Source', 'Destination_bytes', 'Count', 'Serror_rate',
       'Dst_host_same_port_rate', 'Dst_host_serror_rate',
       'Dst_host_srv_serror_rate', 'Service_code', 'IDS_detection_code',
       'Malware_detection_code', 'Ashula_detection_code',
       'Source_IP_Address_code', 'Destination_IP_Address_code',
       'Start_Time_code'],
      dtype='object')

In [13]:
# we can then remove the features from the training and testing set
# like this:

X_train_selected = sel_.transform(X_train)
X_test_selected = sel_.transform(X_test)

X_train_selected.shape, X_test_selected.shape

((99244, 8), (24811, 8))

## Classifiers




In [14]:
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from catboost import CatBoostClassifier

## Metrics Evaluation




In [15]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, f1_score
from sklearn import metrics
from sklearn.model_selection import cross_val_score

### Logistic Regression




In [16]:
%%time
clf_LR = linear_model.LogisticRegression(n_jobs=-1, random_state=42, C=1).fit(X_train_selected, y_train)

CPU times: user 68.8 ms, sys: 182 ms, total: 250 ms
Wall time: 2.32 s


In [17]:
pred_y_test = clf_LR.predict(X_test_selected)
print('Accuracy:', accuracy_score(y_test, pred_y_test))

f1 = f1_score(y_test, pred_y_test)
print('F1 Score:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_test)
print('FPR:', fpr[1])
print('TPR:', tpr[1])

Accuracy: 0.9245898996412881
F1 Score: 0.3875613747954174
FPR: 0.005694963516639971
TPR: 0.2535331905781585


### Naive Bayes




In [18]:
%%time
clf_NB = GaussianNB(var_smoothing=1e-05).fit(X_train_selected, y_train)

CPU times: user 22.6 ms, sys: 7.7 ms, total: 30.3 ms
Wall time: 28.6 ms


In [19]:
pred_y_testNB = clf_NB.predict(X_test_selected)
print('Accuracy:', accuracy_score(y_test, pred_y_testNB))

f1 = f1_score(y_test, pred_y_testNB)
print('F1 Score:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_testNB)
print('FPR:', fpr[1])
print('TPR:', tpr[1])

Accuracy: 0.8219338196767563
F1 Score: 0.32384450566268747
FPR: 0.13974906567004805
TPR: 0.45310492505353317


### Random Forest




In [24]:
%%time
clf_RF = RandomForestClassifier(random_state=0,max_depth=70,n_estimators=100).fit(X_train_selected, y_train)

CPU times: user 5.77 s, sys: 36.1 ms, total: 5.8 s
Wall time: 5.81 s


In [25]:
pred_y_testRF = clf_RF.predict(X_test_selected)
print('Accuracy:', accuracy_score(y_test, pred_y_testRF))

f1 = f1_score(y_test, pred_y_testRF, average='weighted', zero_division=0)
print('F1 Score:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_testRF)
print('FPR:', fpr[1])
print('TPR:', tpr[1])

Accuracy: 0.9616299222119221
F1 Score: 0.9610807891246134
FPR: 0.01797472859939491
TPR: 0.7653104925053533


### KNN




In [26]:
%%time
clf_KNN = KNeighborsClassifier(algorithm='auto',leaf_size=1,n_neighbors=2,weights='uniform').fit(X_train_selected, y_train)

CPU times: user 1.02 s, sys: 7.09 ms, total: 1.02 s
Wall time: 1.02 s


In [27]:
pred_y_testKNN = clf_KNN.predict(X_test_selected)
print('accuracy_score:', accuracy_score(y_test, pred_y_testKNN))

f1 = f1_score(y_test, pred_y_testKNN)
print('f1:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_testKNN)
print('fpr:', fpr[1])
print('tpr:', tpr[1])

accuracy_score: 0.9351900366772803
f1: 0.5721128259712612
fpr: 0.015483182060864922
tpr: 0.4603854389721627


### CatBoost




In [28]:
%%time
clf_CB = CatBoostClassifier(random_state=0,depth=7,iterations=50,learning_rate=0.04).fit(X_train_selected, y_train)

0:	learn: 0.6605933	total: 62.2ms	remaining: 3.05s
1:	learn: 0.6310507	total: 70.8ms	remaining: 1.7s
2:	learn: 0.6034536	total: 79.8ms	remaining: 1.25s
3:	learn: 0.5773886	total: 88.8ms	remaining: 1.02s
4:	learn: 0.5534091	total: 96.4ms	remaining: 868ms
5:	learn: 0.5308031	total: 103ms	remaining: 757ms
6:	learn: 0.5100979	total: 110ms	remaining: 677ms
7:	learn: 0.4903080	total: 117ms	remaining: 614ms
8:	learn: 0.4720281	total: 123ms	remaining: 560ms
9:	learn: 0.4548631	total: 130ms	remaining: 518ms
10:	learn: 0.4385067	total: 136ms	remaining: 483ms
11:	learn: 0.4231741	total: 143ms	remaining: 451ms
12:	learn: 0.4087203	total: 149ms	remaining: 425ms
13:	learn: 0.3948784	total: 156ms	remaining: 402ms
14:	learn: 0.3822946	total: 163ms	remaining: 380ms
15:	learn: 0.3700238	total: 170ms	remaining: 361ms
16:	learn: 0.3587296	total: 176ms	remaining: 342ms
17:	learn: 0.3478065	total: 183ms	remaining: 325ms
18:	learn: 0.3373589	total: 189ms	remaining: 309ms
19:	learn: 0.3274476	total: 196ms	rem

In [29]:
pred_y_testCB = clf_CB.predict(X_test_selected)
print('Accuracy:', accuracy_score(y_test, pred_y_testCB))

f1 = f1_score(y_test, pred_y_testCB, average='weighted', zero_division=0)
print('F1 Score:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_testCB)
print('FPR:', fpr[1])
print('TPR:', tpr[1])

Accuracy: 0.9567933577848535
F1 Score: 0.9535037370980074
FPR: 0.008987364299697456
TPR: 0.6274089935760171


## Model Evaluation




In [30]:
import pandas as pd, numpy as np
test_df = pd.read_csv("../Kyoto_Test.csv")
test_df.shape

(62028, 24)

In [31]:
# Create feature matrix X and target vextor y
y_eval = test_df['Label_code']
X_eval = test_df.drop(columns=['Label_code'])

In [32]:
X_eval = X_eval[selected_feat]

In [33]:
X_eval.shape

(62028, 8)

### Model Evaluation - Logistic Regression




In [34]:
modelLR = linear_model.LogisticRegression(n_jobs=-1, random_state=42, C=1)
modelLR.fit(X_train_selected, y_train)

LogisticRegression(C=1, n_jobs=-1, random_state=42)

In [35]:
# Predict on the new unseen test data
y_evalpredLR = modelLR.predict(X_eval)
y_predLR = modelLR.predict(X_test_selected)

In [36]:
train_scoreLR = modelLR.score(X_train_selected, y_train)
test_scoreLR = modelLR.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreLR)
print("Testing accuracy is ", test_scoreLR)

Training accuracy is  0.924962718149208
Testing accuracy is  0.9245898996412881


In [37]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreLR)
print('F1 Score:',f1_score(y_test, y_predLR))
print('Precision Score:',precision_score(y_test, y_predLR))
print('Recall Score:', recall_score(y_test, y_predLR))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predLR))

Performance measures for test:
--------
Accuracy: 0.9245898996412881
F1 Score: 0.3875613747954174
Precision Score: 0.8222222222222222
Recall Score: 0.2535331905781585
Confusion Matrix:
 [[22348   128]
 [ 1743   592]]


### Cross validation - Logistic Regression




In [38]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelLR, X_eval, y_eval, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

f = cross_val_score(modelLR, X_eval, y_eval, cv=10, scoring='f1')
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))

precision = cross_val_score(modelLR, X_eval, y_eval, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))

recall = cross_val_score(modelLR, X_eval, y_eval, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 0.89929 (+/- 0.01673)
F1 Score: 0.09957 (+/- 0.23505)
Precision: 0.27744 (+/- 0.42883)
Recall: 0.06253 (+/- 0.15723)


### Model Evaluation - Naive Bayes



In [39]:
modelNB = GaussianNB(var_smoothing=1e-05)
modelNB.fit(X_train_selected, y_train)

GaussianNB(var_smoothing=1e-05)

In [40]:
# Predict on the new unseen test data
y_evalpredNB = modelNB.predict(X_eval)
y_predNB = modelNB.predict(X_test_selected)

In [41]:
train_scoreNB = modelNB.score(X_train_selected, y_train)
test_scoreNB = modelNB.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreNB)
print("Testing accuracy is ", test_scoreNB)

Training accuracy is  0.8209564306154529
Testing accuracy is  0.8219338196767563


In [42]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreNB)
print('F1 Score:',f1_score(y_test, y_predNB))
print('Precision Score:',precision_score(y_test, y_predNB))
print('Recall Score:', recall_score(y_test, y_predNB))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predNB))

Performance measures for test:
--------
Accuracy: 0.8219338196767563
F1 Score: 0.32384450566268747
Precision Score: 0.2519647535127411
Recall Score: 0.45310492505353317
Confusion Matrix:
 [[19335  3141]
 [ 1277  1058]]


### Cross validation - Naive Bayes




In [43]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelNB, X_eval, y_eval, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

f = cross_val_score(modelNB, X_eval, y_eval, cv=10, scoring='f1')
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))

precision = cross_val_score(modelNB, X_eval, y_eval, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))

recall = cross_val_score(modelNB, X_eval, y_eval, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 0.75680 (+/- 0.09031)
F1 Score: 0.35304 (+/- 0.02515)
Precision: 0.24057 (+/- 0.03842)
Recall: 0.68206 (+/- 0.17373)


### Model Evaluation - Random Forest




In [44]:
modelRF = RandomForestClassifier(random_state=0,max_depth=70,n_estimators=100)
modelRF.fit(X_train_selected, y_train)

RandomForestClassifier(max_depth=70, random_state=0)

In [45]:
# Predict on the new unseen test data
y_evalpredRF = modelRF.predict(X_eval)
y_predRF = modelRF.predict(X_test_selected)

In [46]:
train_scoreRF = modelRF.score(X_train_selected, y_train)
test_scoreRF = modelRF.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreRF)
print("Testing accuracy is ", test_scoreRF)

Training accuracy is  0.999556648260852
Testing accuracy is  0.9616299222119221


In [47]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreRF)
print('F1 Score:', f1_score(y_test, y_predRF, average='weighted', zero_division=0))
print('Precision Score:', precision_score(y_test, y_predRF, average='weighted', zero_division=0))
print('Recall Score:', recall_score(y_test, y_predRF, average='weighted', zero_division=0))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predRF))

Performance measures for test:
--------
Accuracy: 0.9616299222119221
F1 Score: 0.9610807891246134
Precision Score: 0.9607003480279755
Recall Score: 0.9616299222119221
Confusion Matrix:
 [[22072   404]
 [  548  1787]]


### Cross validation - Random Forest




In [48]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelRF, X_eval, y_eval, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

f = cross_val_score(modelRF, X_eval, y_eval, cv=10, scoring='f1')
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))

precision = cross_val_score(modelRF, X_eval, y_eval, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))

recall = cross_val_score(modelRF, X_eval, y_eval, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 0.96126 (+/- 0.00477)
F1 Score: 0.79013 (+/- 0.02713)
Precision: 0.82766 (+/- 0.02899)
Recall: 0.75606 (+/- 0.03586)


### Model Evaluation - KNN

In [49]:
modelKNN = KNeighborsClassifier(algorithm='auto',leaf_size=1,n_neighbors=2,weights='uniform')
modelKNN.fit(X_train_selected, y_train)

KNeighborsClassifier(leaf_size=1, n_neighbors=2)

In [50]:
# Predict on the new unseen test data
y_evalpredKNN = modelKNN.predict(X_eval)
y_predKNN = modelKNN.predict(X_test_selected)

In [51]:
train_scoreKNN = modelKNN.score(X_train_selected, y_train)
test_scoreKNN = modelKNN.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreKNN)
print("Testing accuracy is ", test_scoreKNN)

Training accuracy is  0.9624360162831003
Testing accuracy is  0.9351900366772803


In [52]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreKNN)
print('F1 Score:', f1_score(y_test, y_predKNN))
print('Precision Score:', precision_score(y_test, y_predKNN))
print('Recall Score:', recall_score(y_test, y_predKNN))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predKNN))

Performance measures for test:
--------
Accuracy: 0.9351900366772803
F1 Score: 0.5721128259712612
Precision Score: 0.7554462403373156
Recall Score: 0.4603854389721627
Confusion Matrix:
 [[22128   348]
 [ 1260  1075]]


### Cross validation - KNN



In [53]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelKNN, X_eval, y_eval, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

f = cross_val_score(modelKNN, X_eval, y_eval, cv=10, scoring='f1')
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))

precision = cross_val_score(modelKNN, X_eval, y_eval, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))

recall = cross_val_score(modelKNN, X_eval, y_eval, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 0.93064 (+/- 0.00463)
F1 Score: 0.53407 (+/- 0.03008)
Precision: 0.75988 (+/- 0.05700)
Recall: 0.41203 (+/- 0.02830)


### Model Evaluation - CatBoost




In [54]:
modelCB = CatBoostClassifier(random_state=0,depth=7,iterations=50,learning_rate=0.04)
modelCB.fit(X_train_selected, y_train)

0:	learn: 0.6605933	total: 7.42ms	remaining: 364ms
1:	learn: 0.6310507	total: 14.7ms	remaining: 353ms
2:	learn: 0.6034536	total: 21.8ms	remaining: 341ms
3:	learn: 0.5773886	total: 28.4ms	remaining: 326ms
4:	learn: 0.5534091	total: 35.2ms	remaining: 316ms
5:	learn: 0.5308031	total: 42ms	remaining: 308ms
6:	learn: 0.5100979	total: 48.8ms	remaining: 299ms
7:	learn: 0.4903080	total: 55.7ms	remaining: 293ms
8:	learn: 0.4720281	total: 62.5ms	remaining: 285ms
9:	learn: 0.4548631	total: 68.9ms	remaining: 275ms
10:	learn: 0.4385067	total: 75.5ms	remaining: 268ms
11:	learn: 0.4231741	total: 82.5ms	remaining: 261ms
12:	learn: 0.4087203	total: 89.4ms	remaining: 254ms
13:	learn: 0.3948784	total: 95.8ms	remaining: 246ms
14:	learn: 0.3822946	total: 102ms	remaining: 238ms
15:	learn: 0.3700238	total: 109ms	remaining: 231ms
16:	learn: 0.3587296	total: 115ms	remaining: 224ms
17:	learn: 0.3478065	total: 122ms	remaining: 217ms
18:	learn: 0.3373589	total: 128ms	remaining: 209ms
19:	learn: 0.3274476	total: 1

<catboost.core.CatBoostClassifier at 0x7fd750f1e460>

In [55]:
# Predict on the new unseen test data
y_evalpredCB = modelCB.predict(X_eval)
y_predCB = modelCB.predict(X_test_selected)

In [56]:
train_scoreCB = modelCB.score(X_train_selected, y_train)
test_scoreCB = modelCB.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreCB)
print("Testing accuracy is ", test_scoreCB)

Training accuracy is  0.9572870904034501
Testing accuracy is  0.9567933577848535


In [57]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreCB)
print('F1 Score:',f1_score(y_test, y_predCB, average='weighted', zero_division=0))
print('Precision Score:',precision_score(y_test, y_predCB, average='weighted', zero_division=0))
print('Recall Score:', recall_score(y_test, y_predCB, average='weighted', zero_division=0))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predCB))

Performance measures for test:
--------
Accuracy: 0.9567933577848535
F1 Score: 0.9535037370980074
Precision Score: 0.954542952759011
Recall Score: 0.9567933577848535
Confusion Matrix:
 [[22274   202]
 [  870  1465]]


### Cross validation - CatBoost





In [58]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelCB, X_eval, y_eval, cv=10, scoring='accuracy')
f = cross_val_score(modelCB, X_eval, y_eval, cv=10, scoring='f1')
precision = cross_val_score(modelCB, X_eval, y_eval, cv=10, scoring='precision')
recall = cross_val_score(modelCB, X_eval, y_eval, cv=10, scoring='recall')

0:	learn: 0.6609473	total: 5.76ms	remaining: 282ms
1:	learn: 0.6314191	total: 12.4ms	remaining: 297ms
2:	learn: 0.6042015	total: 17.9ms	remaining: 280ms
3:	learn: 0.5787721	total: 23.2ms	remaining: 267ms
4:	learn: 0.5547173	total: 28.5ms	remaining: 257ms
5:	learn: 0.5321631	total: 33.7ms	remaining: 247ms
6:	learn: 0.5114727	total: 39ms	remaining: 239ms
7:	learn: 0.4915906	total: 44.2ms	remaining: 232ms
8:	learn: 0.4732615	total: 49.6ms	remaining: 226ms
9:	learn: 0.4559624	total: 55ms	remaining: 220ms
10:	learn: 0.4397759	total: 60.2ms	remaining: 213ms
11:	learn: 0.4247166	total: 65.5ms	remaining: 207ms
12:	learn: 0.4099731	total: 70.9ms	remaining: 202ms
13:	learn: 0.3961841	total: 76.4ms	remaining: 197ms
14:	learn: 0.3835268	total: 81.8ms	remaining: 191ms
15:	learn: 0.3715237	total: 87.2ms	remaining: 185ms
16:	learn: 0.3600436	total: 92.7ms	remaining: 180ms
17:	learn: 0.3490777	total: 98.3ms	remaining: 175ms
18:	learn: 0.3387313	total: 104ms	remaining: 170ms
19:	learn: 0.3289831	total:

37:	learn: 0.2165637	total: 204ms	remaining: 64.3ms
38:	learn: 0.2126477	total: 209ms	remaining: 58.9ms
39:	learn: 0.2089034	total: 214ms	remaining: 53.5ms
40:	learn: 0.2054542	total: 219ms	remaining: 48.2ms
41:	learn: 0.2018320	total: 225ms	remaining: 42.8ms
42:	learn: 0.1985064	total: 230ms	remaining: 37.5ms
43:	learn: 0.1954108	total: 236ms	remaining: 32.1ms
44:	learn: 0.1927929	total: 241ms	remaining: 26.8ms
45:	learn: 0.1900422	total: 246ms	remaining: 21.4ms
46:	learn: 0.1875497	total: 252ms	remaining: 16.1ms
47:	learn: 0.1850169	total: 258ms	remaining: 10.7ms
48:	learn: 0.1824403	total: 263ms	remaining: 5.37ms
49:	learn: 0.1799977	total: 268ms	remaining: 0us
0:	learn: 0.6609851	total: 5.15ms	remaining: 252ms
1:	learn: 0.6314942	total: 10.6ms	remaining: 255ms
2:	learn: 0.6042811	total: 16.1ms	remaining: 252ms
3:	learn: 0.5787093	total: 21.6ms	remaining: 249ms
4:	learn: 0.5547696	total: 27.1ms	remaining: 244ms
5:	learn: 0.5322658	total: 32.4ms	remaining: 238ms
6:	learn: 0.5115892	t

0:	learn: 0.6609209	total: 5.73ms	remaining: 281ms
1:	learn: 0.6313555	total: 11.1ms	remaining: 267ms
2:	learn: 0.6038788	total: 16.7ms	remaining: 261ms
3:	learn: 0.5785048	total: 22.1ms	remaining: 254ms
4:	learn: 0.5545122	total: 27.3ms	remaining: 246ms
5:	learn: 0.5320108	total: 32.7ms	remaining: 240ms
6:	learn: 0.5113463	total: 38.1ms	remaining: 234ms
7:	learn: 0.4920796	total: 43.2ms	remaining: 227ms
8:	learn: 0.4736773	total: 48.6ms	remaining: 222ms
9:	learn: 0.4563961	total: 54ms	remaining: 216ms
10:	learn: 0.4402336	total: 59.2ms	remaining: 210ms
11:	learn: 0.4250689	total: 64.3ms	remaining: 204ms
12:	learn: 0.4103986	total: 69.4ms	remaining: 198ms
13:	learn: 0.3966527	total: 74.9ms	remaining: 193ms
14:	learn: 0.3840328	total: 80.3ms	remaining: 187ms
15:	learn: 0.3719534	total: 85.6ms	remaining: 182ms
16:	learn: 0.3604441	total: 91.3ms	remaining: 177ms
17:	learn: 0.3494783	total: 96.8ms	remaining: 172ms
18:	learn: 0.3390224	total: 102ms	remaining: 167ms
19:	learn: 0.3290339	tota

15:	learn: 0.3715237	total: 86.5ms	remaining: 184ms
16:	learn: 0.3600436	total: 91.8ms	remaining: 178ms
17:	learn: 0.3490777	total: 97.1ms	remaining: 173ms
18:	learn: 0.3387313	total: 102ms	remaining: 167ms
19:	learn: 0.3289831	total: 108ms	remaining: 161ms
20:	learn: 0.3195964	total: 113ms	remaining: 156ms
21:	learn: 0.3108198	total: 119ms	remaining: 151ms
22:	learn: 0.3024968	total: 124ms	remaining: 146ms
23:	learn: 0.2946457	total: 129ms	remaining: 140ms
24:	learn: 0.2872401	total: 134ms	remaining: 134ms
25:	learn: 0.2797939	total: 140ms	remaining: 129ms
26:	learn: 0.2731569	total: 145ms	remaining: 124ms
27:	learn: 0.2668023	total: 151ms	remaining: 119ms
28:	learn: 0.2607011	total: 157ms	remaining: 113ms
29:	learn: 0.2549487	total: 162ms	remaining: 108ms
30:	learn: 0.2494301	total: 168ms	remaining: 103ms
31:	learn: 0.2437415	total: 173ms	remaining: 97.5ms
32:	learn: 0.2383896	total: 179ms	remaining: 92ms
33:	learn: 0.2332447	total: 184ms	remaining: 86.7ms
34:	learn: 0.2285237	total:

36:	learn: 0.2207979	total: 201ms	remaining: 70.6ms
37:	learn: 0.2165637	total: 207ms	remaining: 65.3ms
38:	learn: 0.2126477	total: 212ms	remaining: 59.8ms
39:	learn: 0.2089034	total: 217ms	remaining: 54.3ms
40:	learn: 0.2054542	total: 222ms	remaining: 48.8ms
41:	learn: 0.2018320	total: 228ms	remaining: 43.4ms
42:	learn: 0.1985064	total: 233ms	remaining: 37.9ms
43:	learn: 0.1954108	total: 238ms	remaining: 32.5ms
44:	learn: 0.1927929	total: 244ms	remaining: 27.1ms
45:	learn: 0.1900422	total: 250ms	remaining: 21.7ms
46:	learn: 0.1875497	total: 255ms	remaining: 16.3ms
47:	learn: 0.1850169	total: 260ms	remaining: 10.9ms
48:	learn: 0.1824403	total: 266ms	remaining: 5.42ms
49:	learn: 0.1799977	total: 271ms	remaining: 0us
0:	learn: 0.6609851	total: 5.53ms	remaining: 271ms
1:	learn: 0.6314942	total: 11.1ms	remaining: 267ms
2:	learn: 0.6042811	total: 16.2ms	remaining: 254ms
3:	learn: 0.5787093	total: 21.7ms	remaining: 249ms
4:	learn: 0.5547696	total: 26.8ms	remaining: 241ms
5:	learn: 0.5322658	

0:	learn: 0.6609209	total: 5.73ms	remaining: 281ms
1:	learn: 0.6313555	total: 11.5ms	remaining: 277ms
2:	learn: 0.6038788	total: 16.8ms	remaining: 264ms
3:	learn: 0.5785048	total: 22.3ms	remaining: 256ms
4:	learn: 0.5545122	total: 27.7ms	remaining: 250ms
5:	learn: 0.5320108	total: 33.2ms	remaining: 243ms
6:	learn: 0.5113463	total: 38.6ms	remaining: 237ms
7:	learn: 0.4920796	total: 44ms	remaining: 231ms
8:	learn: 0.4736773	total: 49.3ms	remaining: 224ms
9:	learn: 0.4563961	total: 54.8ms	remaining: 219ms
10:	learn: 0.4402336	total: 59.9ms	remaining: 212ms
11:	learn: 0.4250689	total: 65.2ms	remaining: 207ms
12:	learn: 0.4103986	total: 70.3ms	remaining: 200ms
13:	learn: 0.3966527	total: 75.8ms	remaining: 195ms
14:	learn: 0.3840328	total: 80.9ms	remaining: 189ms
15:	learn: 0.3719534	total: 86.2ms	remaining: 183ms
16:	learn: 0.3604441	total: 91.4ms	remaining: 177ms
17:	learn: 0.3494783	total: 96.7ms	remaining: 172ms
18:	learn: 0.3390224	total: 102ms	remaining: 167ms
19:	learn: 0.3290339	tota

14:	learn: 0.3835268	total: 81.4ms	remaining: 190ms
15:	learn: 0.3715237	total: 86.7ms	remaining: 184ms
16:	learn: 0.3600436	total: 91.9ms	remaining: 178ms
17:	learn: 0.3490777	total: 97.2ms	remaining: 173ms
18:	learn: 0.3387313	total: 102ms	remaining: 167ms
19:	learn: 0.3289831	total: 108ms	remaining: 161ms
20:	learn: 0.3195964	total: 113ms	remaining: 156ms
21:	learn: 0.3108198	total: 119ms	remaining: 151ms
22:	learn: 0.3024968	total: 124ms	remaining: 146ms
23:	learn: 0.2946457	total: 129ms	remaining: 140ms
24:	learn: 0.2872401	total: 135ms	remaining: 135ms
25:	learn: 0.2797939	total: 140ms	remaining: 129ms
26:	learn: 0.2731569	total: 145ms	remaining: 124ms
27:	learn: 0.2668023	total: 150ms	remaining: 118ms
28:	learn: 0.2607011	total: 156ms	remaining: 113ms
29:	learn: 0.2549487	total: 161ms	remaining: 107ms
30:	learn: 0.2494301	total: 167ms	remaining: 102ms
31:	learn: 0.2437415	total: 172ms	remaining: 96.7ms
32:	learn: 0.2383896	total: 177ms	remaining: 91.3ms
33:	learn: 0.2332447	tota

37:	learn: 0.2165637	total: 203ms	remaining: 64ms
38:	learn: 0.2126477	total: 208ms	remaining: 58.7ms
39:	learn: 0.2089034	total: 213ms	remaining: 53.3ms
40:	learn: 0.2054542	total: 218ms	remaining: 48ms
41:	learn: 0.2018320	total: 224ms	remaining: 42.6ms
42:	learn: 0.1985064	total: 229ms	remaining: 37.3ms
43:	learn: 0.1954108	total: 234ms	remaining: 32ms
44:	learn: 0.1927929	total: 240ms	remaining: 26.6ms
45:	learn: 0.1900422	total: 245ms	remaining: 21.3ms
46:	learn: 0.1875497	total: 250ms	remaining: 16ms
47:	learn: 0.1850169	total: 255ms	remaining: 10.6ms
48:	learn: 0.1824403	total: 261ms	remaining: 5.32ms
49:	learn: 0.1799977	total: 266ms	remaining: 0us
0:	learn: 0.6609851	total: 5.36ms	remaining: 262ms
1:	learn: 0.6314942	total: 11ms	remaining: 264ms
2:	learn: 0.6042811	total: 16.4ms	remaining: 257ms
3:	learn: 0.5787093	total: 21.9ms	remaining: 251ms
4:	learn: 0.5547696	total: 27.1ms	remaining: 244ms
5:	learn: 0.5322658	total: 32.7ms	remaining: 239ms
6:	learn: 0.5115892	total: 38.1

0:	learn: 0.6609209	total: 5.6ms	remaining: 274ms
1:	learn: 0.6313555	total: 11ms	remaining: 265ms
2:	learn: 0.6038788	total: 16.2ms	remaining: 254ms
3:	learn: 0.5785048	total: 21.8ms	remaining: 250ms
4:	learn: 0.5545122	total: 26.9ms	remaining: 243ms
5:	learn: 0.5320108	total: 32.3ms	remaining: 237ms
6:	learn: 0.5113463	total: 37.8ms	remaining: 232ms
7:	learn: 0.4920796	total: 43.5ms	remaining: 228ms
8:	learn: 0.4736773	total: 48.9ms	remaining: 223ms
9:	learn: 0.4563961	total: 53.8ms	remaining: 215ms
10:	learn: 0.4402336	total: 59.3ms	remaining: 210ms
11:	learn: 0.4250689	total: 64.4ms	remaining: 204ms
12:	learn: 0.4103986	total: 69.7ms	remaining: 198ms
13:	learn: 0.3966527	total: 75.1ms	remaining: 193ms
14:	learn: 0.3840328	total: 80.3ms	remaining: 187ms
15:	learn: 0.3719534	total: 85.6ms	remaining: 182ms
16:	learn: 0.3604441	total: 90.9ms	remaining: 176ms
17:	learn: 0.3494783	total: 96.3ms	remaining: 171ms
18:	learn: 0.3390224	total: 102ms	remaining: 166ms
19:	learn: 0.3290339	total

14:	learn: 0.3835268	total: 82ms	remaining: 191ms
15:	learn: 0.3715237	total: 87.2ms	remaining: 185ms
16:	learn: 0.3600436	total: 92.4ms	remaining: 179ms
17:	learn: 0.3490777	total: 97.6ms	remaining: 174ms
18:	learn: 0.3387313	total: 103ms	remaining: 168ms
19:	learn: 0.3289831	total: 109ms	remaining: 163ms
20:	learn: 0.3195964	total: 114ms	remaining: 158ms
21:	learn: 0.3108198	total: 120ms	remaining: 153ms
22:	learn: 0.3024968	total: 125ms	remaining: 147ms
23:	learn: 0.2946457	total: 130ms	remaining: 141ms
24:	learn: 0.2872401	total: 136ms	remaining: 136ms
25:	learn: 0.2797939	total: 141ms	remaining: 131ms
26:	learn: 0.2731569	total: 146ms	remaining: 125ms
27:	learn: 0.2668023	total: 152ms	remaining: 119ms
28:	learn: 0.2607011	total: 157ms	remaining: 114ms
29:	learn: 0.2549487	total: 163ms	remaining: 108ms
30:	learn: 0.2494301	total: 168ms	remaining: 103ms
31:	learn: 0.2437415	total: 173ms	remaining: 97.6ms
32:	learn: 0.2383896	total: 179ms	remaining: 92.1ms
33:	learn: 0.2332447	total:

37:	learn: 0.2165637	total: 201ms	remaining: 63.4ms
38:	learn: 0.2126477	total: 206ms	remaining: 58.2ms
39:	learn: 0.2089034	total: 211ms	remaining: 52.9ms
40:	learn: 0.2054542	total: 217ms	remaining: 47.5ms
41:	learn: 0.2018320	total: 222ms	remaining: 42.2ms
42:	learn: 0.1985064	total: 227ms	remaining: 37ms
43:	learn: 0.1954108	total: 233ms	remaining: 31.7ms
44:	learn: 0.1927929	total: 238ms	remaining: 26.4ms
45:	learn: 0.1900422	total: 244ms	remaining: 21.2ms
46:	learn: 0.1875497	total: 249ms	remaining: 15.9ms
47:	learn: 0.1850169	total: 254ms	remaining: 10.6ms
48:	learn: 0.1824403	total: 259ms	remaining: 5.29ms
49:	learn: 0.1799977	total: 265ms	remaining: 0us
0:	learn: 0.6609851	total: 6ms	remaining: 294ms
1:	learn: 0.6314942	total: 11.5ms	remaining: 275ms
2:	learn: 0.6042811	total: 17ms	remaining: 266ms
3:	learn: 0.5787093	total: 22.3ms	remaining: 256ms
4:	learn: 0.5547696	total: 27.7ms	remaining: 249ms
5:	learn: 0.5322658	total: 33ms	remaining: 242ms
6:	learn: 0.5115892	total: 38.

0:	learn: 0.6609209	total: 5.53ms	remaining: 271ms
1:	learn: 0.6313555	total: 10.9ms	remaining: 261ms
2:	learn: 0.6038788	total: 16.3ms	remaining: 255ms
3:	learn: 0.5785048	total: 21.9ms	remaining: 252ms
4:	learn: 0.5545122	total: 28.1ms	remaining: 253ms
5:	learn: 0.5320108	total: 33.4ms	remaining: 245ms
6:	learn: 0.5113463	total: 38.9ms	remaining: 239ms
7:	learn: 0.4920796	total: 44.1ms	remaining: 231ms
8:	learn: 0.4736773	total: 49.6ms	remaining: 226ms
9:	learn: 0.4563961	total: 54.8ms	remaining: 219ms
10:	learn: 0.4402336	total: 60ms	remaining: 213ms
11:	learn: 0.4250689	total: 65.4ms	remaining: 207ms
12:	learn: 0.4103986	total: 70.6ms	remaining: 201ms
13:	learn: 0.3966527	total: 76.3ms	remaining: 196ms
14:	learn: 0.3840328	total: 81.4ms	remaining: 190ms
15:	learn: 0.3719534	total: 86.8ms	remaining: 184ms
16:	learn: 0.3604441	total: 92.2ms	remaining: 179ms
17:	learn: 0.3494783	total: 97.5ms	remaining: 173ms
18:	learn: 0.3390224	total: 103ms	remaining: 168ms
19:	learn: 0.3290339	tota

In [59]:
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 0.95567 (+/- 0.00432)
F1 Score: 0.72956 (+/- 0.03346)
Precision: 0.88613 (+/- 0.02991)
Recall: 0.62055 (+/- 0.04892)
