## Lasso regularisation

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler

## Read Data

In [20]:
data = pd.read_csv('../UNSW_Train.csv')
data.shape

(175341, 44)

In [21]:
data.head()

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack,is_intrusion
0,0.121478,113.0,0.0,2.0,6,4,258,172,74.08749,252,...,1,1,0,0,0,1,1,0,0,0
1,0.649902,113.0,0.0,2.0,14,38,734,42014,78.473372,62,...,1,2,0,0,0,1,6,0,0,0
2,1.623129,113.0,0.0,2.0,8,16,364,13186,14.170161,62,...,1,3,0,0,0,2,6,0,0,0
3,1.681642,113.0,3.0,2.0,12,12,628,770,13.677108,62,...,1,3,1,1,0,2,1,0,0,0
4,0.449454,113.0,0.0,2.0,10,6,534,268,33.373826,254,...,1,40,0,0,0,2,39,0,0,0


In [22]:
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
numerical_vars = list(data.select_dtypes(include=numerics).columns)
data = data[numerical_vars]
data.shape

(175341, 44)

### Train - Test Split

In [23]:
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(labels=['is_intrusion'], axis=1),
    data['is_intrusion'],
    test_size=0.2,
    random_state=0)

X_train.shape, X_test.shape

((140272, 43), (35069, 43))

In [24]:
# linear models benefit from feature scaling

scaler = StandardScaler()
scaler.fit(X_train.fillna(0))

StandardScaler()

### Select features with Lasso

In [29]:
sel_ = SelectFromModel(
    LogisticRegression(C=1, penalty='l1', solver='liblinear', random_state=42))

sel_.fit(scaler.transform(X_train), y_train)

SelectFromModel(estimator=LogisticRegression(C=1, penalty='l1', random_state=42,
                                             solver='liblinear'))

In [30]:
# Visualise the index of the # features that were selected

sel_.get_support()

array([False, False,  True,  True, False,  True, False, False,  True,
        True,  True, False,  True, False, False,  True, False, False,
       False,  True, False, False, False,  True, False,  True, False,
        True, False, False, False, False, False,  True,  True, False,
       False, False, False, False,  True, False,  True])

In [31]:
selected_feat = X_train.columns[(sel_.get_support())]

print('total features: {}'.format((X_train.shape[1])))
print('selected features: {}'.format(len(selected_feat)))
print('features with coefficients shrank to zero: {}'.format(
    np.sum(sel_.estimator_.coef_ == 0)))

total features: 43
selected features: 16
features with coefficients shrank to zero: 27


### Examine coefficients that shrank to zero

In [32]:
# the number of features which coefficient was shrank to zero:
np.sum(sel_.estimator_.coef_ == 0)

27

In [33]:
# we can identify the removed features like this:

removed_feats = X_train.columns[(sel_.estimator_.coef_ == 0).ravel().tolist()]
removed_feats

Index(['dur', 'proto', 'spkts', 'sbytes', 'dbytes', 'sload', 'sloss', 'dloss',
       'dinpkt', 'sjit', 'djit', 'stcpb', 'dtcpb', 'dwin', 'synack', 'smean',
       'trans_depth', 'response_body_len', 'ct_srv_src', 'ct_state_ttl',
       'ct_dst_ltm', 'ct_dst_src_ltm', 'is_ftp_login', 'ct_ftp_cmd',
       'ct_flw_http_mthd', 'ct_src_ltm', 'is_sm_ips_ports'],
      dtype='object')

In [34]:
# we can then remove the features from the training and testing set
# like this:

X_train_selected = sel_.transform(X_train)
X_test_selected = sel_.transform(X_test)

X_train_selected.shape, X_test_selected.shape

((140272, 16), (35069, 16))

## Classifiers




In [35]:
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from catboost import CatBoostClassifier

## Metrics Evaluation




In [36]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, f1_score
from sklearn import metrics
from sklearn.model_selection import cross_val_score

### Logistic Regression




In [37]:
%%time
clf_LR = linear_model.LogisticRegression(n_jobs=-1, random_state=42, C=0.1).fit(X_train_selected, y_train)

CPU times: user 77.2 ms, sys: 191 ms, total: 268 ms
Wall time: 4.02 s


In [38]:
pred_y_test = clf_LR.predict(X_test_selected)
print('Accuracy:', accuracy_score(y_test, pred_y_test))

f1 = f1_score(y_test, pred_y_test)
print('F1 Score:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_test)
print('FPR:', fpr[1])
print('TPR:', tpr[1])

Accuracy: 0.8865094527930651
F1 Score: 0.9188450715713062
FPR: 0.2390183176240441
TPR: 0.9457666960500357


### Naive Bayes




In [39]:
%%time
clf_NB = GaussianNB(var_smoothing=1e-09).fit(X_train_selected, y_train)

CPU times: user 44.3 ms, sys: 13.5 ms, total: 57.7 ms
Wall time: 55.9 ms


In [40]:
pred_y_testNB = clf_NB.predict(X_test_selected)
print('Accuracy:', accuracy_score(y_test, pred_y_testNB))

f1 = f1_score(y_test, pred_y_testNB)
print('F1 Score:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_testNB)
print('FPR:', fpr[1])
print('TPR:', tpr[1])

Accuracy: 0.8372066497476404
F1 Score: 0.8899978804986609
FPR: 0.442913035746043
TPR: 0.9694412962263359


### Random Forest




In [43]:
%%time
clf_RF = RandomForestClassifier(random_state=0,max_depth=100,n_estimators=1000).fit(X_train_selected, y_train)

CPU times: user 48.5 s, sys: 814 ms, total: 49.3 s
Wall time: 49.3 s


In [44]:
pred_y_testRF = clf_RF.predict(X_test_selected)
print('Accuracy:', accuracy_score(y_test, pred_y_testRF))

f1 = f1_score(y_test, pred_y_testRF, average='weighted', zero_division=0)
print('F1 Score:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_testRF)
print('FPR:', fpr[1])
print('TPR:', tpr[1])

Accuracy: 1.0
F1 Score: 1.0
FPR: 0.0
TPR: 1.0


### KNN




In [45]:
%%time
clf_KNN = KNeighborsClassifier(algorithm='ball_tree',leaf_size=1,n_neighbors=5,weights='uniform').fit(X_train_selected, y_train)

CPU times: user 1.67 s, sys: 12.4 ms, total: 1.68 s
Wall time: 1.68 s


In [46]:
pred_y_testKNN = clf_KNN.predict(X_test_selected)
print('accuracy_score:', accuracy_score(y_test, pred_y_testKNN))

f1 = f1_score(y_test, pred_y_testKNN)
print('f1:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_testKNN)
print('fpr:', fpr[1])
print('tpr:', tpr[1])

accuracy_score: 0.9342154039179903
f1: 0.9523179629208607
fpr: 0.1353370087142095
tpr: 0.9670486504638375


### CatBoost




In [47]:
%%time
clf_CB = CatBoostClassifier(random_state=0,depth=7,iterations=50,learning_rate=0.04).fit(X_train_selected, y_train)

0:	learn: 0.6540126	total: 67.3ms	remaining: 3.3s
1:	learn: 0.6178859	total: 76.2ms	remaining: 1.83s
2:	learn: 0.5844064	total: 85.9ms	remaining: 1.34s
3:	learn: 0.5533122	total: 93.7ms	remaining: 1.08s
4:	learn: 0.5243932	total: 102ms	remaining: 921ms
5:	learn: 0.4974213	total: 111ms	remaining: 812ms
6:	learn: 0.4721808	total: 119ms	remaining: 732ms
7:	learn: 0.4485466	total: 127ms	remaining: 666ms
8:	learn: 0.4263700	total: 135ms	remaining: 614ms
9:	learn: 0.4055290	total: 140ms	remaining: 561ms
10:	learn: 0.3859581	total: 150ms	remaining: 532ms
11:	learn: 0.3675374	total: 162ms	remaining: 511ms
12:	learn: 0.3501529	total: 170ms	remaining: 484ms
13:	learn: 0.3337337	total: 178ms	remaining: 458ms
14:	learn: 0.3182331	total: 186ms	remaining: 435ms
15:	learn: 0.3035798	total: 195ms	remaining: 414ms
16:	learn: 0.2897001	total: 203ms	remaining: 395ms
17:	learn: 0.2765491	total: 212ms	remaining: 376ms
18:	learn: 0.2640806	total: 220ms	remaining: 359ms
19:	learn: 0.2522829	total: 229ms	rema

In [48]:
pred_y_testCB = clf_CB.predict(X_test_selected)
print('Accuracy:', accuracy_score(y_test, pred_y_testCB))

f1 = f1_score(y_test, pred_y_testCB, average='weighted', zero_division=0)
print('F1 Score:', f1)

fpr, tpr, thresholds = roc_curve(y_test, pred_y_testCB)
print('FPR:', fpr[1])
print('TPR:', tpr[1])

Accuracy: 1.0
F1 Score: 1.0
FPR: 0.0
TPR: 1.0


## Model Evaluation




In [49]:
import pandas as pd, numpy as np
test_df = pd.read_csv("../UNSW_Test.csv")
test_df.shape

(175341, 44)

In [50]:
# Create feature matrix X and target vextor y
y_eval = test_df['is_intrusion']
X_eval = test_df.drop(columns=['is_intrusion'])

In [51]:
X_eval = X_eval[selected_feat]

In [52]:
X_eval.shape

(175341, 16)

### Model Evaluation - Logistic Regression




In [53]:
modelLR = linear_model.LogisticRegression(n_jobs=-1, random_state=42, C=25)
modelLR.fit(X_train_selected, y_train)

LogisticRegression(C=25, n_jobs=-1, random_state=42)

In [54]:
# Predict on the new unseen test data
y_evalpredLR = modelLR.predict(X_eval)
y_predLR = modelLR.predict(X_test_selected)

In [55]:
train_scoreLR = modelLR.score(X_train_selected, y_train)
test_scoreLR = modelLR.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreLR)
print("Testing accuracy is ", test_scoreLR)

Training accuracy is  0.8874329873388844
Testing accuracy is  0.8869086657731899


In [56]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreLR)
print('F1 Score:',f1_score(y_test, y_predLR))
print('Precision Score:',precision_score(y_test, y_predLR))
print('Recall Score:', recall_score(y_test, y_predLR))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predLR))

Performance measures for test:
--------
Accuracy: 0.8869086657731899
F1 Score: 0.9192227789319322
Precision Score: 0.8928189910979228
Recall Score: 0.9472358645006926
Confusion Matrix:
 [[ 8537  2709]
 [ 1257 22566]]


### Cross validation - Logistic Regression




In [57]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelLR, X_eval, y_eval, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

f = cross_val_score(modelLR, X_eval, y_eval, cv=10, scoring='f1')
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))

precision = cross_val_score(modelLR, X_eval, y_eval, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))

recall = cross_val_score(modelLR, X_eval, y_eval, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 0.88661 (+/- 0.14225)
F1 Score: 0.92223 (+/- 0.08440)
Precision: 0.90486 (+/- 0.19009)
Recall: 0.95050 (+/- 0.09048)


### Model Evaluation - Naive Bayes



In [58]:
modelNB = GaussianNB(var_smoothing=1e-08)
modelNB.fit(X_train_selected, y_train)

GaussianNB(var_smoothing=1e-08)

In [59]:
# Predict on the new unseen test data
y_evalpredNB = modelNB.predict(X_eval)
y_predNB = modelNB.predict(X_test_selected)

In [60]:
train_scoreNB = modelNB.score(X_train_selected, y_train)
test_scoreNB = modelNB.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreNB)
print("Testing accuracy is ", test_scoreNB)

Training accuracy is  0.8409589939546025
Testing accuracy is  0.8364367390002566


In [61]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreNB)
print('F1 Score:',f1_score(y_test, y_predNB))
print('Precision Score:',precision_score(y_test, y_predNB))
print('Recall Score:', recall_score(y_test, y_predNB))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predNB))

Performance measures for test:
--------
Accuracy: 0.8364367390002566
F1 Score: 0.8903963006840676
Precision Score: 0.8171933639647855
Recall Score: 0.9780044494815934
Confusion Matrix:
 [[ 6034  5212]
 [  524 23299]]


### Cross validation - Naive Bayes




In [62]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelNB, X_eval, y_eval, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

f = cross_val_score(modelNB, X_eval, y_eval, cv=10, scoring='f1')
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))

precision = cross_val_score(modelNB, X_eval, y_eval, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))

recall = cross_val_score(modelNB, X_eval, y_eval, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 0.83994 (+/- 0.13001)
F1 Score: 0.89439 (+/- 0.07210)
Precision: 0.82770 (+/- 0.12821)
Recall: 0.97689 (+/- 0.02336)


### Model Evaluation - Random Forest




In [63]:
modelRF = RandomForestClassifier(random_state=0,max_depth=100,n_estimators=1000)
modelRF.fit(X_train_selected, y_train)

RandomForestClassifier(max_depth=100, n_estimators=1000, random_state=0)

In [64]:
# Predict on the new unseen test data
y_evalpredRF = modelRF.predict(X_eval)
y_predRF = modelRF.predict(X_test_selected)

In [65]:
train_scoreRF = modelRF.score(X_train_selected, y_train)
test_scoreRF = modelRF.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreRF)
print("Testing accuracy is ", test_scoreRF)

Training accuracy is  1.0
Testing accuracy is  1.0


In [66]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreRF)
print('F1 Score:', f1_score(y_test, y_predRF, average='weighted', zero_division=0))
print('Precision Score:', precision_score(y_test, y_predRF, average='weighted', zero_division=0))
print('Recall Score:', recall_score(y_test, y_predRF, average='weighted', zero_division=0))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predRF))

Performance measures for test:
--------
Accuracy: 1.0
F1 Score: 1.0
Precision Score: 1.0
Recall Score: 1.0
Confusion Matrix:
 [[11246     0]
 [    0 23823]]


### Cross validation - Random Forest




In [67]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelRF, X_eval, y_eval, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

f = cross_val_score(modelRF, X_eval, y_eval, cv=10, scoring='f1')
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))

precision = cross_val_score(modelRF, X_eval, y_eval, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))

recall = cross_val_score(modelRF, X_eval, y_eval, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 1.00000 (+/- 0.00000)
F1 Score: 1.00000 (+/- 0.00000)
Precision: 1.00000 (+/- 0.00000)
Recall: 1.00000 (+/- 0.00000)


### Model Evaluation - KNN

In [68]:
modelKNN = KNeighborsClassifier(algorithm='ball_tree',leaf_size=1,n_neighbors=5,weights='uniform')
modelKNN.fit(X_train_selected, y_train)

KNeighborsClassifier(algorithm='ball_tree', leaf_size=1)

In [69]:
# Predict on the new unseen test data
y_evalpredKNN = modelKNN.predict(X_eval)
y_predKNN = modelKNN.predict(X_test_selected)

In [70]:
train_scoreKNN = modelKNN.score(X_train_selected, y_train)
test_scoreKNN = modelKNN.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreKNN)
print("Testing accuracy is ", test_scoreKNN)

Training accuracy is  0.9557003535987225
Testing accuracy is  0.9342154039179903


In [71]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreKNN)
print('F1 Score:', f1_score(y_test, y_predKNN))
print('Precision Score:', precision_score(y_test, y_predKNN))
print('Recall Score:', recall_score(y_test, y_predKNN))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predKNN))

Performance measures for test:
--------
Accuracy: 0.9342154039179903
F1 Score: 0.9523179629208607
Precision Score: 0.9380293159609121
Recall Score: 0.9670486504638375
Confusion Matrix:
 [[ 9724  1522]
 [  785 23038]]


### Cross validation - KNN



In [72]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelKNN, X_eval, y_eval, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))

f = cross_val_score(modelKNN, X_eval, y_eval, cv=10, scoring='f1')
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))

precision = cross_val_score(modelKNN, X_eval, y_eval, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))

recall = cross_val_score(modelKNN, X_eval, y_eval, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 0.91570 (+/- 0.10550)
F1 Score: 0.94109 (+/- 0.06608)
Precision: 0.92780 (+/- 0.14905)
Recall: 0.95993 (+/- 0.05069)


### Model Evaluation - CatBoost




In [73]:
modelCB = CatBoostClassifier(random_state=0,depth=7,iterations=50,learning_rate=0.04)
modelCB.fit(X_train_selected, y_train)

0:	learn: 0.6540126	total: 9.13ms	remaining: 447ms
1:	learn: 0.6178859	total: 18.6ms	remaining: 447ms
2:	learn: 0.5844064	total: 27.9ms	remaining: 437ms
3:	learn: 0.5533122	total: 36.7ms	remaining: 422ms
4:	learn: 0.5243932	total: 46.2ms	remaining: 416ms
5:	learn: 0.4974213	total: 55.1ms	remaining: 404ms
6:	learn: 0.4721808	total: 63.7ms	remaining: 391ms
7:	learn: 0.4485466	total: 71.4ms	remaining: 375ms
8:	learn: 0.4263700	total: 78.9ms	remaining: 360ms
9:	learn: 0.4055290	total: 84.6ms	remaining: 338ms
10:	learn: 0.3859581	total: 93.2ms	remaining: 330ms
11:	learn: 0.3675374	total: 103ms	remaining: 325ms
12:	learn: 0.3501529	total: 112ms	remaining: 318ms
13:	learn: 0.3337337	total: 120ms	remaining: 310ms
14:	learn: 0.3182331	total: 131ms	remaining: 306ms
15:	learn: 0.3035798	total: 140ms	remaining: 298ms
16:	learn: 0.2897001	total: 151ms	remaining: 292ms
17:	learn: 0.2765491	total: 160ms	remaining: 285ms
18:	learn: 0.2640806	total: 169ms	remaining: 276ms
19:	learn: 0.2522829	total: 18

<catboost.core.CatBoostClassifier at 0x7fea50e88c40>

In [74]:
# Predict on the new unseen test data
y_evalpredCB = modelCB.predict(X_eval)
y_predCB = modelCB.predict(X_test_selected)

In [75]:
train_scoreCB = modelCB.score(X_train_selected, y_train)
test_scoreCB = modelCB.score(X_test_selected, y_test)
print("Training accuracy is ", train_scoreCB)
print("Testing accuracy is ", test_scoreCB)

Training accuracy is  1.0
Testing accuracy is  1.0


In [76]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
print('Performance measures for test:')
print('--------')
print('Accuracy:', test_scoreCB)
print('F1 Score:',f1_score(y_test, y_predCB, average='weighted', zero_division=0))
print('Precision Score:',precision_score(y_test, y_predCB, average='weighted', zero_division=0))
print('Recall Score:', recall_score(y_test, y_predCB, average='weighted', zero_division=0))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_predCB))

Performance measures for test:
--------
Accuracy: 1.0
F1 Score: 1.0
Precision Score: 1.0
Recall Score: 1.0
Confusion Matrix:
 [[11246     0]
 [    0 23823]]


### Cross validation - CatBoost





In [77]:
from sklearn.model_selection import cross_val_score
from sklearn import metrics

accuracy = cross_val_score(modelCB, X_eval, y_eval, cv=10, scoring='accuracy')
f = cross_val_score(modelCB, X_eval, y_eval, cv=10, scoring='f1')
precision = cross_val_score(modelCB, X_eval, y_eval, cv=10, scoring='precision')
recall = cross_val_score(modelCB, X_eval, y_eval, cv=10, scoring='recall')

0:	learn: 0.6540044	total: 9.97ms	remaining: 489ms
1:	learn: 0.6178711	total: 20ms	remaining: 480ms
2:	learn: 0.5843887	total: 30.3ms	remaining: 475ms
3:	learn: 0.5532924	total: 39.4ms	remaining: 454ms
4:	learn: 0.5243678	total: 50ms	remaining: 450ms
5:	learn: 0.4973908	total: 60.4ms	remaining: 443ms
6:	learn: 0.4721493	total: 70.3ms	remaining: 432ms
7:	learn: 0.4485143	total: 78.5ms	remaining: 412ms
8:	learn: 0.4263381	total: 87ms	remaining: 396ms
9:	learn: 0.4054986	total: 92.9ms	remaining: 372ms
10:	learn: 0.3859262	total: 103ms	remaining: 364ms
11:	learn: 0.3675026	total: 115ms	remaining: 363ms
12:	learn: 0.3501168	total: 124ms	remaining: 354ms
13:	learn: 0.3336975	total: 134ms	remaining: 344ms
14:	learn: 0.3181948	total: 144ms	remaining: 335ms
15:	learn: 0.3035391	total: 153ms	remaining: 326ms
16:	learn: 0.2896583	total: 162ms	remaining: 315ms
17:	learn: 0.2765074	total: 173ms	remaining: 308ms
18:	learn: 0.2640375	total: 183ms	remaining: 299ms
19:	learn: 0.2522365	total: 193ms	rem

20:	learn: 0.2410597	total: 209ms	remaining: 288ms
21:	learn: 0.2303922	total: 219ms	remaining: 279ms
22:	learn: 0.2202647	total: 230ms	remaining: 270ms
23:	learn: 0.2106100	total: 238ms	remaining: 258ms
24:	learn: 0.2014534	total: 250ms	remaining: 250ms
25:	learn: 0.1927320	total: 261ms	remaining: 241ms
26:	learn: 0.1844001	total: 268ms	remaining: 228ms
27:	learn: 0.1764703	total: 277ms	remaining: 218ms
28:	learn: 0.1689360	total: 289ms	remaining: 209ms
29:	learn: 0.1617247	total: 296ms	remaining: 197ms
30:	learn: 0.1548681	total: 308ms	remaining: 189ms
31:	learn: 0.1483044	total: 315ms	remaining: 177ms
32:	learn: 0.1420569	total: 326ms	remaining: 168ms
33:	learn: 0.1360968	total: 337ms	remaining: 159ms
34:	learn: 0.1303952	total: 348ms	remaining: 149ms
35:	learn: 0.1249459	total: 359ms	remaining: 140ms
36:	learn: 0.1197640	total: 370ms	remaining: 130ms
37:	learn: 0.1147960	total: 380ms	remaining: 120ms
38:	learn: 0.1100538	total: 390ms	remaining: 110ms
39:	learn: 0.1055034	total: 401

42:	learn: 0.0929868	total: 429ms	remaining: 69.8ms
43:	learn: 0.0891938	total: 440ms	remaining: 60ms
44:	learn: 0.0855493	total: 451ms	remaining: 50.1ms
45:	learn: 0.0820744	total: 464ms	remaining: 40.3ms
46:	learn: 0.0787364	total: 474ms	remaining: 30.3ms
47:	learn: 0.0755352	total: 485ms	remaining: 20.2ms
48:	learn: 0.0724696	total: 494ms	remaining: 10.1ms
49:	learn: 0.0695412	total: 504ms	remaining: 0us
0:	learn: 0.6540065	total: 9.5ms	remaining: 466ms
1:	learn: 0.6178742	total: 20.5ms	remaining: 492ms
2:	learn: 0.5843916	total: 30.6ms	remaining: 480ms
3:	learn: 0.5532953	total: 39.7ms	remaining: 457ms
4:	learn: 0.5243717	total: 50.8ms	remaining: 457ms
5:	learn: 0.4973954	total: 61ms	remaining: 447ms
6:	learn: 0.4721544	total: 71.4ms	remaining: 439ms
7:	learn: 0.4485192	total: 80.4ms	remaining: 422ms
8:	learn: 0.4263429	total: 88.7ms	remaining: 404ms
9:	learn: 0.4055031	total: 95ms	remaining: 380ms
10:	learn: 0.3859305	total: 105ms	remaining: 372ms
11:	learn: 0.3675066	total: 115ms

3:	learn: 0.5532924	total: 43.9ms	remaining: 505ms
4:	learn: 0.5243678	total: 54.2ms	remaining: 488ms
5:	learn: 0.4973908	total: 64.6ms	remaining: 474ms
6:	learn: 0.4721493	total: 74.3ms	remaining: 456ms
7:	learn: 0.4485143	total: 83.6ms	remaining: 439ms
8:	learn: 0.4263381	total: 92.9ms	remaining: 423ms
9:	learn: 0.4054986	total: 99.8ms	remaining: 399ms
10:	learn: 0.3859262	total: 110ms	remaining: 389ms
11:	learn: 0.3675026	total: 120ms	remaining: 380ms
12:	learn: 0.3501168	total: 130ms	remaining: 370ms
13:	learn: 0.3336975	total: 140ms	remaining: 359ms
14:	learn: 0.3181948	total: 150ms	remaining: 350ms
15:	learn: 0.3035391	total: 160ms	remaining: 340ms
16:	learn: 0.2896583	total: 170ms	remaining: 330ms
17:	learn: 0.2765074	total: 180ms	remaining: 320ms
18:	learn: 0.2640375	total: 191ms	remaining: 311ms
19:	learn: 0.2522365	total: 204ms	remaining: 305ms
20:	learn: 0.2410379	total: 214ms	remaining: 295ms
21:	learn: 0.2303722	total: 224ms	remaining: 285ms
22:	learn: 0.2202421	total: 234

25:	learn: 0.1927320	total: 266ms	remaining: 246ms
26:	learn: 0.1844001	total: 273ms	remaining: 233ms
27:	learn: 0.1764703	total: 283ms	remaining: 222ms
28:	learn: 0.1689360	total: 295ms	remaining: 214ms
29:	learn: 0.1617247	total: 303ms	remaining: 202ms
30:	learn: 0.1548681	total: 314ms	remaining: 192ms
31:	learn: 0.1483044	total: 322ms	remaining: 181ms
32:	learn: 0.1420569	total: 333ms	remaining: 172ms
33:	learn: 0.1360968	total: 345ms	remaining: 162ms
34:	learn: 0.1303952	total: 356ms	remaining: 152ms
35:	learn: 0.1249459	total: 366ms	remaining: 142ms
36:	learn: 0.1197640	total: 377ms	remaining: 133ms
37:	learn: 0.1147960	total: 389ms	remaining: 123ms
38:	learn: 0.1100538	total: 399ms	remaining: 113ms
39:	learn: 0.1055034	total: 410ms	remaining: 103ms
40:	learn: 0.1011442	total: 418ms	remaining: 91.7ms
41:	learn: 0.0969850	total: 429ms	remaining: 81.6ms
42:	learn: 0.0929985	total: 437ms	remaining: 71.1ms
43:	learn: 0.0892047	total: 448ms	remaining: 61.2ms
44:	learn: 0.0855598	total:

42:	learn: 0.0929868	total: 433ms	remaining: 70.4ms
43:	learn: 0.0891938	total: 444ms	remaining: 60.6ms
44:	learn: 0.0855493	total: 455ms	remaining: 50.5ms
45:	learn: 0.0820744	total: 467ms	remaining: 40.6ms
46:	learn: 0.0787364	total: 479ms	remaining: 30.6ms
47:	learn: 0.0755352	total: 491ms	remaining: 20.4ms
48:	learn: 0.0724696	total: 501ms	remaining: 10.2ms
49:	learn: 0.0695412	total: 511ms	remaining: 0us
0:	learn: 0.6540065	total: 9.54ms	remaining: 468ms
1:	learn: 0.6178742	total: 19.7ms	remaining: 472ms
2:	learn: 0.5843916	total: 29.4ms	remaining: 461ms
3:	learn: 0.5532953	total: 39.5ms	remaining: 454ms
4:	learn: 0.5243717	total: 49.6ms	remaining: 447ms
5:	learn: 0.4973954	total: 60.1ms	remaining: 441ms
6:	learn: 0.4721544	total: 69.8ms	remaining: 429ms
7:	learn: 0.4485192	total: 78.9ms	remaining: 414ms
8:	learn: 0.4263429	total: 87.8ms	remaining: 400ms
9:	learn: 0.4055031	total: 94.5ms	remaining: 378ms
10:	learn: 0.3859305	total: 105ms	remaining: 372ms
11:	learn: 0.3675066	total

4:	learn: 0.5243678	total: 52.8ms	remaining: 475ms
5:	learn: 0.4973908	total: 65.8ms	remaining: 483ms
6:	learn: 0.4721493	total: 76.5ms	remaining: 470ms
7:	learn: 0.4485143	total: 85.2ms	remaining: 447ms
8:	learn: 0.4263381	total: 93.4ms	remaining: 426ms
9:	learn: 0.4054986	total: 99.6ms	remaining: 398ms
10:	learn: 0.3859262	total: 110ms	remaining: 390ms
11:	learn: 0.3675026	total: 121ms	remaining: 384ms
12:	learn: 0.3501168	total: 132ms	remaining: 376ms
13:	learn: 0.3336975	total: 142ms	remaining: 365ms
14:	learn: 0.3181948	total: 152ms	remaining: 355ms
15:	learn: 0.3035391	total: 163ms	remaining: 347ms
16:	learn: 0.2896583	total: 172ms	remaining: 335ms
17:	learn: 0.2765074	total: 183ms	remaining: 325ms
18:	learn: 0.2640375	total: 194ms	remaining: 317ms
19:	learn: 0.2522365	total: 205ms	remaining: 308ms
20:	learn: 0.2410379	total: 216ms	remaining: 299ms
21:	learn: 0.2303722	total: 227ms	remaining: 289ms
22:	learn: 0.2202421	total: 238ms	remaining: 279ms
23:	learn: 0.2105885	total: 245

24:	learn: 0.2014534	total: 256ms	remaining: 256ms
25:	learn: 0.1927320	total: 267ms	remaining: 246ms
26:	learn: 0.1844001	total: 274ms	remaining: 233ms
27:	learn: 0.1764703	total: 282ms	remaining: 222ms
28:	learn: 0.1689360	total: 294ms	remaining: 213ms
29:	learn: 0.1617247	total: 302ms	remaining: 201ms
30:	learn: 0.1548681	total: 313ms	remaining: 192ms
31:	learn: 0.1483044	total: 320ms	remaining: 180ms
32:	learn: 0.1420569	total: 332ms	remaining: 171ms
33:	learn: 0.1360968	total: 343ms	remaining: 162ms
34:	learn: 0.1303952	total: 354ms	remaining: 152ms
35:	learn: 0.1249459	total: 365ms	remaining: 142ms
36:	learn: 0.1197640	total: 376ms	remaining: 132ms
37:	learn: 0.1147960	total: 386ms	remaining: 122ms
38:	learn: 0.1100538	total: 398ms	remaining: 112ms
39:	learn: 0.1055034	total: 408ms	remaining: 102ms
40:	learn: 0.1011442	total: 415ms	remaining: 91.1ms
41:	learn: 0.0969850	total: 425ms	remaining: 81ms
42:	learn: 0.0929985	total: 433ms	remaining: 70.6ms
43:	learn: 0.0892047	total: 44

46:	learn: 0.0787364	total: 487ms	remaining: 31.1ms
47:	learn: 0.0755352	total: 498ms	remaining: 20.7ms
48:	learn: 0.0724696	total: 508ms	remaining: 10.4ms
49:	learn: 0.0695412	total: 519ms	remaining: 0us
0:	learn: 0.6540065	total: 9.64ms	remaining: 472ms
1:	learn: 0.6178742	total: 21.8ms	remaining: 522ms
2:	learn: 0.5843916	total: 32.2ms	remaining: 505ms
3:	learn: 0.5532953	total: 41.6ms	remaining: 478ms
4:	learn: 0.5243717	total: 53.2ms	remaining: 479ms
5:	learn: 0.4973954	total: 62.7ms	remaining: 460ms
6:	learn: 0.4721544	total: 72.3ms	remaining: 444ms
7:	learn: 0.4485192	total: 81.4ms	remaining: 427ms
8:	learn: 0.4263429	total: 90.5ms	remaining: 412ms
9:	learn: 0.4055031	total: 97ms	remaining: 388ms
10:	learn: 0.3859305	total: 109ms	remaining: 388ms
11:	learn: 0.3675066	total: 120ms	remaining: 380ms
12:	learn: 0.3501215	total: 130ms	remaining: 370ms
13:	learn: 0.3337021	total: 140ms	remaining: 360ms
14:	learn: 0.3182001	total: 151ms	remaining: 353ms
15:	learn: 0.3035450	total: 162m

9:	learn: 0.4054986	total: 89.4ms	remaining: 358ms
10:	learn: 0.3859262	total: 99.3ms	remaining: 352ms
11:	learn: 0.3675026	total: 111ms	remaining: 350ms
12:	learn: 0.3501168	total: 121ms	remaining: 344ms
13:	learn: 0.3336975	total: 130ms	remaining: 335ms
14:	learn: 0.3181948	total: 140ms	remaining: 328ms
15:	learn: 0.3035391	total: 151ms	remaining: 321ms
16:	learn: 0.2896583	total: 161ms	remaining: 312ms
17:	learn: 0.2765074	total: 171ms	remaining: 304ms
18:	learn: 0.2640375	total: 182ms	remaining: 296ms
19:	learn: 0.2522365	total: 195ms	remaining: 292ms
20:	learn: 0.2410379	total: 205ms	remaining: 283ms
21:	learn: 0.2303722	total: 216ms	remaining: 275ms
22:	learn: 0.2202421	total: 228ms	remaining: 267ms
23:	learn: 0.2105885	total: 235ms	remaining: 254ms
24:	learn: 0.2014325	total: 246ms	remaining: 246ms
25:	learn: 0.1927087	total: 257ms	remaining: 237ms
26:	learn: 0.1843779	total: 263ms	remaining: 224ms
27:	learn: 0.1764491	total: 273ms	remaining: 214ms
28:	learn: 0.1689145	total: 28

22:	learn: 0.2202647	total: 236ms	remaining: 277ms
23:	learn: 0.2106100	total: 244ms	remaining: 265ms
24:	learn: 0.2014534	total: 256ms	remaining: 256ms
25:	learn: 0.1927320	total: 266ms	remaining: 246ms
26:	learn: 0.1844001	total: 272ms	remaining: 232ms
27:	learn: 0.1764703	total: 284ms	remaining: 223ms
28:	learn: 0.1689360	total: 295ms	remaining: 213ms
29:	learn: 0.1617247	total: 302ms	remaining: 201ms
30:	learn: 0.1548681	total: 314ms	remaining: 193ms
31:	learn: 0.1483044	total: 322ms	remaining: 181ms
32:	learn: 0.1420569	total: 334ms	remaining: 172ms
33:	learn: 0.1360968	total: 345ms	remaining: 162ms
34:	learn: 0.1303952	total: 356ms	remaining: 152ms
35:	learn: 0.1249459	total: 366ms	remaining: 143ms
36:	learn: 0.1197640	total: 378ms	remaining: 133ms
37:	learn: 0.1147960	total: 389ms	remaining: 123ms
38:	learn: 0.1100538	total: 400ms	remaining: 113ms
39:	learn: 0.1055034	total: 410ms	remaining: 103ms
40:	learn: 0.1011442	total: 417ms	remaining: 91.6ms
41:	learn: 0.0969850	total: 42

40:	learn: 0.1011312	total: 412ms	remaining: 90.4ms
41:	learn: 0.0969728	total: 423ms	remaining: 80.6ms
42:	learn: 0.0929868	total: 432ms	remaining: 70.4ms
43:	learn: 0.0891938	total: 445ms	remaining: 60.7ms
44:	learn: 0.0855493	total: 455ms	remaining: 50.6ms
45:	learn: 0.0820744	total: 466ms	remaining: 40.5ms
46:	learn: 0.0787364	total: 476ms	remaining: 30.4ms
47:	learn: 0.0755352	total: 488ms	remaining: 20.3ms
48:	learn: 0.0724696	total: 497ms	remaining: 10.1ms
49:	learn: 0.0695412	total: 508ms	remaining: 0us
0:	learn: 0.6540065	total: 9.89ms	remaining: 485ms
1:	learn: 0.6178742	total: 21.2ms	remaining: 509ms
2:	learn: 0.5843916	total: 31.7ms	remaining: 496ms
3:	learn: 0.5532953	total: 40.6ms	remaining: 467ms
4:	learn: 0.5243717	total: 50.1ms	remaining: 451ms
5:	learn: 0.4973954	total: 60.6ms	remaining: 444ms
6:	learn: 0.4721544	total: 70ms	remaining: 430ms
7:	learn: 0.4485192	total: 78.4ms	remaining: 411ms
8:	learn: 0.4263429	total: 86.4ms	remaining: 394ms
9:	learn: 0.4055031	total:

In [78]:
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
print("F1 Score: %0.5f (+/- %0.5f)" % (f.mean(), f.std() * 2))
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))

Accuracy: 1.00000 (+/- 0.00000)
F1 Score: 1.00000 (+/- 0.00000)
Precision: 1.00000 (+/- 0.00000)
Recall: 1.00000 (+/- 0.00000)
