In [26]:
import numpy as np
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

In [3]:
# Load the breast-cancer dataset and split it into train/test parts
# with a fixed seed so the split is repeatable.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Show the shape of each feature matrix, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(426, 30)
(143, 30)


In [62]:
# Wrap the splits in LightGBM Dataset objects; the evaluation Dataset is
# created with the training Dataset as its reference.
lgb_train = lgb.Dataset(X_train, y_train)
# NOTE(review): the evaluation set is the test split, so the same rows drive
# early stopping and the accuracy reported further down; that score is
# therefore optimistic. Consider carving a separate validation split out of
# X_train for early stopping.
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    # A list (instead of a set) keeps the metric order deterministic, so the
    # log columns always appear as l2 then auc.
    'metric': ['l2', 'auc'],
    'num_leaves': 10,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

# Early stopping is requested through the callback API: the
# `early_stopping_rounds` keyword of lgb.train was removed in LightGBM 4.0,
# while the callback is accepted by both older and newer releases.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=20,
                valid_sets=[lgb_eval],
                callbacks=[lgb.early_stopping(stopping_rounds=5)])

# Predict on the test features using only the trees up to the best iteration
# found by early stopping.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

[1]	valid_0's l2: 0.231116	valid_0's auc: 0.977883
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's l2: 0.214317	valid_0's auc: 0.978826
[3]	valid_0's l2: 0.199361	valid_0's auc: 0.980503
[4]	valid_0's l2: 0.185777	valid_0's auc: 0.982075
[5]	valid_0's l2: 0.172996	valid_0's auc: 0.985744
[6]	valid_0's l2: 0.160727	valid_0's auc: 0.986897
[7]	valid_0's l2: 0.149767	valid_0's auc: 0.990461
[8]	valid_0's l2: 0.139723	valid_0's auc: 0.992767
[9]	valid_0's l2: 0.1303	valid_0's auc: 0.993606
[10]	valid_0's l2: 0.122056	valid_0's auc: 0.992767
[11]	valid_0's l2: 0.114424	valid_0's auc: 0.992558
[12]	valid_0's l2: 0.1075	valid_0's auc: 0.992558
[13]	valid_0's l2: 0.10118	valid_0's auc: 0.992977
[14]	valid_0's l2: 0.0955334	valid_0's auc: 0.992558
Early stopping, best iteration is:
[9]	valid_0's l2: 0.1303	valid_0's auc: 0.993606


In [63]:
# True labels followed by the raw model scores, one array per line.
print(y_test, y_pred, sep="\n")

[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0 1
 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0]
[0.36406845 0.64877126 0.68006758 0.61078806 0.67609998 0.6840601
 0.66961622 0.68408467 0.65626284 0.68007723 0.4505791  0.6164526
 0.67486159 0.48946514 0.4505791  0.36808476 0.63188741 0.31605594
 0.31567831 0.31542815 0.32743403 0.34080512 0.60120829 0.6843199
 0.37212367 0.67724383 0.67343264 0.43560968 0.67724383 0.31605594
 0.67343264 0.31116149 0.64174066 0.36908188 0.6802914  0.34947421
 0.67767096 0.3664825  0.64232896 0.31447151 0.52072276 0.67177936
 0.49999148 0.67532535 0.59544527 0.31447151 0.6802914  0.67745189
 0.67989484 0.32213221 0.31605594 0.35755488 0.36737441 0.6840601
 0.68090378 0.6840601  0.64877126 0.64710241 0.66299348 0.31605594
 0.36012794 0.31421275 0.67177936 0.6840601

In [64]:
# Convert the model scores into hard 0/1 labels: scores strictly above the
# threshold become 1, everything else 0. Vectorized comparison replaces the
# element-by-element Python loop and yields the same int8 array.
DECISION_THRESHOLD = 0.5
y_pred_final = (y_pred > DECISION_THRESHOLD).astype(np.int8)

In [65]:
# True labels followed by the thresholded predictions, one array per line.
print(y_test, y_pred_final, sep="\n")

[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0 1
 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0]
[0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 1 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 1
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 0 1
 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0]


In [66]:
# Fraction of test samples whose thresholded LightGBM prediction matches the label.
print(accuracy_score(y_true=y_test, y_pred=y_pred_final))

0.9440559440559441


In [67]:
# L1-penalized logistic regression as a baseline.
# The solver is pinned to 'liblinear' because the default solver in
# scikit-learn >= 0.22 ('lbfgs') rejects penalty='l1' with a ValueError;
# 'liblinear' was the default in the older releases, so this keeps the
# original model. random_state fixes liblinear's data shuffling so the fit
# is repeatable, matching the seeded train/test split.
lr = LogisticRegression(penalty='l1', solver='liblinear', random_state=0)
lr.fit(X_train, y_train)

# Hard class predictions for the held-out test features.
y_pred_lr = lr.predict(X_test)
print(y_pred_lr)

[0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 0 0 1 1
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0 1
 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0]


In [68]:
# Cross-check: accuracy computed from the stored predictions, then the value
# reported by the estimator's own score() on the same test data.
print(accuracy_score(y_true=y_test, y_pred=y_pred_lr), lr.score(X_test, y_test), sep="\n")

0.958041958041958
0.958041958041958
