In [2]:
import numpy as np
from sklearn.datasets import load_breast_cancer

In [3]:
# Load scikit-learn's bundled breast-cancer dataset and list the fields
# available on the returned container.
cancer = load_breast_cancer()
print(cancer.keys())

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])


In [5]:
# Show the feature names, how many there are, and the class names,
# one item per line.
print(
    cancer.feature_names,
    len(cancer.feature_names),
    cancer.target_names,
    sep='\n',
)

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
30
['malignant' 'benign']


In [6]:
# Report the dimensions of the feature matrix and of the label vector.
print('data shape:', cancer.data.shape)
print('target shape:', cancer.target.shape)

data shape: (569, 30)
target shape: (569,)


In [7]:
# Peek at the last two samples followed by their labels.
print(cancer.data[-2:], cancer.target[-2:], sep='\n')

[[2.060e+01 2.933e+01 1.401e+02 1.265e+03 1.178e-01 2.770e-01 3.514e-01
  1.520e-01 2.397e-01 7.016e-02 7.260e-01 1.595e+00 5.772e+00 8.622e+01
  6.522e-03 6.158e-02 7.117e-02 1.664e-02 2.324e-02 6.185e-03 2.574e+01
  3.942e+01 1.846e+02 1.821e+03 1.650e-01 8.681e-01 9.387e-01 2.650e-01
  4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 1.810e+02 5.263e-02 4.362e-02 0.000e+00
  0.000e+00 1.587e-01 5.884e-02 3.857e-01 1.428e+00 2.548e+00 1.915e+01
  7.189e-03 4.660e-03 0.000e+00 0.000e+00 2.676e-02 2.783e-03 9.456e+00
  3.037e+01 5.916e+01 2.686e+02 8.996e-02 6.444e-02 0.000e+00 0.000e+00
  2.871e-01 7.039e-02]]
[0 1]


In [8]:
# Shuffle samples and labels with one shared permutation so every row keeps
# its own label.
# Seeding NumPy's global generator first makes the shuffle -- and every later
# np.random draw in this notebook -- repeatable after a kernel restart.
SEED = 42
np.random.seed(SEED)

x = cancer['data']
y = cancer['target']
index = np.random.permutation(x.shape[0])
x = x[index]
y = y[index]

In [10]:
# The first n_train shuffled samples are used for training; the remainder is
# held out for testing.
n_train = 400
x_train, y_train = x[:n_train], y[:n_train]
x_test, y_test = x[n_train:], y[n_train:]
print('train data:', x_train.shape, 'test data:', x_test.shape)
# The second label previously read 'test data:' although it reports y_test.
print('train target:', y_train.shape, 'test target:', y_test.shape)

train data: (400, 30) test data: (169, 30)
train target: (400,) test data: (169,)


In [11]:
# Start from small random parameters: one weight per feature column of the
# training matrix (rather than a hard-coded count) and a single bias term.
w = 0.0001 * np.random.randn(x_train.shape[1])
b = 0.001 * np.random.randn(1)
def linear(x):
    """Return the affine score ``x.dot(w) + b``.

    ``w`` and ``b`` are the module-level parameters and are looked up at call
    time, so the result always reflects their current values.
    """
    weighted_sum = x.dot(w)
    return weighted_sum + b
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    The direct formula calls ``exp(-x)``, which overflows (with a
    RuntimeWarning) for large negative ``x``.  Here only ``exp(-|x|)`` is
    ever computed, so the exponential stays within (0, 1].

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    Scalar for scalar input, otherwise an array with the shape of ``x``,
    holding values in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) can underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
    # Both expressions are the same function algebraically.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return out[()]

In [12]:
def cross_entropy_error(p, y):
    """Mean binary cross-entropy between probabilities ``p`` and labels ``y``.

    A small constant is added inside each logarithm so that ``log(0)`` is
    never evaluated when ``p`` is exactly 0 or 1.
    """
    eps = 1e-7
    positive_term = y * np.log(p + eps)
    negative_term = (1 - y) * np.log(1 - p + eps)
    return -np.mean(positive_term + negative_term)

In [13]:
def numerical_gradient(f, theta):
    """Estimate the gradient of ``f`` at ``theta`` by central differences.

    Each element of ``theta`` is shifted by ``+h`` and then ``-h`` *in place*,
    ``f(theta)`` is evaluated at both points, and the element is put back to
    its original value before moving on.

    Parameters
    ----------
    f : callable
        Called as ``f(theta)``; expected to return a scalar.
    theta : numpy.ndarray
        Array of any shape.  It is modified temporarily during the call and
        restored afterwards, even if ``f`` raises.

    Returns
    -------
    numpy.ndarray
        Array created with ``np.zeros_like(theta)`` whose elements hold the
        estimated partial derivatives.
    """
    h = 1e-4
    grad = np.zeros_like(theta)
    # .flat indexes elements in row-major order regardless of theta's rank,
    # so the same loop handles vectors, matrices and higher-rank arrays.
    for idx in range(theta.size):
        tmp_val = theta.flat[idx]
        try:
            theta.flat[idx] = tmp_val + h
            fxh1 = f(theta)
            theta.flat[idx] = tmp_val - h
            fxh2 = f(theta)
        finally:
            # Never leave the caller's array perturbed.
            theta.flat[idx] = tmp_val
        grad.flat[idx] = (fxh1 - fxh2) / (2 * h)
    return grad

In [14]:
num_epoch = 20000
learning_rate = 0.00003


def _loss_for_w(w_candidate):
    """Training loss as a function of the weights, using the current global ``b``."""
    return cross_entropy_error(sigmoid(x_train.dot(w_candidate) + b), y_train)


def _loss_for_b(b_candidate):
    """Training loss as a function of the bias, using the current global ``w``."""
    return cross_entropy_error(sigmoid(x_train.dot(w) + b_candidate), y_train)


# Plain gradient descent with numerically estimated gradients.
# The objectives above read the parameter they are given; the previous lambdas
# ignored their argument and only worked because numerical_gradient perturbs
# the global array in place.
for epoch in range(num_epoch):
    # w is updated first, so the bias gradient is taken at the new weights.
    w = w - learning_rate * numerical_gradient(_loss_for_w, w)
    b = b - learning_rate * numerical_gradient(_loss_for_b, b)
    pred = sigmoid(linear(x_train))
    loss = cross_entropy_error(pred, y_train)
    if epoch % 1000 == 0:
        print("{0} epoch, train loss={1}".format(epoch, loss))

0 epoch, train loss=0.7257258633377814
1000 epoch, train loss=0.2082227338360304
2000 epoch, train loss=0.2021645459426015
3000 epoch, train loss=0.1986718139945929
4000 epoch, train loss=0.19584206268580318
5000 epoch, train loss=0.19340639408769497
6000 epoch, train loss=0.19126048839684287
7000 epoch, train loss=0.18934793749286938
8000 epoch, train loss=0.1876312850201962
9000 epoch, train loss=0.186082632370056
10000 epoch, train loss=0.18467980436543627
11000 epoch, train loss=0.18340446851159
12000 epoch, train loss=0.18224109010545889
13000 epoch, train loss=0.1811762999135089
14000 epoch, train loss=0.18019848331671473
15000 epoch, train loss=0.17929749428958253
16000 epoch, train loss=0.1784644425597237
17000 epoch, train loss=0.17769152572597718
18000 epoch, train loss=0.1769718907722704
19000 epoch, train loss=0.17629951629755974


In [15]:
# Evaluate the trained parameters on the held-out split.
pred = sigmoid(linear(x_test))
loss = cross_entropy_error(pred, y_test)
# This is the loss on x_test/y_test, so label it as such (it used to say
# "train loss"); `epoch` still holds the last training epoch index.
print("{0} epoch, test loss={1}".format(epoch, loss))

19999 epoch, train loss=0.18617651756342654


In [16]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# Threshold the predicted probabilities at 0.5 in one vectorized step:
# below 0.5 -> class 0.0, otherwise class 1.0.
pred = np.where(np.asarray(pred) < 0.5, 0.0, 1.0)
conf_matrix = confusion_matrix(y_test, pred)
print(conf_matrix)
# Per-class precision / recall / F1 as computed by scikit-learn.  The saved
# output of this cell shows this report, so the call is enabled again; it
# serves as the reference for the manual calculations in the cells below.
class_report = classification_report(y_test, pred)
print(class_report)

[[ 54  10]
 [  3 102]]
              precision    recall  f1-score   support

           0       0.95      0.84      0.89        64
           1       0.91      0.97      0.94       105

    accuracy                           0.92       169
   macro avg       0.93      0.91      0.92       169
weighted avg       0.92      0.92      0.92       169



In [30]:
# Precision of a class = its diagonal entry of the confusion matrix divided
# by the total of its column (everything predicted as that class).
print('precision', 'malignant precision', sep='\n')
malig_prec = conf_matrix[0, 0] / conf_matrix[:, 0].sum()
print(round(malig_prec, 2))
print('benign precision')
benign_prec = conf_matrix[1, 1] / conf_matrix[:, 1].sum()
print(round(benign_prec, 2))

precision
malignant precision
0.95
benign precision
0.91


In [31]:
# Recall of a class = its diagonal entry of the confusion matrix divided by
# the total of its row (everything that truly belongs to that class).
print('recall', 'malignant recall', sep='\n')
malig_recall = conf_matrix[0, 0] / conf_matrix[0].sum()
print(round(malig_recall, 2))
print('benign recall')
benign_recall = conf_matrix[1, 1] / conf_matrix[1].sum()
print(round(benign_recall, 2))

recall
malignant recall
0.84
benign recall
0.97


In [32]:
# F1 is the harmonic mean of precision and recall, computed per class from
# the values obtained in the two preceding cells.
print('f1-score', 'malignant f1-score', sep='\n')
malig_f1 = 2 * malig_prec * malig_recall / (malig_prec + malig_recall)
print(round(malig_f1, 2))
print('benign f1-score')
benign_f1 = 2 * benign_prec * benign_recall / (benign_prec + benign_recall)
print(round(benign_f1, 2))

f1-score
malignant f1-score
0.89
benign f1-score
0.94
