In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

In [4]:
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) once ``x`` is a large negative number.  This version only
    ever evaluates ``exp(-|x|)``, whose value lies in ``[0, 1]``, and then
    picks the algebraically equivalent expression for each sign of ``x``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s); converted to a float NumPy array.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``x``, element-wise.  A scalar input yields a NumPy
        scalar; an array input yields an array of the same shape.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) cannot overflow because its argument is never positive.
    z = np.exp(-np.abs(x))
    # x >= 0:  1 / (1 + exp(-x))        == 1 / (1 + z)
    # x <  0:  exp(x) / (1 + exp(x))    == z / (1 + z)
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () unwraps a 0-d array to a scalar and is a no-op view
    # for arrays with one or more dimensions.
    return result[()]

In [6]:
# Breast cancer dataset
bc = datasets.load_breast_cancer()
# Show the feature (column) names and the class-label names.
print(bc.feature_names, bc.target_names)

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension'] ['malignant' 'benign']


In [8]:
# Dimensions of the feature matrix as (rows, columns).
bc.data.shape

(569, 30)

In [10]:
# Feature matrix and target labels taken from the loaded dataset.
X = bc.data
y = bc.target

In [12]:
# Inspect the first row of the feature matrix.
print(X[0])

[1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01
 1.471e-01 2.419e-01 7.871e-02 1.095e+00 9.053e-01 8.589e+00 1.534e+02
 6.399e-03 4.904e-02 5.373e-02 1.587e-02 3.003e-02 6.193e-03 2.538e+01
 1.733e+01 1.846e+02 2.019e+03 1.622e-01 6.656e-01 7.119e-01 2.654e-01
 4.601e-01 1.189e-01]


In [16]:
# Hold out 20% of the rows as a test set (test_size=0.2); random_state is
# fixed so that re-running the cell gives the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)
# Confirm the sizes of the two splits.
print(X_train.shape, X_test.shape)

(455, 30) (114, 30)


In [18]:
# Writing the logistic regression code

In [20]:
# Step size applied to each gradient update inside fit().
LEARN_RATE = 0.001
# Number of gradient-descent updates performed by fit().
ITERS = 1000
# Model parameters; fit() overwrites these module-level names and predict()
# reads them. They stay None until fit() has been called.
weights = None
bias = None

In [28]:
def fit(X, y):
    """Train the logistic-regression parameters with batch gradient descent.

    Resets the module-level ``weights`` (a zero vector with one entry per
    column of ``X``) and ``bias`` (0), then applies ``ITERS`` update steps
    with step size ``LEARN_RATE``.  The learned parameters are left in the
    globals ``weights`` and ``bias``; nothing is returned.

    Parameters
    ----------
    X : 2-D array
        Training features; ``X.shape`` is read as (samples, features).
    y : array-like
        Training targets, subtracted element-wise from the predicted
        probabilities.
    """
    global weights, bias

    sample_count, feature_count = X.shape
    weights, bias = np.zeros(feature_count), 0
    # Averaging factor shared by both gradient terms.
    inv_n = 1 / sample_count

    for _step in range(ITERS):
        # Forward pass: linear score squashed through the sigmoid.
        probabilities = sigmoid(np.dot(X, weights) + bias)

        # Gradients of the loss with respect to the weights and the bias.
        residual = probabilities - y
        grad_w = inv_n * np.dot(X.T, residual)
        grad_b = inv_n * np.sum(residual)

        # Parameter update: step against the gradient.
        weights = weights - LEARN_RATE * grad_w
        bias = bias - LEARN_RATE * grad_b

In [36]:
def predict(X):
    """Predict 0/1 class labels for the rows of ``X``.

    Uses the module-level ``weights`` and ``bias`` (as set by ``fit``) to
    compute a sigmoid probability per row, then thresholds it at 0.5.

    Returns
    -------
    list of int
        One label per row: 0 when the probability is less than or equal to
        0.5, otherwise 1 (indices into ``bc.target_names``).
    """
    probabilities = sigmoid(np.dot(X, weights) + bias)

    labels = []
    for prob in probabilities:
        # A probability of exactly 0.5 is assigned to class 0.
        if prob <= 0.5:
            labels.append(0)
        else:
            labels.append(1)
    return labels

In [32]:
# Train on the training split; fit() stores its result in the module-level
# weights and bias, which are then printed.
fit(X_train, y_train)
print(weights, bias)

[ 3.06909268e-01  4.14765151e-01  1.77510681e+00  6.58262153e-01
  2.74112755e-03 -1.55357787e-03 -5.84246709e-03 -2.42240194e-03
  5.35867238e-03  2.20699909e-03  1.37980639e-03  2.87248488e-02
 -1.14703044e-02 -8.02924633e-01  1.34867009e-04 -4.71755317e-04
 -7.48941788e-04 -1.02884775e-04  5.32250879e-04  1.53952549e-05
  3.25179283e-01  5.25937846e-01  1.78128362e+00 -1.00735265e+00
  3.37130828e-03 -6.58398912e-03 -1.24085568e-02 -2.85541121e-03
  7.51708524e-03  1.91377502e-03] 0.04003320537786994


  return 1 / (1 + np.exp(-x))


In [38]:
# Predict a 0/1 label for every row of the test split.
preds = predict(X_test)
print(preds)

[1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0]


  return 1 / (1 + np.exp(-x))


In [40]:
# Element-wise check of predicted vs. true labels. The surrounding brackets
# wrap the boolean array in a one-element list, so it prints as [array(...)].
print([preds == y_test])

[array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True, False,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False, False,  True,  True,  True,  True,  True,  True,
        True, False, False,  True,  True,  True,  True, False, False,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True])]


In [42]:
# Accuracy: number of test samples whose predicted label equals the true
# label, divided by the number of test samples.
score = np.sum(preds == y_test) / len(y_test)
score

0.8947368421052632

In [44]:
# Try increasing the learning rate and running again!
