In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

# 載入資料集

In [2]:
x, y = load_breast_cancer(return_X_y=True)
print(x.shape)
print(y.shape)

(569, 30)
(569,)


# 拆分訓練與測試集

In [3]:
train_x, test_x, train_y, test_y = train_test_split(x, y)
print(train_x.shape)
print(train_y.shape)

(426, 30)
(426,)


# 主要算法實現

In [4]:
class LogisticRegression:
    def __init__(self, lr, epochs):
        self.lr = lr
        self.epochs = epochs
        
    def fit(self, x, y):
        """梯度下降法"""
        
        n_samples, n_features = x.shape
        
        self.w = np.random.randn(n_features)
        self.b = np.random.randn()
        
        for _ in range(self.epochs):
            logit = np.dot(x, self.w) + self.b
            yhat = self._sigmoid(logit)
            
            dw = np.dot(x.T, (yhat - y)) / n_samples
            db = (yhat - y).mean()
        
            self.w -= self.lr * dw
            self.b -= self.lr * db
    
    def predict(self, x):
        logit = np.dot(x, self.w) + self.b
        yhat = self._sigmoid(logit)

        labels = np.zeros_like(yhat)
        labels[yhat > 0.5] = 1
        
        return labels
    
    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

# 訓練回歸器

In [5]:
regressor = LogisticRegression(lr=0.001, epochs=1000)
regressor.fit(train_x, train_y)

  return 1 / (1 + np.exp(-x))


# 計算預測準確率

In [6]:
predictions = regressor.predict(test_x)
(predictions == test_y).mean()

0.8881118881118881