In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.datasets
import sklearn.metrics
import itertools

In [2]:
class ScratchLogisticRegression:
    """
    ロジスティック回帰のスクラッチ実装

    Parameters
    ----------
    num_iter : int
      イテレーション数
    lr : float
      学習率
    no_bias : bool
      バイアス項を入れない場合はTrue
    lm : float
      正則化の強さ
    verbose : bool
      学習過程を出力する場合はTrue

    Attributes
    ----------
    self.coef_ : 次の形のndarray, shape (n_features, 1)
      パラメータ
    self.bias_ : 次の形のndarray, shape (1,)
      バイアス項
    self.loss : 次の形のndarray, shape (self.iter,)
      訓練データに対する損失の記録
    self.val_loss : 次の形のndarray, shape (self.iter,)
      検証データに対する損失の記録

    """
    def __init__(self, num_iter, lr=0.01, no_bias=False, lm=1, verbose=False):
        # ハイパーパラメータを属性として記録
        self.iter = num_iter
        self.lr = lr
        self.no_bias = no_bias
        self.lm = lm
        self.verbose = verbose
        # 学習するパラメータ
        self.coef_ = None
        self.bias_ = None
        # 損失を記録する配列を用意
        self.loss = np.zeros(self.iter)
        self.val_loss = np.zeros(self.iter)

    def _hypothesis(self, X):
        z = X @ self.coef_
        if not self.no_bias:
            z = z + self.bias_
        return 1 / (1 + np.exp(-z))

    def _gradient_descent(self, X, y):
        error = self.predict_proba(X).ravel() - y
        print(error.shape)
        self.coef_ = self.coef_ - (self.lr * (error @ X) / len(X)).reshape((-1, 1))
        if not self.no_bias:
            self.bias_ = self.bias_ - (self.lr * np.sum(error) / len(X))

    def fit(self, X, y, X_val=None, y_val=None):
        """
        ロジスティック回帰を学習する。検証データが入力された場合はそれに対する損失と精度もイテレーションごとに計算する。

        Parameters
        ----------
        X : 次の形のndarray, shape (n_samples, n_features)
            訓練データの特徴量
        y : 次の形のndarray, shape (n_samples, )
            訓練データの正解値
        X_val : 次の形のndarray, shape (n_samples, n_features)
            検証データの特徴量
        y_val : 次の形のndarray, shape (n_samples, )
            検証データの正解値
        """
        n_features = X.shape[1]
        self.coef_ = np.random.random((n_features, 1))
        if not self.no_bias:
            self.bias_ = np.zeros((1,))
        for i in range(self.iter):
            self._gradient_descent(X, y)
            y_pred = self.predict_proba(X)
            self.loss[i] = self._loss_func(y_pred, y)
            if X_val is not None and y_val is not None:
                y_pred_val = self.predict_proba(X_val)
                self.val_loss[i] = self._loss_func(y_pred_val, y_val)
            if self.verbose:
                # verboseをTrueにした際は学習過程を出力
                if X_val is not None and y_val is not None:
                    print("loss:", self.loss[i], "val_loss", self.val_loss[i])
                else:
                    print("loss:", self.loss[i])

    def predict(self, X):
        """
        ロジスティック回帰を使いラベルを推定する。

        Parameters
        ----------
        X : 次の形のndarray, shape (n_samples, n_features)
            サンプル

        Returns
        -------
            次の形のndarray, shape (n_samples,)
            ロジスティック回帰による推定結果
        """
        y = self.predict_proba(X)
        return np.where(y < 0.5, 0, 1).astype(np.int32)

    def predict_proba(self, X):
        """
        ロジスティック回帰を使い確率を推定する。

        Parameters
        ----------
        X : 次の形のndarray, shape (n_samples, n_features)
            サンプル

        Returns
        -------
            次の形のndarray, shape (n_samples,)
            ロジスティック回帰による推定結果
        """
        return self._hypothesis(X).ravel()
    
    def _loss_func(self, y_pred, y):
        return np.mean(-y * np.log(y_pred) - (1 - y) * np.log(1 - y_pred)) + 0

#### 学習を行う

In [3]:
# 学習データの用意
data = sklearn.datasets.load_iris()

In [4]:
features = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
]

In [5]:
X = pd.DataFrame(data.data, columns=features)
y = pd.DataFrame(data.target, columns=["Species"])

In [6]:
# データクラスの選択
df = pd.concat([X, y], axis=1)
# label: versicolor = 1, virginica = 2
df_selected = df.query("Species in [1, 2]")
df_selected

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,Species
50,7.0,3.2,4.7,1.4,1
51,6.4,3.2,4.5,1.5,1
52,6.9,3.1,4.9,1.5,1
53,5.5,2.3,4.0,1.3,1
54,6.5,2.8,4.6,1.5,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [7]:
# versicolor = 0, virginica = 1 となるように変換
df_selected = df_selected.assign(Species=df_selected["Species"] - 1)
df_selected

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,Species
50,7.0,3.2,4.7,1.4,0
51,6.4,3.2,4.5,1.5,0
52,6.9,3.1,4.9,1.5,0
53,5.5,2.3,4.0,1.3,0
54,6.5,2.8,4.6,1.5,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,1
146,6.3,2.5,5.0,1.9,1
147,6.5,3.0,5.2,2.0,1
148,6.2,3.4,5.4,2.3,1


In [8]:
from sklearn.model_selection import train_test_split
X = df_selected.iloc[:, 0:4].to_numpy()
y = df_selected.iloc[:, 4].to_numpy()
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.25, random_state=131)

In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train_norm = scaler.transform(X_train)
X_valid_norm = scaler.transform(X_valid)
X_valid_norm

array([[ 0.76411038, -1.11159851,  1.11834564,  0.26733694],
       [ 1.086067  ,  0.69424745,  0.27718309,  1.41965133],
       [-0.36273779,  0.09229879, -0.44381338, -0.4240517 ],
       [-0.84567272,  0.09229879, -0.80431162, -0.88497745],
       [-0.20175948,  0.09229879, -0.20348123, -0.65451457],
       [ 1.086067  ,  0.69424745,  0.03685093, -0.4240517 ],
       [-1.32860765,  0.39327312, -0.44381338, -0.4240517 ],
       [-0.84567272, -0.20867553, -0.9244777 , -0.88497745],
       [ 2.69585009,  2.80106773,  1.83934211,  0.72826269],
       [-0.20175948, -0.20867553, -0.20348123, -1.11544033],
       [ 0.76411038,  0.39327312,  0.39734917,  1.41965133],
       [ 0.12019714, -1.11159851,  0.03685093, -0.4240517 ],
       [-0.68469441, -0.50964986, -0.9244777 , -1.57636608],
       [ 1.72998023,  0.09229879,  1.71917603,  0.26733694],
       [-0.5237161 ,  0.39327312,  0.27718309,  0.26733694],
       [-1.00665103,  0.39327312, -0.44381338, -0.4240517 ],
       [ 0.92508869,  0.

In [10]:
logistic = ScratchLogisticRegression(1000, verbose=True)
logistic.fit(X_train_norm, y_train)

(75,)
loss: 0.49506025366510625
(75,)
loss: 0.49430635024196284
(75,)
loss: 0.49355557094827834
(75,)
loss: 0.4928078923706938
(75,)
loss: 0.4920632913377902
(75,)
loss: 0.49132174491714814
(75,)
loss: 0.4905832304124441
(75,)
loss: 0.48984772536058824
(75,)
loss: 0.48911520752889753
(75,)
loss: 0.48838565491230884
(75,)
loss: 0.48765904573062774
(75,)
loss: 0.4869353584258138
(75,)
loss: 0.48621457165930243
(75,)
loss: 0.4854966643093613
(75,)
loss: 0.4847816154684832
(75,)
loss: 0.4840694044408111
(75,)
loss: 0.48336001073960055
(75,)
loss: 0.48265341408471146
(75,)
loss: 0.4819495944001364
(75,)
loss: 0.4812485318115606
(75,)
loss: 0.48055020664395137
(75,)
loss: 0.4798545994191846
(75,)
loss: 0.4791616908536968
(75,)
loss: 0.47847146185617184
(75,)
loss: 0.47778389352525624
(75,)
loss: 0.4770989671473055
(75,)
loss: 0.47641666419415835
(75,)
loss: 0.47573696632094187
(75,)
loss: 0.4750598553639047
(75,)
loss: 0.4743853133382779
(75,)
loss: 0.4737133224361647
(75,)
loss: 0.473043865