In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

Load the iris dataset

In [None]:
# Keep only the first 100 samples (the rows labelled 0 and 1) and two of the
# feature columns, which gives a two-class, two-feature problem that can be
# drawn on a plane.
df = datasets.load_iris()
X = df.data[:100][:, [0, 2]]  # columns 0 and 2: sepal length and petal length
y = df.target[:100]  # 'Iris-setosa'=0, 'Iris-versicolor'=1

Plot the data

In [None]:
# One scatter series per species: rows 0-49 are drawn as Setosa and rows
# 50-99 as Versicolor, each with its own colour and marker.
for species_rows, species_color, species_marker, species_label in (
        (slice(0, 50), 'red', 'o', 'Setosa'),
        (slice(50, 100), 'blue', 's', 'Versicolor'),
):
    plt.scatter(X[species_rows, 0], X[species_rows, 1],
                color=species_color, marker=species_marker,
                label=species_label)

# Axis labels follow the column order chosen for X (sepal length, petal length).
plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')

plt.show()

Object-oriented implementation of the ADAptive LInear NEuron (Adaline) classifier, trained with full-batch gradient descent (`AdalineGD`).

In [None]:
from helpers import Classifier


class AdalineGD(Classifier):
    """ADAptive LInear NEuron classifier trained with full-batch gradient descent.

    The model is a single linear unit, ``np.dot(X, w_) + b_``, followed by an
    identity activation. ``fit`` lowers the mean squared error between that
    continuous output and the targets; ``predict`` thresholds the output at
    0.5, so the targets are expected to be encoded as 0 and 1.

    The base class ``Classifier`` is imported from the project's ``helpers``
    module; its contract is not visible here.

    Parameters
    ----------
    eta : float
        Learning rate (between 0.0 and 1.0).
    n_iter : int
        Number of passes (epochs) over the training set.
    random_state : int
        Seed of the random generator that draws the initial weights.

    Attributes
    ----------
    w_ : ndarray of shape (n_features,)
        Weights; ``None`` until ``fit`` has been called.
    b_ : numpy.float64
        Bias; ``None`` until ``fit`` has been called.
    losses_ : list of float
        Mean squared error recorded in every epoch; ``None`` until ``fit``
        has been called.
    """

    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta  # Learning rate (between 0.0 and 1.0)
        self.n_iter = n_iter
        self.random_state = random_state

        # to be initialized in the fit method
        self.w_ = None
        self.b_ = None
        self.losses_ = None

    def fit(self, X, y):
        """Learn the weights and the bias from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Target values, one per row of ``X``.

        Returns
        -------
        self : AdalineGD
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, contains no samples, or ``y``
            does not hold exactly one target per row of ``X``.
        """
        # Accept any array-like (e.g. nested lists), as net_input/predict
        # already do through np.dot.
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features), "
                "got %d dimension(s)" % X.ndim)
        n_samples, n_features = X.shape
        if n_samples == 0:
            # Averaging over zero samples would silently yield NaN weights.
            raise ValueError("X must contain at least one sample")
        if y.shape != (n_samples,):
            # A column vector or wrong-length y would otherwise broadcast
            # against the output and fail later with an obscure message.
            raise ValueError(
                "y must have shape (%d,), got %r" % (n_samples, y.shape))

        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=n_features)
        self.b_ = np.float64(0.)
        self.losses_ = []

        for _ in range(self.n_iter):
            output = self.activation(self.net_input(X))
            errors = y - output

            # Full-batch step along the negative gradient of the mean
            # squared error with respect to the weights and the bias.
            self.w_ += self.eta * 2.0 * X.T.dot(errors) / n_samples
            self.b_ += self.eta * 2.0 * errors.mean()

            # The recorded loss uses the errors computed before this
            # epoch's update, i.e. the loss at the start of the epoch.
            self.losses_.append((errors ** 2).mean())

        return self

    def net_input(self, X):
        """Return the linear combination ``np.dot(X, w_) + b_``."""
        return np.dot(X, self.w_) + self.b_

    def activation(self, X):
        """Identity activation: return the input unchanged."""
        return X

    def predict(self, X):
        """Return class label 1 where the activated net input is >= 0.5, else 0."""
        return np.where(self.activation(self.net_input(X)) >= 0.5, 1, 0)

Training two Adaline models with different learning rates

In [None]:
# Same epoch budget for both models; only the learning rate differs.
ada1 = AdalineGD(eta=0.1, n_iter=15)
ada1.fit(X, y)

ada2 = AdalineGD(eta=0.0001, n_iter=15)
ada2.fit(X, y);

Training/learning process visualisation (for different learning rates)

In [None]:
_, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
left_panel, right_panel = ax

# Left panel: learning rate 0.1, loss drawn on a log10 scale.
epochs_ada1 = range(1, len(ada1.losses_) + 1)
left_panel.plot(epochs_ada1, np.log10(ada1.losses_), marker='o')
left_panel.set_xlabel('Epochs')
left_panel.set_ylabel('log(Mean squared error)')
left_panel.set_title('Adaline - Learning rate 0.1')

# Right panel: learning rate 0.0001, loss drawn on a linear scale.
epochs_ada2 = range(1, len(ada2.losses_) + 1)
right_panel.plot(epochs_ada2, ada2.losses_, marker='o')
right_panel.set_xlabel('Epochs')
right_panel.set_ylabel('Mean squared error')
right_panel.set_title('Adaline - Learning rate 0.0001')

plt.show()

Improving gradient descent through feature scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# With its default settings StandardScaler centers every feature column and
# scales it to unit variance, putting both features on a comparable scale.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [None]:
# fit() returns the estimator itself, so construction and training chain.
ada_gd = AdalineGD(eta=0.5, n_iter=20).fit(X_std, y)

In [None]:
from helpers import plot_decision_regions

# Figure 1: decision regions of the model trained on the standardized data.
plot_decision_regions(X_std, y, clf=ada_gd)

plt.title('Adaline - Gradient descent')
plt.xlabel('Sepal length [standardized]')
plt.ylabel('Petal length [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

# Figure 2: mean squared error recorded in each epoch (1-based on the x axis).
epochs_ada_gd = range(1, len(ada_gd.losses_) + 1)
plt.plot(epochs_ada_gd, ada_gd.losses_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Mean squared error')
plt.tight_layout()
plt.show()