[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sensioai/dl/blob/master/perceptron/exercise_solution.ipynb)

# Perceptron exercise (solution)

Implement Batch Gradient Descent for Softmax Regression from scratch, using only Python and NumPy, and use it to classify flowers in the Iris dataset.

# Get the data

In [1]:
from sklearn import datasets

# Load the Iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# Features: keep only columns 2 and 3 (petal length, petal width).
X = iris["data"][:, [2, 3]]

# Labels: one class index per sample.
y = iris["target"]

In [2]:
# split into train and test sets

import numpy as np

# Seed the global NumPy RNG so the shuffled split (and every later random
# draw, e.g. the weight initialization) is reproducible on a fresh kernel.
np.random.seed(42)

test_ratio = 0.2
total_size = len(X)

test_size = int(total_size * test_ratio)
train_size = total_size - test_size

# Shuffle sample indices once; both subsets are carved from this permutation.
ixs = np.random.permutation(total_size)

# Slice both subsets at the same boundary. Using `ixs[-test_size:]` for the
# test set would return *every* sample when test_size == 0.
X_train = X[ixs[:train_size]]
y_train = y[ixs[:train_size]]
X_test = X[ixs[train_size:]]
y_test = y[ixs[train_size:]]

X_train.shape, X_test.shape

((120, 2), (30, 2))

In [3]:
# Standardize the features. The mean and standard deviation are computed on
# the training split only and then reused for the test split.
X_mean = np.mean(X_train, axis=0)
X_std = np.std(X_train, axis=0)

X_train_norm = (X_train - X_mean) / X_std
X_test_norm = (X_test - X_mean) / X_std

# Implement the softmax function

In [4]:
def softmax(x):
    """Softmax over the last axis of `x`.

    Args:
        x: array of raw scores; the last axis indexes the classes.

    Returns:
        Array of the same shape whose entries along the last axis are
        non-negative and sum to 1.
    """
    # Softmax is invariant to adding a constant to every score of a row.
    # Subtracting the row maximum makes the largest exponent exp(0) = 1, so
    # np.exp can no longer overflow to inf (which produced inf / inf = NaN).
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=-1, keepdims=True)

# Sample raw scores: 3 rows, one score per class in each row.
a = np.array([[ -1.1005929,   -4.40007828,  -1.34103465],
              [ -3.4269555,  -11.18871295,  -3.49347319],
              [ -0.71891006,  -3.55440292,  -1.11117902]])

# Reference softmax values for `a`.
a_softmax = np.array([[5.48491412e-01, 2.02405142e-02, 4.31268074e-01],
                      [5.16509697e-01, 2.19882186e-04, 4.83270420e-01],
                      [5.76630772e-01, 3.38422251e-02, 3.89527003e-01]])

# Sanity check: the implementation must reproduce the reference values.
assert np.allclose(a_softmax, softmax(a))

# Implement the training loop

In [5]:
def crossentropy(output, target):
    """Per-sample cross-entropy loss computed directly from raw scores.

    For each row this is ``-log(softmax(output)[target])``, evaluated as
    ``logsumexp(output) - output[target]``.

    Args:
        output: 2-D array of raw class scores, one row per sample.
        target: integer class index for each row of `output`.

    Returns:
        1-D array with one loss value per sample (not averaged).
    """
    rows = np.arange(len(output))
    target_scores = output[rows, target]
    # Stable log-sum-exp: factor out the row maximum so np.exp never
    # overflows (a plain log(sum(exp(output))) returns inf for large scores).
    row_max = output.max(axis=-1)
    log_norm = row_max + np.log(np.exp(output - row_max[:, None]).sum(axis=-1))
    return log_norm - target_scores

def grad_crossentropy(output, target):
    """Gradient of the mean cross-entropy loss with respect to the raw scores.

    Args:
        output: 2-D array of raw class scores, one row per sample.
        target: integer class index for each row of `output`.

    Returns:
        Array shaped like `output`: (softmax(output) - one_hot(target))
        divided by the number of samples.
    """
    n_samples = output.shape[0]
    # softmax returns a fresh array, so subtracting the one-hot target in
    # place does not touch the caller's `output`.
    grad = softmax(output)
    grad[np.arange(n_samples), target] -= 1
    return grad / n_samples

class SoftmaxRegression():
  """Softmax regression trained with batch gradient descent.

  The bias is folded into the weight matrix: `fit`, `predict` and `predict2`
  all prepend a column of ones to their input, so `inputs` must equal the
  number of features plus one.

  Attributes:
    w: current weight matrix of shape (inputs, outputs).
    ws: copy of the weight matrix after every epoch run by `fit`.
  """

  def __init__(self, inputs, outputs):
    """Draw the initial weights from a standard normal distribution.

    Args:
      inputs: number of rows of the weight matrix (features + 1 for bias).
      outputs: number of classes.
    """
    self.w = np.random.randn(inputs, outputs)
    self.ws = []

  def fit(self, x, y, epochs, lr):
    """Run `epochs` full-batch gradient descent steps on (x, y).

    Prints the loss every 10 epochs and once more after the last epoch.
    With `epochs == 0` nothing is trained and nothing is printed.

    Args:
      x: 2-D array of features, one row per sample (without bias column).
      y: integer class index for each row of `x`.
      epochs: number of gradient descent steps.
      lr: learning rate.
    """
    x = np.c_[np.ones(len(x)), x]
    # Stays None when the loop never runs, so the final report is skipped
    # instead of failing on an unbound loop variable.
    loss = None
    for epoch in range(epochs):
        # Forward pass: raw class scores.
        y_hat = np.dot(x, self.w)
        # Mean loss over the batch, measured before this epoch's update.
        loss = crossentropy(y_hat, y).mean()
        # d loss / d scores
        dldo = grad_crossentropy(y_hat, y)
        # d loss / d w = x^T . (d loss / d scores)
        grad_w = np.dot(x.T, dldo)
        # Gradient descent step; keep a snapshot for the animation.
        self.w = self.w - lr * grad_w
        self.ws.append(self.w.copy())
        if not epoch % 10 and not epoch == 0:
            print(f'Epoch: {epoch}, Loss: {loss:.8f}')
    if loss is not None:
        print(f'Epoch: {epochs}, Loss: {loss:.8f}')

  def predict2(self, x, w):
    """Predict class indices for `x` using the explicit weight matrix `w`.

    Args:
      x: 2-D array of features, one row per sample (without bias column).
      w: weight matrix to use, e.g. one of the snapshots in `self.ws`.

    Returns:
      1-D array with the predicted class index of every row.
    """
    x = np.c_[np.ones(len(x)), x]
    # Softmax is strictly increasing within a row, so the highest raw score
    # is also the highest probability. Taking argmax of the scores directly
    # gives the same class and cannot be corrupted by exp() overflow.
    return np.argmax(np.dot(x, w), axis=1)

  def predict(self, x):
    """Predict class indices for `x` using the current weights `self.w`."""
    return self.predict2(x, self.w)

In [6]:
# Hyperparameters for batch gradient descent.
epochs = 200
lr = 0.1

# 3 weight rows = 2 petal features + the bias column added inside the model;
# 3 outputs = one score per Iris class.
softmaxRegression = SoftmaxRegression(3, 3)
softmaxRegression.fit(X_train_norm, y_train, epochs, lr)

Epoch: 10, Loss: 0.66267689
Epoch: 20, Loss: 0.58467098
Epoch: 30, Loss: 0.53040358
Epoch: 40, Loss: 0.48954824
Epoch: 50, Loss: 0.45717709
Epoch: 60, Loss: 0.43057958
Epoch: 70, Loss: 0.40812795
Epoch: 80, Loss: 0.38878236
Epoch: 90, Loss: 0.37184527
Epoch: 100, Loss: 0.35682911
Epoch: 110, Loss: 0.34338084
Epoch: 120, Loss: 0.33123687
Epoch: 130, Loss: 0.32019515
Epoch: 140, Loss: 0.31009725
Epoch: 150, Loss: 0.30081649
Epoch: 160, Loss: 0.29224982
Epoch: 170, Loss: 0.28431221
Epoch: 180, Loss: 0.27693262
Epoch: 190, Loss: 0.27005101
Epoch: 200, Loss: 0.26424086


# Plot the decision boundaries of the trained model

In [7]:
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
from matplotlib import animation, rc
# Set the 'animation' rc group's `html` option so that animations displayed
# in the notebook are rendered as HTML5 video.
rc('animation', html='html5')

def plot_decision_regions(epoch, X, y, classifier, w, resolution=0.02):
    """Redraw the decision regions of `classifier` for one animation frame.

    Draws on the module-level axes `ax` and standardizes the plotting grid
    with the module-level `X_mean` / `X_std`.

    Args:
        epoch: zero-based epoch index; the title shows `epoch + 1`.
        X: 2-column array of raw (unnormalized) features to scatter.
        y: class index of every row in `X`.
        classifier: object exposing `predict2(x, w)`.
        w: weight matrix handed to `classifier.predict2`.
        resolution: step of the grid on which the regions are evaluated.

    Returns:
        A one-element list holding the title artist.
    """
    ax.clear()
    title = ax.set_title(f"Epoch {epoch+1}", fontsize=14)

    # One marker and one colour per class present in `y`.
    labels = np.unique(y)
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(labels)])

    # Grid spanning the data range, padded by one unit on every side.
    first, second = X[:, 0], X[:, 1]
    axis_1 = np.arange(first.min() - 1, first.max() + 1, resolution)
    axis_2 = np.arange(second.min() - 1, second.max() + 1, resolution)
    xx1, xx2 = np.meshgrid(axis_1, axis_2)

    # The grid is in raw units; standardize it before asking the model.
    grid_points = np.array([xx1.ravel(), xx2.ravel()]).T
    grid_norm = (grid_points - X_mean) / X_std
    Z = classifier.predict2(grid_norm, w).reshape(xx1.shape)

    # Shaded decision surface.
    ax.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    ax.set_xlim(xx1.min(), xx1.max())
    ax.set_ylim(xx2.min(), xx2.max())
    ax.set_xlabel('petal length', fontsize=14)
    ax.set_ylabel('petal width', fontsize=14)

    # Overlay the samples, one scatter call per class.
    classes = ["Iris-Setosa", "Iris-Versicolor", "Iris-Virginica"]
    for idx, cl in enumerate(labels):
        members = y == cl
        ax.scatter(x=X[members, 0],
                   y=X[members, 1],
                   alpha=0.8,
                   c=colors[idx],
                   marker=markers[idx],
                   label=classes[cl],
                   edgecolor='black')
    ax.legend(loc='upper left', fontsize=14)
    return [title]
        
def get_anim(fig, ax, X, y, model):
    """Build the per-frame callback used by the animation.

    Args:
        fig: accepted but not used by the callback.
        ax: accepted but not used by the callback.
        X: features forwarded to `plot_decision_regions`.
        y: labels forwarded to `plot_decision_regions`.
        model: trained model; frame `i` is drawn with `model.ws[i]`.

    Returns:
        A function of the frame index that redraws the decision regions
        and returns whatever `plot_decision_regions` returns.
    """
    def draw_frame(frame_ix):
        weights = model.ws[frame_ix]
        return plot_decision_regions(frame_ix, X, y, model, weights)
    return draw_frame

# `ax` must be a module-level name: plot_decision_regions draws on it.
fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(111, autoscale_on=False)
animate = get_anim(fig, ax, X, y, softmaxRegression)
# One frame per training epoch; frame i is drawn with the i-th weight snapshot.
anim = animation.FuncAnimation(fig, animate, frames=epochs, interval=100, blit=True)
# Close the current figure; the cell's displayed output is the `anim`
# expression on the last line.
plt.close()
anim

# Evaluate the model on the test set

Compute accuracy of the model on the test set.

In [8]:
# Accuracy: fraction of test samples whose predicted class equals the label.
preds = softmaxRegression.predict(X_test_norm)
accuracy_score = (preds == y_test).mean()
accuracy_score

0.9666666666666667