In [4]:
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from itertools import combinations
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Iteration cap handed as `max_iter` to PLA (through PLA_OVO_train) and to
# softmax_train in the cells below.
MAX_ITERATION = 10000

In [2]:
def data_generator(csv_path='iris.csv', test_size=20):
    """Load the iris CSV and build a per-class train/test split.

    Species names are encoded as integers (setosa=0, versicolor=1,
    virginica=2). Every class is split on its own so that ``test_size``
    samples of each class end up in the test set; the per-class pieces are
    then concatenated and shuffled.

    Parameters
    ----------
    csv_path : str, optional
        CSV file to read. Columns 1-5 are loaded and must include
        'Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width' and
        'Species'.
    test_size : int or float, optional
        Forwarded to ``train_test_split`` for each class.

    Returns
    -------
    X_train, y_train, X_test, y_test : numpy.ndarray
        Feature matrices and integer label vectors.

    Raises
    ------
    ValueError
        If the 'Species' column holds a value other than the three names
        listed above.
    """
    feature_columns = ['Sepal.Length', 'Sepal.Width',
                       'Petal.Length', 'Petal.Width']
    species_to_label = {'setosa': 0, 'versicolor': 1, 'virginica': 2}

    iris = pd.read_csv(csv_path, usecols=[1, 2, 3, 4, 5])
    X = np.array(iris[feature_columns])

    encoded = iris['Species'].map(species_to_label)
    unmapped = encoded.isnull()
    if unmapped.any():
        unknown = sorted(set(iris.loc[unmapped, 'Species'].astype(str)))
        raise ValueError('Unexpected species in %s: %s' % (csv_path, unknown))
    # Force an integer dtype: overwriting the strings in place leaves an
    # object-dtype array, which cannot be used as an index array later on
    # (e.g. np.eye(num_classes)[y] in softmax_train).
    y = np.array(encoded, dtype=int)

    train_parts = []
    test_parts = []
    for label in sorted(species_to_label.values()):
        members = y == label
        # random_state = label + 1 reproduces the fixed per-class seeds 1, 2, 3.
        X_tr, X_te, y_tr, y_te = train_test_split(
            X[members], y[members],
            test_size=test_size, random_state=label + 1)
        train_parts.append((X_tr, y_tr))
        test_parts.append((X_te, y_te))

    X_train = np.concatenate([part[0] for part in train_parts], axis=0)
    y_train = np.concatenate([part[1] for part in train_parts])
    X_test = np.concatenate([part[0] for part in test_parts], axis=0)
    y_test = np.concatenate([part[1] for part in test_parts])

    # The per-class pieces are stacked in class order; shuffle so downstream
    # code does not see the samples grouped by label.
    X_train, y_train = shuffle(X_train, y_train)
    X_test, y_test = shuffle(X_test, y_test)

    return X_train, y_train, X_test, y_test


def OVO_data_preprocessing(X_train, y_train, X_test, y_test, num_classes):
    """Build one standardized binary dataset per unordered pair of classes.

    For every pair ``(a, b)`` yielded by ``combinations(range(num_classes), 2)``
    the training rows labelled ``a`` or ``b`` are extracted, relabelled
    -1 (``a``) / +1 (``b``), shuffled, and standardized with a
    ``StandardScaler`` fitted on that pair's training rows only. The whole
    test matrix is transformed with the same scaler, so each pairwise model
    can later score every test sample.

    ``y_test`` is accepted but not used.

    Returns
    -------
    (list, list, list)
        Standardized training features, -1/+1 training labels and
        standardized test features, index-aligned with the pair order.
    """
    def pair_subset(features, labels, pair):
        # Rows of the first class come first and get -1, rows of the second
        # class follow and get +1; the result is then shuffled jointly.
        first_rows = features[np.flatnonzero(labels == pair[0])]
        second_rows = features[np.flatnonzero(labels == pair[1])]
        stacked = np.vstack((first_rows, second_rows))
        signs = np.repeat([-1, 1], [len(first_rows), len(second_rows)])
        stacked, signs = shuffle(stacked, signs)
        return stacked, signs

    train_features = []
    train_labels = []
    test_features = []

    for pair in combinations(range(num_classes), 2):
        pair_X, pair_y = pair_subset(X_train, y_train, pair)
        # Statistics come from this pair's training rows only.
        scaler = StandardScaler().fit(pair_X)
        train_features.append(scaler.transform(pair_X))
        test_features.append(scaler.transform(X_test))
        train_labels.append(pair_y)

    return train_features, train_labels, test_features


def PLA(X, y, w_init, max_iter, lr=1):
    """Train a binary perceptron, keeping only updates that do not hurt.

    A column of ones is prepended to ``X`` so ``w[0]`` acts as the bias.
    In each iteration one currently misclassified sample ``j`` is drawn
    uniformly at random and the candidate ``w + lr * y[j] * X[j]`` is
    evaluated; it replaces ``w`` only if it misclassifies no more samples
    than ``w`` does. Training stops when nothing is misclassified or after
    ``max_iter`` iterations. The training accuracy is printed.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
    y : numpy.ndarray, shape (n_samples,)
        Targets compared against ``np.sign`` of the scores, i.e. -1 / +1.
    w_init : numpy.ndarray, shape (n_features + 1,)
        Starting weights (bias first). Not modified.
    max_iter : int
        Maximum number of candidate updates to try.
    lr : float, optional
        Step size of each update.

    Returns
    -------
    numpy.ndarray
        The final weight vector (bias first).
    """
    w = w_init.copy()
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    n_samples = X.shape[0]

    def misclassified(weights):
        # A score of exactly 0 gives sign 0, which never equals a +/-1 label,
        # so it is counted as a mistake.
        return np.where(np.sign(np.dot(X, weights)) != y)[0]

    # The mistake set of the current weights is carried across iterations so
    # it is not recomputed when a candidate is rejected.
    mistakes = misclassified(w)
    for _ in range(max_iter):
        if mistakes.size == 0:
            break
        # np.random.choice already samples uniformly; no prior shuffle needed.
        j = np.random.choice(mistakes)
        candidate = w + lr * y[j] * X[j, :]
        candidate_mistakes = misclassified(candidate)
        if candidate_mistakes.size <= mistakes.size:
            w, mistakes = candidate, candidate_mistakes

    if n_samples:
        accuracy = 100 * (n_samples - mistakes.size) / n_samples
    else:
        # No samples: accuracy is undefined; report nan instead of dividing by 0.
        accuracy = float('nan')
    print('Accuracy of Perceptron Learning Algorithm is %.2f%%' % accuracy)
    return w


def PLA_OVO_train(X, y, num_classes, max_iter=None, lr=0.05):
    """Train one perceptron per unordered class pair (one-vs-one).

    Parameters
    ----------
    X, y : sequences
        ``X[i]`` / ``y[i]`` are the training features and -1/+1 labels of the
        i-th pair in ``combinations(range(num_classes), 2)`` order.
    num_classes : int
        Number of classes; determines how many pairwise models are trained.
    max_iter : int, optional
        Iteration cap forwarded to ``PLA``. Defaults to ``MAX_ITERATION``.
    lr : float, optional
        Learning rate forwarded to ``PLA``.

    Returns
    -------
    numpy.ndarray
        One weight vector (bias first) per class pair, stacked row-wise.
    """
    if max_iter is None:
        max_iter = MAX_ITERATION

    weights = []
    for i, _pair in enumerate(combinations(range(num_classes), 2)):
        # Initial weights come from the global NumPy RNG as-is. The generator
        # is deliberately not reseeded here, so a seed set by the caller
        # (np.random.seed(k)) makes training reproducible.
        w_init = np.random.randn(X[i].shape[1] + 1)
        weights.append(PLA(X[i], y[i], w_init, max_iter, lr=lr))

    return np.array(weights)


def PLA_OVO_test(X, y, num_classes, w):
    """Evaluate the one-vs-one perceptrons by majority vote and print results.

    ``X[i]`` is the test matrix prepared for the i-th class pair and ``w[i]``
    the matching weight vector (bias first). Each pairwise model votes for
    the first class of its pair when the sign of its score is -1 and for the
    second class otherwise. The predicted label of a sample is the class
    with the most votes (``np.argmax`` of ``np.bincount``, so ties go to the
    smallest class index). Accuracy, predictions and true labels are
    printed; nothing is returned.
    """
    def with_bias(features):
        # Prepend the constant-1 column that pairs with the bias weight.
        return np.hstack((np.ones((features.shape[0], 1)), features))

    def majority(row):
        return np.argmax(np.bincount(row))

    ballots = []
    pairs = combinations(range(num_classes), 2)
    for idx, (neg_class, pos_class) in enumerate(pairs):
        side = np.sign(np.dot(with_bias(X[idx]), w[idx]))
        ballots.append(np.where(side == -1, neg_class, pos_class))

    # One row per sample, one column per pairwise model.
    ballots = np.array(ballots).T
    y_pred = np.apply_along_axis(majority, 1, ballots)

    total = X[0].shape[0]
    wrong = len(np.where(y != y_pred)[0])
    print('Accuracy of PLA_OVO is %.2f%%' % (100 * (total - wrong) / total))
    print('y_pred:', y_pred)
    print('y_true:', y)


def softmax_train(X, y, num_classes, max_iter, lr=0.001, eps=1e-4):
    """Fit multinomial (softmax) regression with batch gradient descent.

    A column of ones is prepended to ``X`` so the first weight of every
    class acts as its bias. Weights start from standard-normal draws of the
    global NumPy RNG. Training stops as soon as every training sample is
    classified correctly, when a step would move the weights by less than
    ``eps`` (Frobenius norm), or after ``max_iter`` iterations. The final
    weights are printed.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
        Class indices in ``range(num_classes)``; converted to an integer
        array internally.
    num_classes : int
    max_iter : int
        Maximum number of gradient steps.
    lr : float, optional
        Learning rate.
    eps : float, optional
        Minimum step norm; a smaller step ends training.

    Returns
    -------
    numpy.ndarray, shape (num_classes, n_features + 1)
        Weight matrix, one row per class, bias first.
    """
    def softmax(X, w):
        scores = np.dot(X, w.T)
        # Subtract the row maximum before exponentiating to avoid overflow.
        shifted = np.exp(scores - np.max(scores, axis=1, keepdims=True))
        return shifted / np.sum(shifted, axis=1, keepdims=True)

    # Labels may arrive with object dtype (e.g. after in-place string
    # replacement); an integer array is required to index np.eye below.
    y = np.asarray(y).astype(int)

    # The global RNG is used without reseeding so that a seed set by the
    # caller makes the initialisation reproducible.
    w = np.random.randn(num_classes, X.shape[1] + 1)
    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    Y = np.eye(num_classes)[y]  # one-hot targets

    for _ in range(max_iter):
        P = softmax(X, w)
        # Perfect training accuracy: keep the current weights and stop
        # before computing a gradient that would be discarded.
        if (np.argmax(P, axis=1) == y).all():
            break
        grad = -1 / X.shape[0] * np.dot((Y - P).T, X)
        w_next = w - lr * grad
        if np.linalg.norm(w - w_next) < eps:
            break
        w = w_next
    print('Final weight matrix is:', w)
    return w


def softmax_test(X, y, num_classes, w):
    """Score a softmax-regression weight matrix on ``X`` / ``y`` and print it.

    A column of ones is prepended to ``X`` to pair with the bias weights in
    ``w`` (one row per class, bias first). Class probabilities are computed
    with a max-shifted softmax and the most probable class is taken as the
    prediction. Accuracy, predictions and true labels are printed; nothing
    is returned. ``num_classes`` is accepted but not used.
    """
    design = np.hstack((np.ones((X.shape[0], 1)), X))

    logits = design.dot(w.T)
    # Shift by the row maximum so the exponentials cannot overflow.
    stabilised = np.exp(logits - logits.max(axis=1, keepdims=True))
    probabilities = stabilised / stabilised.sum(axis=1, keepdims=True)
    y_pred = probabilities.argmax(axis=1)

    n_total = design.shape[0]
    n_wrong = len(np.where(y_pred != y)[0])
    print('Accuracy of Softmax is %.2f%%' % (100 * (n_total - n_wrong) / n_total))
    print('y_pred:', y_pred)
    print('y_true:', y)

# PLA OVO: one-vs-one Perceptron Learning Algorithm

In [12]:
# Split the data once; X_train / y_train / X_test / y_test are reused by the
# softmax cell further down.
X_train, y_train, X_test, y_test = data_generator()

# One standardized -1/+1 dataset per class pair, then train and evaluate the
# pairwise perceptrons by majority vote.
ovo_data = OVO_data_preprocessing(
    X_train, y_train, X_test, y_test, num_classes=3)
PLA_X_train, PLA_y_train, PLA_X_test = ovo_data
w = PLA_OVO_train(PLA_X_train, PLA_y_train, num_classes=3)
PLA_OVO_test(PLA_X_test, y_test, num_classes=3, w=w)

Accuracy of Perceptron Learning Algorithm is 100.00%
Accuracy of Perceptron Learning Algorithm is 100.00%
Accuracy of Perceptron Learning Algorithm is 98.33%
Accuracy of PLA_OVO is 95.00%
y_pred: [2 0 0 2 2 1 1 1 1 2 1 1 1 0 2 1 2 0 0 2 2 0 1 1 2 0 0 0 2 0 0 2 2 0 1 2 2
 2 2 0 2 1 1 2 0 0 1 0 2 1 0 1 0 1 2 0 0 2 1 1]
y_true: [2 0 0 2 2 2 1 1 1 2 1 1 1 0 2 1 2 0 0 2 2 0 1 1 2 0 0 0 2 0 0 2 1 0 1 1 2
 2 2 0 2 1 1 2 0 0 1 0 2 1 0 1 0 1 2 0 0 2 1 1]


# Softmax Regression

In [80]:
# Softmax regression on the raw (unstandardized) split from data_generator().
# Note: this rebinds `w`, replacing the PLA weights from the previous cell.
w = softmax_train(X_train, y_train, num_classes=3,
                  max_iter=MAX_ITERATION, lr=0.05)
softmax_test(X_test, y_test, num_classes=3, w=w)

Final weight matrix is: [[ 2.14084355e+00  2.87979283e+00  4.75036492e+00 -7.27068917e+00
   2.52720899e-01]
 [ 4.25455078e+00  2.27654969e+00 -5.68400397e-03 -9.40890519e-01
  -2.81530083e+00]
 [-5.12112404e+00 -3.32147360e+00 -5.81262040e+00  8.14678338e+00
   7.05984124e+00]]
Accuracy of Softmax is 98.33%
y_pred: [2 0 0 1 1 2 0 2 0 1 2 0 2 1 0 0 1 1 2 2 0 2 1 0 0 0 1 2 1 0 0 1 2 2 2 2 2
 0 2 1 2 1 0 2 0 2 1 2 0 1 0 2 1 0 1 0 1 1 1 2]
y_true: [2 0 0 1 1 2 0 2 0 1 2 0 2 1 0 0 1 1 2 1 0 2 1 0 0 0 1 2 1 0 0 1 2 2 2 2 2
 0 2 1 2 1 0 2 0 2 1 2 0 1 0 2 1 0 1 0 1 1 1 2]
