In [1]:
import os
import urllib.request

import numpy as np


In [2]:
class PLA(object):
    """Perceptron Learning Algorithm, with an optional pocket variant.

    PLA and pocket PLA deliberately share one class: passing
    ``pocket_maxiter`` switches the pocket behaviour on.

    The weight row ``Wxb`` has shape ``(1, x_dim + 1)``; the last entry is the
    bias, which multiplies a constant feature ``1`` appended to every sample.
    ``Wxb`` starts as float32 zeros and becomes float64 after the first update,
    because the update term is computed in float64.

    Parameters
    ----------
    x_dim : int
        Number of input features (without the bias term).
    eta : float
        Learning rate that scales every correction.
    pocket_maxiter : int or None
        ``None`` runs plain PLA. Otherwise training stops once this many
        updates have been made, and the weights with the fewest training
        errors seen so far are kept in ``Wxb_pocket``.
    shuffle : bool
        If true, the samples are re-permuted with ``np.random.permutation``
        before every scan.
    verbose : bool
        If true, the weights are printed after every update.
    """

    def __init__(self, x_dim, eta=1.0, pocket_maxiter=None, shuffle=False, verbose=False):
        self.shuffle = shuffle
        self.verbose = verbose
        self.eta = eta
        self.pocket_maxiter = pocket_maxiter
        self.Wxb = np.zeros((1, x_dim + 1), dtype=np.float32)  # (1, x_dim + 1)
        if self.pocket_maxiter is not None:
            # Only pocket models carry the extra "best so far" weights.
            self.Wxb_pocket = np.zeros_like(self.Wxb, dtype=np.float32)

    @staticmethod
    def _augment(x):
        """Return ``x`` as float64 with a constant 1 appended along the last axis."""
        x = np.asarray(x, dtype=np.float64)
        bias = np.ones(x.shape[:-1] + (1,), dtype=np.float64)
        return np.concatenate((x, bias), axis=-1)

    def _count_errors(self, Xs, Ys):
        """Count samples in ``Xs`` that the current ``Wxb`` misclassifies."""
        if len(Xs) == 0:
            return 0
        labels = np.asarray(Ys).reshape(-1)
        return int(np.count_nonzero(self.predict(Xs) != labels))

    def predict(self, x, pocket=False):
        """Classify one sample or a batch of samples.

        Parameters
        ----------
        x : array-like, shape ``(x_dim,)`` or ``(n, x_dim)``
            Feature vector(s) without the bias feature.
        pocket : bool
            Use ``Wxb_pocket`` instead of ``Wxb``. Ignored when the model was
            built without ``pocket_maxiter``.

        Returns
        -------
        int or numpy.ndarray
            ``-1`` or ``1`` for a single sample, an array of them for a batch.
            A score of exactly 0 is classified as ``-1``.
        """
        use_pocket = pocket and self.pocket_maxiter is not None
        W = self.Wxb_pocket if use_pocket else self.Wxb
        scores = self._augment(x) @ W[0]
        if scores.ndim == 0:
            return -1 if scores <= 0 else 1
        return np.where(scores <= 0, -1, 1)

    def train(self, Xs, Ys):
        """Fit the weights on ``Xs`` / ``Ys`` (labels are expected to be -1 or 1).

        Each round scans the samples in order, corrects the weights on the
        first misclassified sample and then starts a new round from the first
        sample again (of a fresh permutation when ``shuffle`` is set).

        Returns
        -------
        int
            The number of updates made, once a full scan finds no mistake.
            In pocket mode, if ``pocket_maxiter`` updates are reached first,
            the smallest training error count seen (that of ``Wxb_pocket``)
            is returned instead.

        Without ``pocket_maxiter`` this loops forever on data that is not
        linearly separable.
        """
        Xs = np.asarray(Xs)
        Ys = np.asarray(Ys)
        use_pocket = self.pocket_maxiter is not None
        updates = 0
        last_errors = np.inf
        while True:
            if self.shuffle:
                idx = np.random.permutation(len(Xs))
                Xs, Ys = Xs[idx], Ys[idx]
            success = True
            for x, y in zip(Xs, Ys):
                if self.predict(x) != y:  # misclassified: correct and rescan
                    self.Wxb = self.Wxb + (self.eta * y * self._augment(x))[np.newaxis]
                    updates += 1
                    if self.verbose:
                        print('iteration {:d}: '.format(updates), self.Wxb)
                    success = False
                    break
            if use_pocket:
                # One vectorised pass over the training set instead of one
                # Python-level predict call per sample.
                errors = self._count_errors(Xs, Ys)
                if errors < last_errors:
                    last_errors = errors
                    self.Wxb_pocket = self.Wxb.copy()
                if updates >= self.pocket_maxiter:
                    return last_errors
            if success:
                return updates

In [3]:
def data_reader(filepath):
    """Read a whitespace-separated data file into features and labels.

    Every non-blank line is split on whitespace; all fields but the last are
    the features and the last field is the label. Blank or whitespace-only
    lines (such as a trailing empty line) are skipped.

    Parameters
    ----------
    filepath : str
        Path of the text file to read.

    Returns
    -------
    x : numpy.ndarray of float32
        One row of features per data line.
    y : numpy.ndarray of int16
        One label per data line.
    """
    features = []
    labels = []
    with open(filepath, 'r') as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # An empty split would make fields[-1] raise IndexError.
                continue
            features.append(fields[:-1])
            labels.append(fields[-1])

    x = np.asarray(features, dtype=np.float32)
    y = np.asarray(labels, dtype=np.int16)
    return x, y

In [4]:
# Fetch the homework data files that are not already in the working directory.
# Pure-Python download so the cell does not depend on a `wget` binary.
DATA_URL_ROOT = 'https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_math/'
for fname in ('hw1_15_train.dat', 'hw1_18_train.dat', 'hw1_18_test.dat'):
    local_path = './' + fname
    if not os.path.exists(local_path):
        # Download to a side file and rename, so an interrupted transfer
        # never leaves a truncated file that would pass the exists() check.
        tmp_path = local_path + '.part'
        urllib.request.urlretrieve(DATA_URL_ROOT + fname, tmp_path)
        os.replace(tmp_path, local_path)

In [5]:
# Load the hw1_15 training set: x gets the float32 feature rows, y the int16 labels.
x, y = data_reader('./hw1_15_train.dat')

In [6]:
# Plain PLA (eta=1.0, no shuffling): samples are scanned in file order.
pla = PLA(x.shape[-1])
# Without pocket_maxiter, train() returns the number of weight updates made.
iterations = pla.train(x, y)
print('#iteration: {:d}'.format(iterations))

#iteration: 60


In [7]:
from tqdm import tqdm

In [8]:
# Repeat PLA 2000 times with shuffle=True (random sample order) and eta=1.0,
# collecting the update count of each run. No random seed is set, so the
# reported mean/std vary from execution to execution.
ites = []
for _ in tqdm(range(2000), total=2000):
    pla = PLA(x.shape[-1], shuffle=True)
    iterations = pla.train(x, y)
    ites.append(iterations)
print('#iteration mean, std: {:.2f}, {:.2f}'.format(np.mean(ites), np.std(ites)))

100%|██████████| 2000/2000 [00:19<00:00, 104.24it/s]

#iteration mean, std: 40.28, 12.25





In [9]:
# Same 2000-run shuffled experiment as the previous cell, but with eta=0.5.
# No random seed is set, so the reported mean/std vary between executions.
ites = []
for _ in tqdm(range(2000), total=2000):
    pla = PLA(x.shape[-1], eta=0.5, shuffle=True)
    iterations = pla.train(x, y)
    ites.append(iterations)
print('#iteration mean, std: {:.2f}, {:.2f}'.format(np.mean(ites), np.std(ites)))

100%|██████████| 2000/2000 [00:19<00:00, 104.60it/s]

#iteration mean, std: 40.08, 11.46





In [10]:
# Load the hw1_18 training and test sets (float32 features, int16 labels).
x_train, y_train = data_reader('./hw1_18_train.dat')
x_test, y_test = data_reader('./hw1_18_test.dat')

In [11]:
# Pocket PLA, 50 updates per run, 2000 runs: test error of the pocket weights.
err_rates = []
for _ in tqdm(range(2000), total=2000):
    # Size the model from x_train, the data it is trained on, rather than from
    # the unrelated hw1_15 array `x` loaded in an earlier cell.
    pla = PLA(x_train.shape[-1], pocket_maxiter=50, shuffle=True)
    pla.train(x_train, y_train)
    preds = np.asarray([pla.predict(sample, pocket=True) for sample in x_test])
    err = (preds != y_test).mean()
    err_rates.append(err)
print('error rate mean, std: {:.2f}, {:.2f}'.format(np.mean(err_rates), np.std(err_rates)))

100%|██████████| 2000/2000 [07:30<00:00,  4.44it/s]

error rate mean, std: 0.13, 0.03





In [12]:
# Pocket PLA, 50 updates per run, 2000 runs: test error of the *last* weights
# (pocket=False), for comparison with the pocket weights.
err_rates = []
for _ in tqdm(range(2000), total=2000):
    # Size the model from x_train, the data it is trained on, rather than from
    # the unrelated hw1_15 array `x` loaded in an earlier cell.
    pla = PLA(x_train.shape[-1], pocket_maxiter=50, shuffle=True)
    pla.train(x_train, y_train)
    preds = np.asarray([pla.predict(sample, pocket=False) for sample in x_test])
    err = (preds != y_test).mean()
    err_rates.append(err)
print('error rate mean, std: {:.2f}, {:.2f}'.format(np.mean(err_rates), np.std(err_rates)))

100%|██████████| 2000/2000 [07:31<00:00,  4.43it/s]

error rate mean, std: 0.35, 0.17





In [13]:
# Pocket PLA, 100 updates per run, 2000 runs: test error of the pocket weights.
err_rates = []
for _ in tqdm(range(2000), total=2000):
    # Size the model from x_train, the data it is trained on, rather than from
    # the unrelated hw1_15 array `x` loaded in an earlier cell.
    pla = PLA(x_train.shape[-1], pocket_maxiter=100, shuffle=True)
    pla.train(x_train, y_train)
    preds = np.asarray([pla.predict(sample, pocket=True) for sample in x_test])
    err = (preds != y_test).mean()
    err_rates.append(err)
print('error rate mean, std: {:.2f}, {:.2f}'.format(np.mean(err_rates), np.std(err_rates)))

100%|██████████| 2000/2000 [15:04<00:00,  2.21it/s]

error rate mean, std: 0.11, 0.02



