 # 測試 Discriminative Model
李宏毅 ML Lecture 5 中，關於 generative model 可能表現比較差的原因有給予一例試著說明
但並沒有給予相對應的 discriminative model 測試
本 notebook 以此為例實做並驗證

In [58]:
from math import log, exp
import random

In [59]:
class Data():
        def __init__(self, features, label=None):
            self.features = features
            self.label = label
            
        def __str__(self):
            return f'[{self.features}, {self.label}]'

## 對 B 做微分
$\begin{align}
L(f) & = \frac{1}{n} \sum_n -[\hat y^n(1-\sigma(z)) - (1- \hat y^n) \sigma(z)] \\
    & = \frac{1}{n} \sum_n -[\hat y^n - \hat y^n \sigma(z) - \sigma(z) + \hat y^n  \sigma(z)] \\
    & = -(\hat y^n - \sigma(z))
    \end{align}$

In [63]:
def f(data, w, b):
    sigmoid = lambda x: 1 / (1 + exp(-x))
    return sigmoid(sum(x_i * w_i for x_i, w_i in zip(data.features, w)) + b)

def loss(training_data_set, w , b):
    error = 0
    for d in training_data_set:
        error += -(d.label * log(f(d, w, b)) + (1 - d.label) * log(1 - f(d, w, b)))
    return error

def cal_gd_w(training_data_set, w, b):
    gd_w = [0] * len(w)
    for i, w_i in enumerate(w):
        gd_w = sum(-(d.label - f(d, w, b) * d.features[i]) for d in training_data_set) / len(training_data_set)
    return gd_w

def cal_gd_b(training_data_set, w, b):
    gd_b = sum(-(d.label - f(d, w, b)) for d in training_data_set) / len(training_data_set)
    return gd_b

def initial(sample_distribution):
    '''
    sample_distribution:
        list-like item contain 4 interger, as number of data with feature (0, 0), (0, 1), (1, 0), (1, 1)
    '''
    training_data_set = []
    for sample_number in sample_distribution:
        for first_feature in range(2):
            for second_feature in range(2):
                label = 1 if first_feature and second_feature else 0
                training_data_set += [Data((first_feature, second_feature), label) for i in range(sample_number)]

    w = [random.random(), random.random()]
    b = random.random()
    l = 0.3

    return training_data_set, w, b, l


def print_env(data_set, w, b, l, header='Initial'):
    print(f'{header}: w = {w}, b = {b}, learning rate = {l}')
    print(f'loss =', loss(data_set, w, b))


def main():
    training_data_set, w, b, l = initial(sample_distribution=[4, 4, 4, 1])
#     x = [(1, 1)] * 1 + [(1, 0)] * 4 + [(0, 1)] * 4 + [(0, 0)] * 4
#     random.shuffle(x)
#     y = [1]*12 + [0]*12
    print_env(training_data_set, w, b, l)
    for i in range(1000):
        random.shuffle(traing_data_set)
        gd_w = cal_gd_w(training_data_set, w, b)
        move = gd(x, y, w, b)
        
        if not (i+1)% 100:
            print_env(training_data_set, w, b, l, 'Round {}'.format(i+1))
            print(f', move = {move}')
            print('f([1, 1]) =', f((1, 1), w, b), ', f([1, 0]) =', f((1, 0), w, b))
            l *= 0.5

        b = b - l * gd_b(x, y, w, b)
        for i, move_i in enumerate(move):
            w[i] = w[i] - l * move_i

    print(f'x={x}')

In [81]:
def f(data, w, b):
    sigmoid = lambda x: 1 / (1 + exp(-x))
    return sigmoid(sum(x_i * w_i for x_i, w_i in zip(data.features, w)) + b)

def loss(training_data_set, w , b):
    error = 0
    for d in training_data_set:
        error += -(d.label * log(f(d, w, b)) + (1-d.label) * log(1 - f(d, w, b)))
    return error

def cal_gd_w(training_data_set, w, b):
    gd_w = [0] * len(w)
    for i, w_i in enumerate(w):
        gd_w[i] = sum(-(d.label - f(d, w, b) * d.features[i]) for d in training_data_set) / len(training_data_set)
    return gd_w

def cal_gd_b(training_data_set, w, b):
    gd_b = sum(-(d.label - f(d, w, b)) for d in training_data_set) / len(training_data_set)
    return gd_b

def initial(sample_distribution):
    '''
    sample_distribution:
        list-like item contain 4 interger, as number of data with feature (0, 0), (0, 1), (1, 0), (1, 1)
    '''
    training_data_set = []
    for sample_number in sample_distribution:
        for first_feature in range(2):
            for second_feature in range(2):
                label = 1 if first_feature and second_feature else 0
                training_data_set += [Data((first_feature, second_feature), label) for i in range(sample_number)]

    w = [random.random(), random.random()]
    b = random.random()
    l = 0.3

    return training_data_set, w, b, l


def print_env(data_set, w, b, l, header='Initial'):
    print(f'{header}: w = {w}, b = {b}, learning rate = {l}')
    print(f'loss =', loss(data_set, w, b))

def print_test(data_set, w, b):
    for d in data_set:
        print('f({}) = {}'.format(d, f(d, w, b)), end= ', ')

def main():
    training_data_set, w, b, l = initial(sample_distribution=[4, 4, 4, 1])
    testing_data_set = [Data((1, 1)), Data((1, 0))]
#     x = [(1, 1)] * 1 + [(1, 0)] * 4 + [(0, 1)] * 4 + [(0, 0)] * 4
#     random.shuffle(x)
#     y = [1]*12 + [0]*12
    print_env(training_data_set, w, b, l)
    for times in range(1000):
        random.shuffle(training_data_set)
        gd_w = cal_gd_w(training_data_set, w, b)
        gd_b = cal_gd_b(training_data_set, w, b)
        
       # Updating weight
        b = b - l * gd_b
        for i, gd_w in enumerate(gd_w):
            w[i] -= l * gd_w
            
        if not (times+1) % 100:
            l *= 0.5
            
            # Export Env
            print_env(training_data_set, w, b, l, 'Round {}'.format(times+1))
            print('gd_w = {}, gd_b = {}', gd_w, gd_b)
    print_test(testing_data_set, w, b)

In [82]:
if __name__ == "__main__":
    main()    

Initial: w = [0.23993368387120584, 0.4650123013357019], b = 0.835341271080032, learning rate = 0.3
loss = 55.899103251262176
Round 100: w = [1.3698027987492127, 1.4139369978817713], b = -2.4182306778791802, learning rate = 0.15
loss = 15.923798308123251
gd_w = {}, gd_b = {} -0.03558680683752785 0.05020730502818559
Round 200: w = [1.8604475302373116, 1.8824263715512168], b = -3.0866617053617613, learning rate = 0.075
loss = 12.76795845628348
gd_w = {}, gd_b = {} -0.02777531886930533 0.03998777880877727
Round 300: w = [2.0644067821576755, 2.080307696161941], b = -3.3724122061425796, learning rate = 0.0375
loss = 11.641570660794692
gd_w = {}, gd_b = {} -0.025144390582333233 0.03640822550820725
Round 400: w = [2.1588231612226907, 2.172426137559618], b = -3.5059541745844647, learning rate = 0.01875
loss = 11.153024127246711
gd_w = {}, gd_b = {} -0.024026622626982796 0.034869196150100444
Round 500: w = [2.204377201164385, 2.216976331303533], b = -3.570642872686294, learning rate = 0.009375
l