 # 測試 Discriminative Model
李宏毅 ML Lecture 5 中，關於 generative model 可能表現比較差的原因有給予一例試著說明
但並沒有給予相對應的 discriminative model 測試
本 notebook 以此為例實做並驗證

In [83]:
from math import log, exp
import random

In [84]:
class Data():
        def __init__(self, features, label=None):
            self.features = features
            self.label = label
            
        def __str__(self):
            return f'[{self.features}, {self.label}]'

## 對 B 做微分
$\begin{align}
L(f) & = \frac{1}{n} \sum_n -[\hat y^n(1-\sigma(z)) - (1- \hat y^n) \sigma(z)] \\
    & = \frac{1}{n} \sum_n -[\hat y^n - \hat y^n \sigma(z) - \sigma(z) + \hat y^n  \sigma(z)] \\
    & = -(\hat y^n - \sigma(z))
    \end{align}$

In [97]:
def f(data, w, b):
    sigmoid = lambda x: 1 / (1 + exp(-x))
    return sigmoid(sum(x_i * w_i for x_i, w_i in zip(data.features, w)) + b)

def loss(training_data_set, w , b):
    error = 0
    for d in training_data_set:
        error += -(d.label * log(f(d, w, b)) + (1-d.label) * log(1 - f(d, w, b)))
    return error

def cal_gd_w(training_data_set, w, b):
    gd_w = [0] * len(w)
    for i, w_i in enumerate(w):
        gd_w[i] = sum(-(d.label - f(d, w, b) * d.features[i]) for d in training_data_set) / len(training_data_set)
    return gd_w

def cal_gd_b(training_data_set, w, b):
    gd_b = sum(-(d.label - f(d, w, b)) for d in training_data_set) / len(training_data_set)
    return gd_b

def initial(sample_distribution):
    '''
    sample_distribution:
        list-like item contain 4 interger, as number of data with feature (0, 0), (0, 1), (1, 0), (1, 1)
    '''
    training_data_set = []
    for sample_number in sample_distribution:
        for first_feature in range(2):
            for second_feature in range(2):
                label = 1 if first_feature and second_feature else 0
                training_data_set += [Data((first_feature, second_feature), label) for i in range(sample_number)]

    w = [random.random(), random.random()]
    b = random.random()
    l = 0.3

    return training_data_set, w, b, l


def print_env(data_set, w, b, l, header='Initial'):
    print(f'{header}: w = {w}, b = {b}, learning rate = {l}')
    print(f'loss =', loss(data_set, w, b))

def print_test(data_set, w, b, header='Initial'):
    print(f'{header}: ', end='')
    for d in data_set:
        print('f({}) = {}'.format(d, f(d, w, b)), end= ', ')
    print()

def main():
    training_data_set, w, b, l = initial(sample_distribution=[4, 4, 4, 1])
    testing_data_set = [Data((1, 1)), Data((1, 0))]
    
    print_env(training_data_set, w, b, l)
    for times in range(1000):
        random.shuffle(training_data_set)
        gd_w = cal_gd_w(training_data_set, w, b)
        gd_b = cal_gd_b(training_data_set, w, b)
        
       # Updating weight
        b = b - l * gd_b
        for i, gd_w in enumerate(gd_w):
            w[i] -= l * gd_w
            
        if not (times+1) % 100:
            l *= 0.9
            
            # Export Env
            print_env(training_data_set, w, b, l, 'Round {}'.format(times+1))
            print('gd_w = {}, gd_b = {}', gd_w, gd_b)
        
        if not (times+1) % 50:
            print_test(testing_data_set, w, b, header='(Val) Round {}'.format(times+1))

In [98]:
if __name__ == "__main__":
    main()    

Initial: w = [0.1618279687505364, 0.0269260502685863], b = 0.1834607919821709, learning rate = 0.3
loss = 38.97168639731798
(Val) Round 50: f([(1, 1), None]) = 0.4893671905071753, f([(1, 0), None]) = 0.3113705884527738, 
Round 100: w = [1.4354607154356815, 1.408960517666553], b = -2.459797595881627, learning rate = 0.27
loss = 15.701753306140484
gd_w = {}, gd_b = {} -0.03671210418223761 0.04949140995495206
(Val) Round 100: f([(1, 1), None]) = 0.5949877822150302, f([(1, 0), None]) = 0.26418349010446596, 
(Val) Round 150: f([(1, 1), None]) = 0.6559854072827758, f([(1, 0), None]) = 0.23138584506339033, 
Round 200: w = [2.2085341382040737, 2.200600448018253], b = -3.5618435730417404, learning rate = 0.24300000000000002
loss = 10.955105454369953
gd_w = {}, gd_b = {} -0.02406909517205754 0.0343434473007312
(Val) Round 200: f([(1, 1), None]) = 0.6999985620846412, f([(1, 0), None]) = 0.20532984634880797, 
(Val) Round 250: f([(1, 1), None]) = 0.7306848555786265, f([(1, 0), None]) = 0.1861683535