 # 測試 Discriminative Model
李宏毅 ML Lecture 5 中，關於 generative model 可能表現比較差的原因有給予一例試著說明
但並沒有給予相對應的 discriminative model 測試
本 notebook 以此為例實做並驗證

In [1]:
from math import log, exp
import random
from collections import namedtuple

## ML Model 的相關函數

* Step 1 function set: `f()` is a logistic model
* Step 2 goodness of function: `loss()`
* Step 3 find the best function:
    * `cal_gradient()` to find gradient
    * `gradient_descent()` to update
    * 分兩個函數是因為想看看 gradient

### Gradient of Loss Function
#### 對 W 微分
$\frac{\partial L(f)}{\partial w_i} = \frac{1}{n} \sum_n -(\hat y^n - f_{w, b}(x^n))x_i^n$
#### 對 B 微分
$\begin{align}
\frac{\partial L(f)}{\partial b} & = \frac{1}{n} \sum_n -[\hat y^n(1-\sigma(z)) - (1- \hat y^n) \sigma(z)] \\
    & = \frac{1}{n} \sum_n -[\hat y^n - \hat y^n \sigma(z) - \sigma(z) + \hat y^n  \sigma(z)] \\
    & = \frac{1}{n} \sum_n -(\hat y^n - \sigma(z))
    \end{align}$

In [2]:
class Data:
    """One sample: a feature vector together with an optional label."""

    def __init__(self, features, label=None):
        # Both values are stored exactly as given (no copy, no validation).
        self.label = label
        self.features = features

    def __repr__(self):
        # The debug representation is the same text as the string form.
        return str(self)

    def __str__(self):
        return f'Data({self.features}, {self.label})'

def f(data, w, b):
    """Return the logistic-model output sigmoid(w . x + b) for one sample.

    Args:
        data: Object whose ``features`` attribute is an iterable of numbers.
        w: Iterable of weights, paired positionally with ``data.features``
            (``zip`` stops at the shorter of the two).
        b: Bias added to the weighted sum.

    Returns:
        The sigmoid of the weighted sum, a float between 0 and 1 inclusive
        (it saturates to exactly 0.0 or 1.0 for extreme inputs).
    """
    z = sum(x_i * w_i for x_i, w_i in zip(data.features, w)) + b
    # Only ever call exp() on a non-positive argument: exp(-z) raises
    # OverflowError once z drops below roughly -709, whereas exp() of a
    # large negative number simply underflows to 0.0.
    if z >= 0:
        return 1 / (1 + exp(-z))
    e_z = exp(z)
    return e_z / (1 + e_z)

def loss(training_data_set, w, b):
    """Return the summed cross-entropy of the logistic model over a data set.

    Args:
        training_data_set: Iterable of samples; each must have a ``label``
            attribute (0 or 1 in this notebook) and be accepted by ``f``.
        w: Weights passed through to ``f``.
        b: Bias passed through to ``f``.

    Returns:
        The sum (not the mean) of the per-sample cross-entropy terms;
        ``0`` for an empty data set.
    """
    # f() can saturate to exactly 0.0 or 1.0 in floating point, and log(0)
    # raises ValueError, so keep the prediction strictly inside (0, 1).
    eps = 1e-15
    error = 0
    for d in training_data_set:
        # Evaluate the model once per sample and reuse it for both terms.
        p = min(max(f(d, w, b), eps), 1 - eps)
        error += -(d.label * log(p) + (1 - d.label) * log(1 - p))
    return error

def cal_gradient(training_data_set, w, b):
    """Return the gradient of the averaged cross-entropy w.r.t. ``w`` and ``b``.

    Args:
        training_data_set: Sized, re-iterable collection of samples, each
            with ``label`` and indexable ``features``.
        w: Sequence of weights; its length fixes how many partial
            derivatives are computed.
        b: Bias term.

    Returns:
        A dict with key ``'w'`` (list of partial derivatives, one per
        weight) and key ``'b'`` (partial derivative for the bias), each
        averaged over the data set.

    Raises:
        ZeroDivisionError: If ``training_data_set`` is empty.
    """
    # The residual -(y - f(x)) does not depend on which parameter is being
    # differentiated, so evaluate the model once per sample instead of once
    # per sample per parameter.
    residuals = [-(d.label - f(d, w, b)) for d in training_data_set]
    n = len(training_data_set)

    grad_w = [
        sum(r * d.features[i] for r, d in zip(residuals, training_data_set)) / n
        for i in range(len(w))
    ]
    grad_b = sum(residuals) / n
    return dict(w=grad_w, b=grad_b)

def gradient_descent(env, grad):
    """Apply one gradient-descent step to ``env`` in place.

    Each weight in ``env.w`` and the bias ``env.b`` is moved against its
    partial derivative (``grad['w'][i]`` / ``grad['b']``), scaled by the
    learning rate ``env.l``. Nothing is returned.
    """
    for idx, partial in enumerate(grad['w']):
        env.w[idx] = env.w[idx] - env.l * partial
    env.b = env.b - env.l * grad['b']
    
def update_learning_rate(env):
    """Rescale the learning rate in place: ``env.l`` becomes ``env.l * env.al``."""
    env.l = env.l * env.al

## 初始化環境並準備資料

由於只是個很小的例子，因此並沒有另外準備資料，而是在 `read_data()` 中直接設定參數並產生

In [3]:
class Env:
    """Mutable training state: weights, bias, learning rate and its decay factor."""

    def __init__(self, w, b, l, al):
        # w: list of weights, b: bias, l: learning rate,
        # al: factor the learning rate is multiplied by when it is adapted.
        self.w, self.b = w, b
        self.l, self.al = l, al

    def __repr__(self):
        # The debug representation is the same text as the string form.
        return str(self)

    def __str__(self):
        return f'w={self.w}, b={self.b}, learning rate={self.l}, adapt rate ={self.al}'


def initial():
    """Create the starting ``Env``.

    The two weights and the bias are each drawn with ``random.random()``
    (weights first, then bias); the learning rate starts at 0.3 and its
    adaptive factor is 0.9.
    """
    weights = [random.random() for _ in range(2)]
    bias = random.random()
    return Env(weights, bias, 0.3, 0.9)


def read_data():
    """Generate the toy training and validation sets.

    Every 4-tuple below is aligned with the feature combinations
    (0, 0), (0, 1), (1, 0), (1, 1), in that order: ``label`` gives the class
    of each combination and the ``*_distribution`` tuples give how many
    samples of each combination to create.

    Returns:
        A ``DataSets`` namedtuple with ``training`` and ``validation`` lists
        of ``Data``. Noise samples (label flipped) are appended to the
        training list after the regular training samples.
    """
    data_type = [(f1, f2) for f1 in (0, 1) for f2 in (0, 1)]
    label = (0, 0, 0, 1)
    training_distribution = (4, 4, 4, 1)
    noise_distribution = (0, 0, 0, 0)
    validation_distribution = (0, 0, 1, 1)

    DataSets = namedtuple('DataSets', 'training, validation')
    training, validation = [], []

    # Regular training samples carry the true label.
    for count, lab, feats in zip(training_distribution, label, data_type):
        for _ in range(count):
            training.append(Data(feats, lab))

    # Noise samples carry the opposite label.
    for count, lab, feats in zip(noise_distribution, label, data_type):
        for _ in range(count):
            training.append(Data(feats, int(not lab)))

    for count, lab, feats in zip(validation_distribution, label, data_type):
        for _ in range(count):
            validation.append(Data(feats, lab))

    return DataSets(training, validation)

## 監控環境變數

In [4]:
def print_env(env, header='Initial'):
    """Print ``env`` on one line, prefixed by ``header`` and a colon."""
    line = f'{header}: {env}'
    print(line)


def print_valid(data_set, w, b, header='Initial'):
    """Print the model output ``f(sample, w, b)`` for every sample on one line.

    The line starts with ``header`` and each entry is followed by a comma
    and a space; a newline ends the line.
    """
    print(f'{header}: ', end='')
    for sample in data_set:
        prediction = f(sample, w, b)
        print('f({}) = {}'.format(sample, prediction), end=', ')
    print()

In [5]:
def main():
    """Train the logistic model for 1000 rounds and print progress.

    Each round shuffles the training set, computes the gradient and applies
    one descent step. Validation predictions are printed every 50 rounds;
    every 100 rounds the learning rate is rescaled and the environment, the
    latest gradient and the training loss are printed.
    """
    env = initial()
    data_set = read_data()
    print_env(env)

    for round_no in range(1, 1001):
        random.shuffle(data_set.training)
        grad = cal_gradient(data_set.training, env.w, env.b)
        gradient_descent(env, grad)

        if round_no % 50 == 0:
            print_valid(data_set.validation, env.w, env.b, header='(Val) Round {}'.format(round_no))

        if round_no % 100 != 0:
            continue

        # Rescale the learning rate first so the printed env shows the new rate.
        update_learning_rate(env)
        print_env(env, header='(Env) Round {}'.format(round_no))
        print(f'gd_w={grad["w"]}, gd_b = {grad["b"]}, loss=', loss(data_set.training, env.w, env.b))
            
# Run the training demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

Initial: w=[0.7171448575830212, 0.20076584819593768], b=0.14139380290539472, learning rate=0.3, adapt rate =0.9
(Val) Round 50: f(Data((1, 0), 0)) = 0.14313693019764173, f(Data((1, 1), 1)) = 0.15635221164636176, 
(Val) Round 100: f(Data((1, 0), 0)) = 0.12279608554911947, f(Data((1, 1), 1)) = 0.1957238980804802, 
(Env) Round 100: w=[0.7018376673253899, 0.5529767026687703], b=-2.6680520077283343, learning rate=0.27, adapt rate =0.9
gd_w=[-0.02412048198084291, -0.028831021814464855], gd_b = 0.029130644508182164, loss= 2.879000241542745
(Val) Round 150: f(Data((1, 0), 0)) = 0.11711833188052842, f(Data((1, 1), 1)) = 0.2487264748171185, 
(Val) Round 200: f(Data((1, 0), 0)) = 0.11188854010559092, f(Data((1, 1), 1)) = 0.3001894804463901, 
(Env) Round 200: w=[1.290096267434316, 1.2251983195847997], b=-3.361690322300477, learning rate=0.24300000000000002, adapt rate =0.9
gd_w=[-0.01944727930164355, -0.021399091416470322], gd_b = 0.023436482617606322, loss= 2.2607390011110438
(Val) Round 250: f(D