In [6]:
'''
梯度遞減 (gradient descent)
'''
# 一個實數向量做為輸入，輸出一個單一的實數
# 讓函數得出最大 (或最小)的可能值
from functools import partial

def sum_of_squares(v):
    '''
    計算 v 之中所有元素的平方和
    '''
    return sum(v_i ** 2 for v_i in v)

# 梯度的估算
'''
點 x 的導數 (derivative)
    衡量 x 出現小小變化時 f(x)跟著變化的程度
        差商 (difference quotient)
'''
def difference_quotient(f, x, h):
    return (f(x + h) - f(x)) / h     # h 趨近於 0

# 平方函數
def square(x):
    return x * x

# 平方函數的導數
def derivative(x):
    return 2 * x

derivative_estimate = partial(difference_quotient, square, h=0.00001)
print("derivative_estimate : %.4f" % derivative_estimate)

# 畫出圖形可看出兩者基本上是相同的
import matplotlib.pyplot as plt
x = list(range(-10, 10))
plt.title("Actual Dervatives vs. Estimates")
plt.plot(x, map(derivative, x), 'rx', label='Actual')        # rx 紅色的 x
plt.plot(x, map(derivative_estimate, x), 'b+', label='Estimate')      # b+ 藍色的 +

plt.lengend(loc=9)
plt.show()

TypeError: must be real number, not functools.partial

In [13]:
# 計算偏導數
from linear_algebra import distance, vector_subtract, scalar_multiply
import random

def partial_difference_quotient(f, v, i, h):
    '''
    計算 f 在 v 中第 i 個元素對應的差商
    '''
    w = [v_j + (h if j == i else 0)      # 只針對 v 的第 i 個元素，加上 h 的變動量
        for j, v_j in enumerate(v)]
    return (f(w) - f(v)) / h

# 同樣來估計梯度
def estimate_gradient(f, v, h=0.00001):
    return [partial_difference_quotient(f, v, i, h)
           for i, _ in enumerate(v)]

def step(v, direction, step_size):
    '''
    從 v 沿著 direction 的方向移動 step_size 的距離
    '''
    return [v_i + step_size * direction_i
           for v_i, direction_i, in zip(v, direction)]

def sum_of_squares_gradient(v):
    return [2 * v_i for v_i in v]

# 取個隨機起始點
v = [random.randint(-10, 10) for i in range(3)]

tolerance = 0.0000001

while True:
    gradient = sum_of_squares_gradient(v)       # 計算出 v 所對應的梯度
    next_v = step(v, gradient, -0.01)           # 往梯度的負方向跨一小步
    if distance(next_v, v) < tolerance:         # 結果收斂到一定程度，就停止
        break
    v = next_v                                  # 若未收斂，則繼續

    
step_sizes = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]
# 使用例外處理
def safe(f):
    '''
    送回來的值與原函數 f 相同，
    但如果 f 出現錯誤，就送回一個無限大的結果
    '''
    def safe_f(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except:
            return float('inf')             # 在 python中表示「無限大」
        return safe_f

print("safe :", safe(5))

ImportError: cannot import name 'distance'

In [16]:
# 全部整合起來
def minimize_batch(target_fn, gradient_fn, theta_0, tolerance=0.000001):
    '''
    運用梯度遞減的做法，找出能讓目標函數值最小畫的相應 theta 值
    '''
    step_sizes = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]
    
    theta = theta_0                   # 為 theta 設定初始值
    target_fn = safe(target_fn)       # 目標函數 target_fn 的安全版
    value = target_fn(theta)          # 想要的最小化值
    
while True:
    gradient = gradient_fn(theta)
    next_thetas = [step(theta, gradient, -step_size)
                  for step_size in step_sizes]
    
    # 選出能讓誤差函數值最小化的相應值
    next_theta = min(next_thetas, key=target_fn)
    next_value = target_fn(next_theta)
    
    # 若結果收斂到一定程度，就停止下來
    if abs(value - next_value) < tolerance:
        return theta
    else:
        theta, value = next_theta, next_value
        
def negate(f):
    '''
    針對函數每個輸入值 x，全都送回相應的負函數值 -f(x)
    '''
    return lambda *args, **kwargs: -f(*args, **kwargs)

def negate_all(f):
    '''
    如果 f 送回一串數字列表，也會一樣
    '''
    return lambda *args, **kwargs: [-y for y in f(*args, **kwargs)]

def maximize_batch(target_fn, gradient_fn, theta_0, tolerance=0.000001):
    return minimize_batch(negate(target_fn),
                         negate_all(gradient_fn),
                         theta_0,
                         tolerance)

IndentationError: expected an indented block (<ipython-input-16-35fc89da0ced>, line 23)

In [20]:
# 隨機梯度遞減
import random

def in_random_order(data):
    '''
    生成器會以隨機的順序送回整組資料中的每個元素
    '''
    indexes = [i for i, _ in enumerate(data)]       # 建立索引列表
    random.shuffle(indexes)    # 打亂順序
    for i in indexes:          # 按照新的順序送回資料
        yield data[i]
        
def minimize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0 = 0.01):
    data = zip(x, y)
    theta = theta_0
    alpha = alpha_0
    min_theta, min_value = None, float("inf")
    iterations_with_no_improvement = 0
    
    # 如果進行 100 次的迭代卻沒有任何改進，就停止
    while iterations_with_no_improvement < 100:
        value = sum( taaget_fn(x_i, y_i, theta) for x_i, y_i in data)
        
        if value < min_value:
            # 如果找到一個新的最小值，就把它紀錄下來
            # 並且恢復到原始的間隔長度
            min_theta, min_value = theta, value
            iterations_with_no_improvement = 0
            alpha = alpha_0
        else:
            # 若沒有改善，就縮小間隔長度
            interations_with_no_improvement += 1
            alpha *= 0.9

        # 針對每個資料點，進行一次沿梯度跨步的動作
        for x_i, y_i in in_random_order(data):
            gradient_i = gradient_fn(x_i, y_i, theta)
            theta = vector_subtract(theta, scalar_multiply(alpha, gradient_i))
            
        return min_theta
    
# print("minimize_stochastic :", minimize_stochastic(0.852, 0.556, x, y, theta_0, alpha_0))


# 求取最大值
def maximize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0 = 0.01):
    return minimize_stochastic(negate(target_fn),
                              negate_all(gradient_fn),
                              x, y, theta_0, alpha_0)

# print("maximize_stochastic :", maximize_stochastic(0.852, 0.556, x, y, theta_0, alpha_0))