In [2]:
'''
Deep Belief Net training
'''
import sys
import random
import numpy as np
import poisson_tools as pt
import matplotlib.pyplot as plt
from scipy.special import expit

In [89]:
def init_dbn(nodes, data_train):
    '''
    Build the parameter dictionary of a deep belief net.

    nodes      : list with the number of units of every layer, bottom
                 (visible) layer first.
    data_train : 2-D array with one training vector per row; it is only
                 used to initialise the bias of the visible layer.

    Returns a dict with the keys
      'nodes'  : the list that was passed in,
      'w_up'   : one (nodes[l], nodes[l+1]) matrix per pair of adjacent
                 layers, drawn from a normal distribution (mean 0, std 0.01),
      'w_down' : one empty placeholder array per pair of adjacent layers,
      'bias'   : one bias vector per layer (including the top layer),
      'epoc'   : number of training epochs (1),
      'batch'  : mini-batch size (50).
    '''
    dbnet = {}
    dbnet['nodes'] = nodes
    dbnet['w_up'] = []
    dbnet['w_down'] = []
    for layer in range(len(nodes)-1):
        # Passing the shape directly consumes the random stream exactly like
        # drawing a flat vector and reshaping it.
        w = np.random.normal(0, 0.01, (nodes[layer], nodes[layer+1]))
        dbnet['w_up'].append(w)
        dbnet['w_down'].append(np.array([]))
    # One bias per layer. The top layer needs one as well: it is the hidden
    # layer of the last weight matrix.
    dbnet['bias'] = [np.zeros(n_units) for n_units in nodes]
    dbnet['epoc'] = 1
    dbnet['batch'] = 50
    # Visible bias = log-odds of each input being on in the training data;
    # the 0.01 keeps the ratio finite for inputs that are always on or off.
    train_num = data_train.shape[0]
    pixel_on = np.sum(data_train, 0)
    dbnet['bias'][0] = np.log((pixel_on + 0.01)/(train_num - pixel_on + 0.01))
    return dbnet

In [60]:
def sigmoid_sampling(data, weight, bias):
    '''
    Draw binary states for the units fed by `data` through `weight`.

    Each unit switches on (1.0) with probability
    logistic(data . weight + bias) and is off (0.0) otherwise.
    Returns a float array with the shape of that probability array.
    '''
    activation_prob = expit(np.dot(data, weight) + bias)
    # One uniform draw per unit; the unit fires when the draw lands below
    # its activation probability.
    fires = np.random.random(activation_prob.shape) < activation_prob
    return np.where(fires, 1., 0.)

In [61]:
def get_gendata(dbnet, curr_layer, data, direction):
    '''
    Propagate `data` through the layers below `curr_layer`.

    With direction 'up' the data is pushed through weight matrices
    0 .. curr_layer-1 in turn, sampling binary states at every step.
    Any other direction returns `data` unchanged.
    '''
    if direction != 'up':
        return data
    states = data
    for below in range(curr_layer):
        # bias[below+1] belongs to the layer that this step produces.
        states = sigmoid_sampling(states, dbnet['w_up'][below], dbnet['bias'][below+1])
    return states
        

In [73]:
def update(v0, h0, v1, h1, a, b, w, eta ):
    '''
    Apply one contrastive-divergence step to the parameters, in place.

    v0, h0 : visible / hidden states of the data-driven (positive) phase.
    v1, h1 : visible / hidden states of the reconstruction (negative) phase.
    a, b   : visible / hidden bias vectors, modified in place.
    w      : (visible, hidden) weight matrix, modified in place.
    eta    : learning rate.

    Returns the same (a, b, w) objects that were passed in.
    '''
    # Pairwise visible-hidden products of each phase.
    positive = np.outer(v0, h0)
    negative = np.outer(v1, h1)

    a += eta * (v0 - v1)
    b += eta * (h0 - h1)
    w += eta * (positive - negative)
    return a, b, w

def CD1_update(data_v, a, b, w, eta=0.001):
    '''
    Run one-step contrastive divergence (CD-1) over a batch, one row at a time.

    data_v : 2-D array, one visible vector per row.
    a, b   : visible / hidden bias vectors.
    w      : (visible, hidden) weight matrix.
    eta    : learning rate; defaults to the previously hard-coded 0.001.

    Returns the (a, b, w) produced by the last call to update(), or the
    arguments unchanged when the batch has no rows.
    '''
    for k in range(data_v.shape[0]):
        # data -> hidden sample -> reconstructed visible -> hidden again.
        data_h = sigmoid_sampling(data_v[k], w, b)
        gibbs_v = sigmoid_sampling(data_h, w.transpose(), a)
        gibbs_h = sigmoid_sampling(gibbs_v, w, b)
        a, b, w = update(data_v[k], data_h, gibbs_v, gibbs_h, a, b, w, eta)
    return a, b, w

In [93]:
def RBM_train(dbnet, curr_layer, Data_v):
    '''
    Train the RBM formed by layers curr_layer and curr_layer+1 with CD-1.

    dbnet      : network dictionary as built by init_dbn.
    curr_layer : index of the RBM's visible layer.
    Data_v     : 2-D array of input-layer training vectors, one per row;
                 each mini-batch is first propagated up to curr_layer.

    The trained weights and both biases are written back into dbnet, and
    'w_down'[curr_layer] is set to a transposed copy of the trained weights.
    Returns dbnet.
    '''
    train_num = Data_v.shape[0]
    batch_size = dbnet['batch']
    w = dbnet['w_up'][curr_layer]
    a = dbnet['bias'][curr_layer]
    b = dbnet['bias'][curr_layer+1]
    for epoc in range(dbnet['epoc']):
        # Function-call form prints the same text under Python 2 and 3.
        print(epoc+1)
        for k in range(0, train_num, batch_size):
            # Slicing already truncates the final, possibly shorter batch.
            data_v = get_gendata(dbnet, curr_layer, Data_v[k:k+batch_size], 'up')
            a, b, w = CD1_update(data_v, a, b, w)
    dbnet['w_up'][curr_layer] = w
    dbnet['bias'][curr_layer] = a
    dbnet['bias'][curr_layer+1] = b
    # np.transpose returns a view; copy it so that later in-place changes to
    # the downward weights cannot silently alter the upward weights.
    dbnet['w_down'][curr_layer] = np.transpose(w).copy()
    return dbnet

In [64]:
# Load the training set and binarise it: a pixel counts as "on" when its raw
# value exceeds 50.
train_x, train_y = pt.get_train_data()
train_x = train_x > 50

# Requested number of samples and the digit class to train on.
train_num = 1
digit = 5

# Positions of every sample labelled `digit`.
label_list = np.array(train_y).astype(int)
index_digit = np.nonzero(label_list == digit)[0]

# A request of <= 1, or of more samples than exist, selects all samples of
# the class; in every case one sample fewer than that count is kept.
# NOTE(review): the "- 1" on an explicit request looks like it may be an
# off-by-one -- confirm the intent.
train_num = (len(index_digit)
             if (train_num <= 1 or train_num > len(index_digit))
             else train_num) - 1

index_train = index_digit[:train_num]
Data_v = np.array(train_x[index_train]).astype(float)
Labels = train_y[index_train]

In [98]:
# Seed NumPy's global generator so weight initialisation and sampling repeat
# from run to run.
np.random.seed(0)
nodes = [784, 500, 500, 2000, 10]
dbnet = init_dbn(nodes, Data_v)
# Train the RBMs one at a time, bottom pair first. The pair formed by the
# last two layers is not trained in this loop.
for layer in range(0, len(dbnet['nodes']) - 2):
    dbnet = RBM_train(dbnet, layer, Data_v)

1
1
1


In [110]:
# Reconstruct the last sample of the chosen digit class: sample upwards
# through every layer except the top one, then back down to the pixels.
data_v = np.array(train_x[index_digit[-1]]).astype(float)

# One slot per layer that takes part (the top layer is left out).
d_up = [np.array([]) for i in range(len(dbnet['nodes'])-1)]
d_down = [np.array([]) for i in range(len(dbnet['nodes'])-1)]

# Upward pass with the 'w_up' weights.
d_up[0] = data_v
for layer in range(1, len(dbnet['nodes'])-1):
    d_up[layer] = sigmoid_sampling(
        d_up[layer-1], dbnet['w_up'][layer-1], dbnet['bias'][layer])

# Downward pass with the 'w_down' weights, starting from the highest sampled layer.
d_down[-1] = d_up[-1]
for layer in reversed(range(len(dbnet['nodes'])-2)):
    d_down[layer] = sigmoid_sampling(
        d_down[layer+1], dbnet['w_down'][layer], dbnet['bias'][layer])

recon = d_down[0]
pt.plot_digit(recon)
plt.show()

In [100]:
def fine_tuning(dbnet, Data_v, Labels):
    '''
    Fine-tune the upward and downward weights with an up pass followed by a
    label-driven down pass for every training sample.

    dbnet  : network dictionary; 'w_up', 'w_down' and 'bias' are updated.
    Data_v : 2-D array of training vectors, one per row.
    Labels : class index of every row of Data_v (cast to int when used).

    Pass 1 (up, then down): the sample is propagated up, the top states are
    propagated back down, and every downward matrix is moved so that its
    output gets closer to the state seen on the way up.
    Pass 2 (down, then up): a one-hot label vector is propagated down, the
    result is propagated back up, and every upward matrix is moved so that
    its output gets closer to the state seen on the way down.

    Both passes use the delta rule  eta * outer(input, target - output).
    NOTE(review): biases are shared between both directions and are not
    updated here -- confirm that this is intended.

    Returns dbnet.
    '''
    eta = 0.001
    n_layers = len(dbnet['nodes'])
    train_num = Data_v.shape[0]

    # Make sure every layer has a bias and every layer pair has downward
    # weights, so that layers skipped by pre-training do not crash the passes.
    for layer in range(len(dbnet['bias']), n_layers):
        dbnet['bias'].append(np.zeros(dbnet['nodes'][layer]))
    for layer in range(n_layers - 1):
        if np.size(dbnet['w_down'][layer]) == 0:
            dbnet['w_down'][layer] = np.transpose(dbnet['w_up'][layer]).copy()

    d_up = [np.array([]) for _ in range(n_layers)]
    d_down = [np.array([]) for _ in range(n_layers)]
    for epoc in range(dbnet['epoc']):
        # Function-call form prints the same text under Python 2 and 3.
        print(epoc+1)
        for k in range(train_num):
            # first up then down
            d_up[0] = Data_v[k]
            for layer in range(1, n_layers):
                d_up[layer] = sigmoid_sampling(d_up[layer-1], dbnet['w_up'][layer-1], dbnet['bias'][layer])
            d_down[-1] = d_up[-1]
            for layer in range(n_layers-2, -1, -1):
                d_down[layer] = sigmoid_sampling(d_down[layer+1], dbnet['w_down'][layer], dbnet['bias'][layer])
                # input: d_down[layer+1], target: d_up[layer], output: d_down[layer]
                delta_w = eta * np.outer(d_down[layer+1], d_up[layer] - d_down[layer])
                # Out-of-place on purpose: 'w_down' may be a transposed view
                # of 'w_up', and "+=" would then change both matrices.
                dbnet['w_down'][layer] = dbnet['w_down'][layer] + delta_w

            # then down and up
            # One-hot label vector sized like the top layer.
            d_down[-1] = np.zeros(dbnet['nodes'][-1])
            d_down[-1][int(Labels[k])] = 1.
            for layer in range(n_layers-2, -1, -1):
                d_down[layer] = sigmoid_sampling(d_down[layer+1], dbnet['w_down'][layer], dbnet['bias'][layer])
            d_up[0] = d_down[0]
            for layer in range(1, n_layers):
                d_up[layer] = sigmoid_sampling(d_up[layer-1], dbnet['w_up'][layer-1], dbnet['bias'][layer])
                # input: d_up[layer-1], target: d_down[layer], output: d_up[layer]
                delta_w = eta * np.outer(d_up[layer-1], d_down[layer] - d_up[layer])
                dbnet['w_up'][layer-1] = dbnet['w_up'][layer-1] + delta_w
    return dbnet

In [102]:
# Layer indices visited by a top-down pass, highest first. list() plus the
# function-call form of print give the same output under Python 2 and 3.
print(list(range(len(dbnet['nodes'])-2, -1, -1)))

[3, 2, 1, 0]
