In [1]:
'''
Deep Belief Net training
'''
import sys
import random
import numpy as np
import poisson_tools as pt
import matplotlib.pyplot as plt
from scipy.special import expit

In [2]:
def init_dbn(nodes, data_train, epoc):
    '''
    nodes, training paras like epoc...
    '''
    dbnet = {}
    dbnet['nodes'] = nodes
    dbnet['top_num'] = 2
    dbnet['h_num'] = len(nodes) - 2 # hidden 
    dbnet['w_up'] = []
    dbnet['w_down'] = []
    dbnet['b_up'] = []
    dbnet['b_down'] = []
        
    for layer in range(dbnet['h_num']-1):
        w = np.random.normal(0,0.01, nodes[layer] * nodes[layer+1] )
        w = w.reshape((nodes[layer], nodes[layer+1]))
        dbnet['w_up'].append(w)
        dbnet['w_down'].append(np.array([]))
        dbnet['b_up'].append(np.zeros(nodes[layer]))
        dbnet['b_down'].append(np.zeros(nodes[layer]))
    dbnet['b_up'].append(np.zeros(nodes[layer]))
    dbnet['b_down'].append(np.zeros(nodes[layer]))
    
    # top layers
    w = np.random.normal(0,0.01, (nodes[dbnet['h_num']-1] + nodes[dbnet['h_num']+1] ) * nodes[dbnet['h_num']] )
    w = w.reshape((nodes[dbnet['h_num']-1] + nodes[dbnet['h_num']+1], nodes[dbnet['h_num']]))
    dbnet['w_top'] = w
    dbnet['a_top'] = np.zeros(nodes[dbnet['h_num']-1] + nodes[dbnet['h_num']+1])
    dbnet['b_top'] = np.zeros(nodes[dbnet['h_num']])
    
    dbnet['epoc'] = epoc
    dbnet['batch'] = 500
    
    train_num = data_train.shape[0]
    pixel_on = np.sum(data_train,0)
    a = np.log((pixel_on + 0.01)/(train_num - pixel_on + 0.01))
    dbnet['b_down'][0] = a #init bias on visible unites
    return dbnet

In [3]:
def sigmoid_sampling(data, weight, bias):
    sum_data = np.dot(data, weight) + bias
    prob = expit(sum_data)
    rdm = np.random.random(prob.shape)
    index_on = rdm < prob
    samples = np.zeros(prob.shape)
    samples[index_on]=1.
    return samples

In [4]:
def get_gendata(dbnet, curr_layer, data, direction):
    data_gen = data
    if direction == 'up':
        for l in range(curr_layer):
            data_gen = sigmoid_sampling(data_gen, dbnet['w_up'][l], dbnet['b_up'][l+1])
    elif direction == 'down':
        for l in range(curr_layer):
            data_gen = sigmoid_sampling(data_gen, dbnet['w_down'][l], dbnet['b_down'][l-1])
    return data_gen
        

In [5]:
def update(v0, h0, v1, h1, a, b, w, eta ):    
    delta_a = eta * (v0 - v1)
    delta_b = eta * (h0 - h1)
    v0_matrix = np.transpose(np.tile(v0,(w.shape[1],1)))
    v1_matrix = np.transpose(np.tile(v1,(w.shape[1],1)))
    h0_matrix = np.tile(h0,(w.shape[0],1))
    h1_matrix = np.tile(h1,(w.shape[0],1))
    delta_w = eta * (v0_matrix*h0_matrix - v1_matrix*h1_matrix)
    
    a += delta_a
    b += delta_b
    w += delta_w
    return a, b, w

def CD1_update(data_v, a, b, w):
    eta = 0.001
    for k in range(data_v.shape[0]):
        data_h = sigmoid_sampling(data_v[k], w, b)
        gibbs_v = sigmoid_sampling(data_h, w.transpose(), a)
        gibbs_h = sigmoid_sampling(gibbs_v, w, b)
        a, b, w = update(data_v[k], data_h, gibbs_v, gibbs_h, a, b, w, eta)
    return a, b, w

In [6]:
def RBM_train(dbnet, curr_layer, Data_v):
    train_num = Data_v.shape[0]
    w = dbnet['w_up'][curr_layer]
    a = dbnet['b_down'][curr_layer]
    b = dbnet['b_up'][curr_layer+1]
    for epoc in range (dbnet['epoc']):
        #print 'RBM training epoc:', epoc+1
        for k in range(0, train_num, dbnet['batch']):
            max_bsize = min(train_num-k, dbnet['batch'])
            data_v = Data_v[k:k+max_bsize]
            data_v = get_gendata(dbnet, curr_layer, data_v, 'up')
            a, b, w = CD1_update(data_v, a, b, w)
    dbnet['w_up'][curr_layer] = w
    dbnet['b_down'][curr_layer] = a
    dbnet['b_up'][curr_layer+1] = b
    dbnet['w_down'][curr_layer] = np.transpose(dbnet['w_up'][curr_layer])
    return dbnet

In [7]:
def RBM_top(dbnet, Data_v, Labels):
    train_num = Data_v.shape[0]
    w = dbnet['w_top']
    a = dbnet['a_top']
    b = dbnet['b_top']
    for epoc in range (dbnet['epoc']):
        #print 'RBM training epoc:', epoc+1
        for k in range(0, train_num, dbnet['batch']):
            max_bsize = min(train_num-k, dbnet['batch'])
            data_v = Data_v[k:k+max_bsize]
            data_v = get_gendata(dbnet, dbnet['h_num']-1, data_v, 'up')
            label = Labels[k:k+max_bsize]
            d_label = np.zeros((max_bsize, 10))
            for i in range(max_bsize):
                d_label[i, int(label[i])] = 1.
            data_v = np.append(data_v,d_label, axis=1)
            a, b, w = CD1_update(data_v, a, b, w)
    dbnet['w_top'] = w
    dbnet['a_top'] = a
    dbnet['b_top'] = b
    return dbnet

In [8]:
def up_pass(dbnet, data):
    eta = 0.001
    d_up = []
    d_down = []
    d_up.append(data)
    for layer in range(dbnet['h_num']-1):
        d_up.append(sigmoid_sampling(d_up[layer], dbnet['w_up'][layer], dbnet['b_up'][layer+1]))
        d_down.append(predict(d_up[layer+1], dbnet['w_down'][layer], dbnet['b_down'][layer])) #probability instead of states of 0 and 1
        delta_w = eta*matrix_times(d_up[layer+1], d_up[layer]-d_down[layer])
        delta_b = eta*(d_up[layer]-d_down[layer])
        dbnet['w_down'][layer] += delta_w
        dbnet['b_down'][layer] += delta_b
    output = d_up[dbnet['h_num']-1]
    return dbnet, output

def down_pass(dbnet, data):
    eta = 0.001
    d_up = []
    d_down = []
    for layer in range(dbnet['h_num']):
        d_up.append(np.array([]))
        d_down.append(np.array([]))
    d_down[-1] = data
    for layer in range(dbnet['h_num']-1,0,-1):
        d_down[layer-1] = sigmoid_sampling(d_down[layer], dbnet['w_down'][layer-1], dbnet['b_down'][layer-1])
        d_up[layer] = predict(d_down[layer-1], dbnet['w_up'][layer-1], dbnet['b_up'][layer]) #probability instead of states of 0 and 1
        delta_w = eta*matrix_times(d_down[layer-1], d_down[layer]-d_up[layer])
        delta_b = eta*(d_down[layer]-d_up[layer])
        dbnet['w_up'][layer-1] += delta_w
        dbnet['b_up'][layer] += delta_b
    return dbnet

def predict(data, weight, bias):
    sum_data = np.dot(data, weight) + bias
    prob = expit(sum_data)
    return prob

def matrix_times(m, n):
    m_matrix = np.transpose(np.tile(m,(len(n), 1)))
    n_matrix = np.tile(n,(len(m), 1))
    return m_matrix*n_matrix

In [9]:
def top_train(dbnet, data, label):
    eta = 0.001
    d_label = np.zeros(10)
    d_label[int(label)]=1.
    d_bottom = np.append(data, d_label)
    d_up = sigmoid_sampling(d_bottom, dbnet['w_top'], dbnet['b_top'])
    d_bottom_gibb = sigmoid_sampling(d_up, np.transpose(dbnet['w_top']), dbnet['a_top'])
    d_up_gibb = sigmoid_sampling(d_bottom_gibb, dbnet['w_top'], dbnet['b_top'])
    a, b, w = update(d_bottom, d_up, d_bottom_gibb, d_up_gibb, dbnet['a_top'],  dbnet['b_top'], dbnet['w_top'], eta)
    dbnet['a_top'] = a
    dbnet['b_top'] = b
    dbnet['w_top'] = w
    return dbnet, d_bottom_gibb[0:len(data)]

In [10]:
def fine_tuning(dbnet, Data_v, Labels):
    eta = 0.001
    train_num = Data_v.shape[0]

    for epoc in range (dbnet['epoc']):
        #print 'epoc for fine_tuning: ', epoc+1
        for k in range(train_num):
            #UP-pass
            dbnet, top_input = up_pass(dbnet, Data_v[k]) # update dbnet
            #Top two layers as assoiciate memory
            #Train like RBM
            dbnet, top_output = top_train(dbnet, top_input, Labels[k])
            #Down-pass
            dbnet = down_pass(dbnet, top_output) # update dbnet
    return dbnet

In [11]:
def testing(dbnet, test_v, label):
    d_up=[]
    d_up.append(test_v)
    for layer in range(dbnet['h_num']-1):
        d_up.append(sigmoid_sampling(d_up[layer], dbnet['w_up'][layer], dbnet['b_up'][layer+1]))
    #top layers
    num_pen = dbnet['nodes'][dbnet['h_num']-1]
    d_top = sigmoid_sampling(d_up[-1], dbnet['w_top'][0:num_pen,:], dbnet['b_top'])
    d_bottom = sigmoid_sampling(d_top, np.transpose(dbnet['w_top']), dbnet['a_top'])
    d_down = []
    for layer in range(dbnet['h_num']):
        d_down.append(np.array([]))
    d_down[-1] = d_bottom[0:num_pen]
         
    for layer in range(dbnet['h_num']-1,0,-1):
        d_down[layer-1] = sigmoid_sampling(d_down[layer], dbnet['w_down'][layer-1], dbnet['b_down'][layer-1])
    
    p_label = np.dot(d_top, np.transpose(dbnet['w_top'])) + dbnet['a_top']
    d_label = p_label[num_pen:]
    
    predict = np.argmax(d_label)
    if predict == label :
        if d_label[predict] > 0.:
            result = 1
        else:
            result = -1
    elif max(d_label) > 0.:
        result = 0
    else:
        result = -1
    
    
    #pt.plot_digit(d_down[0])
    #plt.show()
    return result, d_down[0]

In [12]:
train_x, train_y = pt.get_train_data()
train_x = train_x > 50
train_num = 1

label_list = np.array(train_y).astype(int)
#digit = 5
index_digit = np.where(label_list<10)[0]
#index_digit = np.where(label_list==digit)[0]
if train_num <= 1 or train_num > len(index_digit):
    train_num = len(index_digit) - 1
else:
    train_num = train_num - 1

index_train = index_digit[0:train_num]
Data_v = np.array(train_x[index_train]).astype(float)
Labels = train_y[index_train]

test_x, test_y = pt.get_test_data()
test_x = test_x > 50

trail_num = 100
result_list=np.zeros((trail_num, 3))

np.random.seed(0)
nodes = [784, 500, 500, 2000, 10]

In [13]:
for epoc in range(trail_num, trail_num+1):#trail_num):
    print epoc
    dbnet = init_dbn(, Data_v, epoc)
    # Greedy Train RBMs layer by layer
    for layer in range(dbnet['h_num']-1):
        dbnet = RBM_train(dbnet, layer, Data_v)
    dbnet = RBM_top(dbnet, Data_v, Labels)
    dbnet = fine_tuning(dbnet, Data_v, Labels)
    result = []
    for i in range(len(test_x)):
        r, recon = testing(dbnet, test_x[i], test_y[i])
        result.append(r)
    result = np.array(result)
    correct = np.where(result==1)[0].shape[0]
    false = np.where(result==0)[0].shape[0]
    unclassify = np.where(result==-1)[0].shape[0]
    result_list[epoc, :] = [correct, false, unclassify]
print result_list

100


IndexError: index 100 is out of bounds for axis 0 with size 10

In [14]:
result_list

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [49]:
result_list = np.load('mnist_result.npy')
plt.clf()
plt.plot(range(1,10), result_list[1:, 1]*100./(result_list[1:, 1]+result_list[1:, 0]))
#plt.plot(range(1,10), result_list[1:, 2]/100.)
plt.grid('on')
plt.xlabel('Epocs')
plt.ylabel('Classification Error Rate (%)')
plt.title('MNIST Classification')
#plt.show()
plt.savefig('MNISTerror.pdf')

In [48]:
np.save('mnist_result.npy', result_list)

In [None]:
result = []
for i in range(len(test_x)):
    r, recon = testing(dbnet, test_x[i], test_y[i])
    result.append(r)
result = np.array(result)
correct = np.where(result==1)[0].shape[0]
false = np.where(result==0)[0].shape[0]
unclassify = np.where(result==-1)[0].shape[0]
#print 'correct rate is: ', correct*100./len(test_x), 'fault rate is: ', false*100./len(test_x)

In [None]:
print 'correct rate is: ', correct*100./len(test_x), 'fault rate is: ', false*100./len(test_x)

In [None]:
test_x, test_y = pt.get_test_data()
test_x = test_x > 50
digit = 5
#index_digit = np.where(label_list<10)[0]
label_list = np.array(test_y).astype(int)
index_digit = np.where(label_list==digit)[0]
test_x = test_x[index_digit]
test_y = test_y[index_digit]
result = []
for i in range(len(test_x)):
    result.append(testing(dbnet, test_x[i], test_y[i]))
result = np.array(result)
correct = np.where(result==1)[0].shape[0]
false = np.where(result==0)[0].shape[0]
unclassify = np.where(result==-1)[0].shape[0]
print 'correct rate is: ', correct*100./len(test_x), 'fault rate is: ', false*100./len(test_x)

In [None]:
print 'correct rate is: ', correct*100./len(test_x), 'fault rate is: ', false*100./len(test_x)