In [37]:
#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import poisson_tools as pt
import matplotlib.patches as mpatches
from scipy.special import expit
import scipy.cluster.vq as spvq
import scipy.spatial.distance as spdt
import matplotlib.cm as cm
from scipy.optimize import curve_fit
import statsmodels.api as sm
#from scipy.stats import poisson
from scipy.misc import factorial

In [38]:
def sigmoid_sampling(data, weight, bias):
    """Draw binary (0./1.) samples from sigmoid-activated units.

    The activation probability is ``expit(np.dot(data, weight) + bias)``.
    A unit is set to 1. when a uniform draw from ``np.random.random`` is
    strictly below its probability, and to 0. otherwise.

    Parameters
    ----------
    data : left operand of the dot product.
    weight : right operand of the dot product.
    bias : added to the dot product before the sigmoid.

    Returns
    -------
    Float array of zeros and ones with the same shape as the probabilities.
    """
    activation_prob = expit(np.dot(data, weight) + bias)
    # One uniform draw per unit, taken from the global NumPy random state.
    uniform_draw = np.random.random(activation_prob.shape)
    return np.where(uniform_draw < activation_prob, 1., 0.)

In [39]:
def avg_distr(a, b, w, sample_num, init_v):
    """Average the visible states visited by a Gibbs chain.

    Starting from ``init_v``, the chain alternates
    ``sigmoid_sampling(v, w, b)`` (hidden step) and
    ``sigmoid_sampling(h, w.transpose(), a)`` (visible step). The initial
    state counts as the first of the ``sample_num`` visible states.

    Parameters
    ----------
    a : bias used in the visible step; ``a.shape[0]`` sets the state length.
    b : bias used in the hidden step.
    w : weight matrix; its transpose is used in the visible step.
    sample_num : total number of visible states to average (>= 1).
    init_v : initial visible state, broadcast into a float vector.

    Returns
    -------
    Float array of length ``a.shape[0]``: the element-wise mean of the
    visited visible states.

    Raises
    ------
    ValueError
        If ``sample_num`` is smaller than 1.
    """
    if sample_num < 1:
        raise ValueError('sample_num must be at least 1, got %r' % (sample_num,))

    # Only the current state and a running sum are kept. Storing the full
    # (sample_num, n_visible) history just to average it makes memory grow
    # linearly with the chain length.
    visible = np.zeros(a.shape[0])
    visible[:] = init_v
    total = visible.copy()
    for _ in range(1, sample_num):
        hidden = sigmoid_sampling(visible, w, b)
        visible[:] = sigmoid_sampling(hidden, w.transpose(), a)
        total += visible
    return total / sample_num

In [40]:
def plot_distance(sample_nums, dis_matrix, clabel, **kwargs):
    """Plot the mean of ``dis_matrix`` over axis 0 with a min/max band.

    The mean curve is drawn with a logarithmic x axis (``semilogx``), the
    area between the per-column maximum and minimum is shaded in the same
    colour, and a legend is added to the axes.

    Parameters
    ----------
    sample_nums : x coordinates, one per column of ``dis_matrix``.
    dis_matrix : 2-D array; statistics are taken along axis 0.
    clabel : legend label for the mean curve.
    **kwargs : ``ax`` selects the target axes (default: current axes);
        everything else is forwarded to ``ax.semilogx``.
    """
    # Resolve the axes lazily. Using plt.gca() as the pop() default would
    # evaluate it (and possibly create a figure) even when ``ax`` is given.
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.gca()

    upper = np.max(dis_matrix, 0)
    lower = np.min(dis_matrix, 0)
    mean = np.average(dis_matrix, 0)

    base_line, = ax.semilogx(sample_nums, mean, linewidth=2., label=clabel, **kwargs)
    # Shade the min/max envelope in the colour assigned to the mean curve.
    ax.fill_between(sample_nums, upper, lower, facecolor=base_line.get_color(), alpha=.4, linewidth=0)
    ax.legend(loc='lower left', shadow=True)

In [41]:
# Fetch the training set and binarise the inputs: True where the value exceeds 50.
train_x, train_y = pt.get_train_data()
train_x = train_x > 50

# Pick out the rows whose label equals `digit`.
digit = 5
label_list = np.asarray(train_y).astype(int)
index_digit = np.where(label_list == digit)[0]

# Use all matching rows except the last one.
train_num = len(index_digit) - 1
index_train = index_digit[:train_num]

# Float copy of the selected rows and their element-wise mean over rows.
Data_v = np.array(train_x[index_train], dtype=float)
dis_D = Data_v.mean(axis=0)

In [42]:
# Directory holding the saved parameter files.
# NOTE(review): machine-specific absolute path; change it here when running elsewhere.
THETA_DIR = '/home/liuq/apt/2ndYear/sDBN/theta'

# Each file unpacks into three arrays (a, b, w).
# NOTE(review): the arrays are presumably stored as one pickled object array,
# which NumPy >= 1.16.3 refuses to load unless allow_pickle=True.
# Unpickling can execute arbitrary code, so only load files you trust.
fname = THETA_DIR + '/5420_b1000_epoc00903_cd.npy' #903
a_K, b_K, w_K = np.load(fname, allow_pickle=True)
fname = THETA_DIR + '/5420_b0001_epoc00001_cd.npy' #001
a_1, b_1, w_1 = np.load(fname, allow_pickle=True)
fname = THETA_DIR + '/5420_b0002_epoc00002_cd.npy'
#fname = THETA_DIR + '/5420_b1000_epoc00001_cd.npy' #903
a_2, b_2, w_2 = np.load(fname, allow_pickle=True)


In [None]:
# Experiment layout: trail_num columns and test_per_trail rows per distance table.
trail_num, test_per_trail = 5, 10

# Fix the global NumPy seed so the sampling below is repeatable.
np.random.seed(0)

# Chain lengths; starts out as an empty float array.
sample_nums = np.array([])

# One independent (test_per_trail x trail_num) table of zeros per parameter set.
dis_matrix1, dis_matrix2, dis_matrixK = (
    np.zeros((test_per_trail, trail_num)) for _unused in range(3)
)

# Initial visible state: the first row of the training data.
init_v = Data_v[0]

In [None]:
# Start from an empty array so that re-running this cell does not keep
# appending to an already filled `sample_nums` (which would no longer match
# the number of columns in the distance matrices).
sample_nums = np.array([])
for i in range(trail_num):
    # Progress indicator; the function-call form also runs on Python 3.
    print(i)
    # Chain length for this column: 10, 100, 1000, ...
    s_num = 10 ** (i + 1)
    sample_nums = np.append(sample_nums, [s_num])
    for j in range(test_per_trail):
        # The order of the three calls is kept fixed: they share the global
        # NumPy random state, so reordering would change the seeded results.
        distr_1 = avg_distr(a_1, b_1, w_1, s_num, init_v)
        distr_2 = avg_distr(a_2, b_2, w_2, s_num, init_v)
        distr_K = avg_distr(a_K, b_K, w_K, s_num, init_v)
        # Euclidean distance between the data mean and the chain mean.
        # (An L1 variant, np.sum(np.abs(dis_D - distr)), was used here before.)
        dis_matrix1[j, i] = np.linalg.norm(dis_D-distr_1)
        dis_matrix2[j, i] = np.linalg.norm(dis_D-distr_2)
        dis_matrixK[j, i] = np.linalg.norm(dis_D-distr_K)

0
1
2
3

In [None]:
# Clear the current figure, then draw one mean curve with a min/max band per parameter set.
plt.clf()
plot_distance(sample_nums, dis_matrix1, 'CD_1')
plot_distance(sample_nums, dis_matrix2, 'CD_2')
plot_distance(sample_nums, dis_matrixK, 'CD_1K')
plt.xlabel('Sampling Number')
plt.ylabel('Euclidean Distance')
plt.title('Euclidean Distance Between Mean of Samples and Training Set')
# Pass a real boolean: string flags such as 'on' are deprecated in matplotlib.
plt.grid(True)
#plt.savefig('distr.pdf')
plt.show()