In [1]:
#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import poisson_tools as pt
import matplotlib.patches as mpatches
from scipy.special import expit
import scipy.cluster.vq as spvq
import scipy.spatial.distance as spdt
import matplotlib.cm as cm
from scipy.optimize import curve_fit
import statsmodels.api as sm
#from scipy.stats import poisson
from scipy.misc import factorial

In [2]:
def sigmoid_sampling(data, weight, bias):
    """Draw binary (0./1.) samples from units with a sigmoid activation.

    The activation of every unit is ``expit(np.dot(data, weight) + bias)``.
    A unit is switched on when a uniform random number taken from
    ``np.random.random`` is strictly smaller than that activation.

    Parameters
    ----------
    data : array_like
        Input state(s) passed as the left operand of ``np.dot``.
    weight : array_like
        Weights passed as the right operand of ``np.dot``.
    bias : scalar or array_like
        Added to the weighted sum (NumPy broadcasting applies).

    Returns
    -------
    numpy.ndarray
        Float array with the shape of the activation, holding 1. where a
        unit fired and 0. everywhere else.
    """
    activation = expit(np.dot(data, weight) + bias)
    # One uniform draw in [0, 1) per unit, compared against its activation.
    fired = np.random.random(activation.shape) < activation
    return np.where(fired, 1., 0.)

In [3]:
def avg_distr(a, b, w, sample_num, init_v):
    """Run an alternating Gibbs chain and average the visited visible states.

    The chain starts at ``init_v``. Each step first samples a hidden state
    with ``sigmoid_sampling(visible, w, b)`` and then a new visible state
    with ``sigmoid_sampling(hidden, w.transpose(), a)``.

    Parameters
    ----------
    a : numpy.ndarray
        Bias used when sampling the visible state; ``a.shape[0]`` sets the
        width of the stored chain.
    b : array_like
        Bias used when sampling the hidden state.
    w : numpy.ndarray
        Weights; used as given for the hidden step and transposed for the
        visible step.
    sample_num : int
        Number of chain states to store, the initial state included.
    init_v : array_like
        State written into the first row of the chain.

    Returns
    -------
    tuple
        ``(chain, mean)`` where ``chain`` has shape
        ``(sample_num, a.shape[0])`` and ``mean`` is its average over axis 0.
    """
    chain = np.zeros((sample_num, a.shape[0]))
    chain[0] = init_v
    for t in range(1, sample_num):
        previous_visible = chain[t - 1]
        hidden = sigmoid_sampling(previous_visible, w, b)
        chain[t] = sigmoid_sampling(hidden, w.transpose(), a)
    chain_mean = np.average(chain, 0)
    return chain, chain_mean

In [4]:
def plot_distance(sample_nums, dis_matrix, clabel, **kwargs):
    """Plot the mean of ``dis_matrix`` with a min/max band on a log-x axis.

    Parameters
    ----------
    sample_nums : array_like
        X coordinates, one per column of ``dis_matrix``.
    dis_matrix : array_like
        Values reduced along axis 0: the average is drawn as a line and the
        range between the maximum and minimum is shaded.
    clabel : str
        Legend label of the mean line.
    **kwargs
        ``ax`` (optional) selects the axes to draw on; when it is missing or
        ``None`` the current pyplot axes are used. All remaining keyword
        arguments are forwarded to ``ax.semilogx``.

    Returns
    -------
    None
    """
    # Resolve the axes lazily. Using ``plt.gca()`` as the default value of
    # ``pop`` would evaluate it on every call and create/activate a figure
    # even though the caller supplied its own ``ax``.
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.gca()

    upper = np.max(dis_matrix, 0)
    lower = np.min(dis_matrix, 0)
    mean = np.average(dis_matrix, 0)

    base_line, = ax.semilogx(sample_nums, mean, linewidth=2., label=clabel, **kwargs)
    # Shade the min/max envelope in the colour chosen for the mean line.
    ax.fill_between(sample_nums, upper, lower,
                    facecolor=base_line.get_color(), alpha=.4, linewidth=0)
    ax.legend(loc='lower left', shadow=True)

In [None]:
def poisson(k, lamb):
    """Poisson probability mass ``lamb**k * exp(-lamb) / k!``.

    The value is evaluated in log space, so large ``k`` does not overflow
    (``lamb**k`` and ``k!`` both become ``inf`` in floating point, which
    made the direct formula return ``nan``). Non-integer ``k`` is handled
    through the gamma function, ``k! = gamma(k + 1)``.

    Parameters
    ----------
    k : scalar or array_like
        Count(s) at which to evaluate the mass; negative values yield 0.
    lamb : scalar or array_like
        Rate parameter, expected to be non-negative.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Scalar for scalar input, otherwise an array following NumPy
        broadcasting of ``k`` and ``lamb``.
    """
    # Local import keeps this function independent of the file-level
    # ``scipy.misc.factorial`` import, which recent SciPy releases no
    # longer provide.
    from scipy.special import gammaln, xlogy

    k = np.asarray(k, dtype=float)
    # xlogy(k, lamb) is k*log(lamb), but evaluates to 0 when k == 0 even for
    # lamb == 0, which reproduces the 0**0 == 1 convention of the direct form.
    log_pmf = xlogy(k, lamb) - lamb - gammaln(k + 1.)
    pmf = np.where(k < 0, 0., np.exp(log_pmf))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves real arrays unchanged.
    return pmf[()]
def gauss(x, A, mu, sigma):
    """Unnormalised Gaussian ``A * exp(-(x - mu)**2 / (2 * sigma**2))``.

    Parameters
    ----------
    x : scalar or array_like
        Point(s) at which to evaluate the curve.
    A : scalar
        Value of the curve at ``x == mu``.
    mu : scalar
        Centre of the curve.
    sigma : scalar
        Width parameter.

    Returns
    -------
    scalar or numpy.ndarray
        Curve value(s), shaped like ``x``.
    """
    offset = x - mu
    two_sigma_sq = 2. * sigma**2
    exponent = -(offset**2) / two_sigma_sq
    return A * np.exp(exponent)

In [5]:
# Load the training set and threshold the raw values at 50, which turns
# train_x into a boolean array.
train_x, train_y = pt.get_train_data()
train_x = train_x > 50

# Select the samples labelled `digit`; the last matching index is left out.
digit = 5
label_list = np.array(train_y).astype(int)
index_digit = np.nonzero(label_list == digit)[0]
train_num = len(index_digit) - 1
index_train = index_digit[:train_num]
Data_v = np.array(train_x[index_train]).astype(float)

# kmeans with a single cluster; k_center[0] is the returned codebook and
# k_center[1] the distortion.
k_center = spvq.kmeans(Data_v, 1)

# Distance of every selected training sample to that single centre.
dis_D = [np.linalg.norm(sample - k_center[0]) for sample in Data_v]

In [6]:
# Relative-frequency histogram of the training distances in dis_D.
his = np.histogram(dis_D)  # (counts, bin_edges) with NumPy's default binning
# Bin centres: midpoint of each pair of neighbouring edges.
mid_bin = (his[1][:-1] + his[1][1:]) / 2.
# Counts scaled so that they sum to one (float division on purpose).
stat = his[0] / float(sum(his[0]))
plt.bar(mid_bin, stat)
plt.show()

In [7]:
#plt.plot(dis_D, '.')
#plt.show()
#popt, pcov = curve_fit(poisson, his[0], his[1][0:-1])
#res = sm.Poisson(dis_D,np.ones_like(dis_D)).fit()
#print res.summary()

In [None]:
# Disabled experiment: the whole cell body is wrapped in a triple-quoted
# string, so it is a bare string expression and none of it is executed.
# The enclosed code fits `gauss` to (mid_bin, stat) with curve_fit, overlays
# the fitted curve on the histogram values and prints the fitted parameters
# next to the mean and variance of dis_D. The `print` line inside uses the
# Python 2 statement form.
'''
# fit with curve_fit
parameters, cov_matrix = curve_fit(gauss, mid_bin, stat)
#parameters, cov_matrix = curve_fit(poisson, mid_bin, stat) 

# plot poisson-deviation with fitted parameter
x_plot = np.linspace(5, 15, 1000)
plt.plot(mid_bin, stat)

plt.plot(x_plot, gauss(x_plot, *parameters), 'r-', lw=2)
#plt.plot(x_plot, poisson(x_plot, *parameters), 'r-', lw=2)
plt.show()
print parameters, np.mean(dis_D), np.var(dis_D)
'''

In [None]:
# For every saved parameter checkpoint: run a Gibbs chain, histogram the
# distances of the generated samples to the k-means centre and overlay a
# fitted Gaussian.
init_v = Data_v[0]  # every chain starts from the first training sample
dis = {}
i = 3
s_num = pow(10, i+1)  # chain length; the same for every checkpoint
x_plot = np.linspace(5, 15, 1000)  # x range on which the fitted curve is drawn
#for step in range(150, -1, -1):
for step in range(11924, 542-1, -542):  # 22*542 down to 1*542, newest first
    fname = '/home/liuq/apt/2ndYear/sDBN/theta/5420_b1000_epoc%05d_cd.npy'%(step)
    #fname = '/home/liuq/apt/2ndYear/sDBN//5420_b1000_epoc%05d_cd.npy'%(step)
    # NOTE(review): the file is unpacked into three arrays (a, b, w). If it
    # was saved as an object array, recent NumPy versions need
    # allow_pickle=True here -- only enable that for trusted files.
    a, b, w = np.load(fname)
    data_g, tmp = avg_distr(a, b, w, s_num, init_v)

    # Distance of every generated sample to the centre, computed for all rows
    # at once. The former per-row loop reused `i` as its index and thereby
    # overwrote the exponent variable above.
    # The key is identical for every checkpoint, so (as before) only the
    # distances of the most recent checkpoint remain stored in `dis`.
    dis[str(s_num)] = np.linalg.norm(data_g - k_center[0], axis=1)

    his = np.histogram(dis[str(s_num)]) #, bins=[0, 1, 2, 3]
    mid_bin = 0.5*(his[1][1:] + his[1][:-1])
    stat = his[0]*1./sum(his[0])
    plt.bar(mid_bin, stat)

    # curve_fit starts from all-ones parameters when no initial guess is
    # given, which can be far away from the observed distances; seed the fit
    # with the peak height and the sample mean / standard deviation instead.
    spread = np.std(dis[str(s_num)])
    p0 = [np.max(stat), np.mean(dis[str(s_num)]), spread if spread > 0 else 1.]
    parameters, cov_matrix = curve_fit(gauss, mid_bin, stat, p0=p0)

    plt.plot(x_plot, gauss(x_plot, *parameters), 'r-', lw=2)
    plt.show()

In [66]:
# Same analysis as the checkpoint sweep, for the single checkpoint `step`.
step = 1084
fname = '/home/liuq/apt/2ndYear/sDBN/theta/5420_b1000_epoc%05d_cd.npy'%(step)
#fname = '/home/liuq/apt/2ndYear/sDBN/theta/5420_b0001_epoc00021_cd.npy' #001
# NOTE(review): the file is unpacked into three arrays (a, b, w). If it was
# saved as an object array, recent NumPy versions need allow_pickle=True
# here -- only enable that for trusted files.
a, b, w = np.load(fname)
# Defined here as well so the cell does not depend on another cell having
# been executed first; same value as used by the checkpoint sweep.
init_v = Data_v[0]
x_plot = np.linspace(5, 15, 1000)  # x range on which the fitted curve is drawn
for pow_num in range(3, 4):
    # Function-call form prints the same text under Python 2 and Python 3.
    print(pow_num)
    s_num = 1 * pow(10, pow_num+1)  # chain length: 10**(pow_num+1)
    data_g, tmp = avg_distr(a, b, w, s_num, init_v)
    # Distance of every generated sample to the k-means centre, all rows at once.
    dis = np.linalg.norm(data_g - k_center[0], axis=1)
    his = np.histogram(dis) #, bins=[0, 1, 2, 3]
    mid_bin = 0.5*(his[1][1:] + his[1][:-1])
    stat = his[0]*1./sum(his[0])
    plt.bar(mid_bin, stat)

    # curve_fit starts from all-ones parameters when no initial guess is
    # given, which can be far away from the observed distances; seed the fit
    # with the peak height and the sample mean / standard deviation instead.
    spread = np.std(dis)
    p0 = [np.max(stat), np.mean(dis), spread if spread > 0 else 1.]
    parameters, cov_matrix = curve_fit(gauss, mid_bin, stat, p0=p0)

    plt.plot(x_plot, gauss(x_plot, *parameters), 'r-', lw=2)
    plt.show()

4
