Why the std of y_mean is so small? #114
Quick questions,
|
Thank you for responding so fast!
def mean_field_variational(x, layer_sizes, n_particles):
with zs.BayesianNet() as variational:
ws = []
for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
layer_sizes[1:])):
w_mean = tf.get_variable(
'w_mean_' + str(i), shape=[1, n_out, n_in + 1],
initializer=tf.constant_initializer(0.))
w_logstd = tf.get_variable(
'w_logstd_' + str(i), shape=[1, n_out, n_in + 1],
initializer=tf.constant_initializer(0.))
ws.append(
zs.Normal('w' + str(i), w_mean, logstd=w_logstd,
n_samples=n_particles, group_ndims=2))
# forward
ly_x = tf.expand_dims(
tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1]), 3)
for i in range(len(ws)):
w = tf.tile(ws[i], [1, tf.shape(x)[0], 1, 1])
ly_x = tf.concat(
[ly_x, tf.ones([n_particles, tf.shape(x)[0], 1, 1])], 2)
ly_x = tf.matmul(w, ly_x) / tf.sqrt(tf.to_float(tf.shape(ly_x)[2]))
if i < len(ws) - 1:
ly_x = tf.nn.relu(ly_x)
#print("qw ly_xshape")
#print(ly_x.shape)
y_mean = tf.squeeze(ly_x, [2, 3])
return variational, y_mean
variational, y_qw = mean_field_variational(x, layer_sizes, n_particles)
The y_qw is what I want; its shape is [1, 5000, 200], where 5000 is the number of samples. I compute the std and mean from it and they look good. I also tested with Pyro (PyTorch) using its example: https://github.com/uber/pyro/blob/dev/examples/bayesian_regression.py |
which is neater IMO. I would be surprised if they produce different results.
(*) The more principled approach is to build a hierarchical model. But you still need to think about the hierarchical prior, or do some diagnosis afterwards. |
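For concreteness, here is a minimal sketch of what a hierarchical prior could look like in the style of bayesian_nn.py. The hyper-prior names ('w_hyper_' + str(i)) and the chosen hyper-parameters are illustrative assumptions, not part of the example, and each hyper-variable would also need a matching factor in the variational and in log_joint:
import tensorflow as tf
import zhusuan as zs
def hierarchical_bnn(observed, x, layer_sizes, n_particles):
    with zs.BayesianNet(observed=observed) as model:
        ws = []
        for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                              layer_sizes[1:])):
            # hypothetical hyper-prior: one log-std per layer for the weights,
            # i.e. a log-normal prior on the weight scale
            w_hyper_logstd = zs.Normal('w_hyper_' + str(i),
                                       tf.zeros([1]), logstd=tf.zeros([1]))
            w_mean = tf.zeros([1, n_out, n_in + 1])
            ws.append(zs.Normal('w' + str(i), w_mean, logstd=w_hyper_logstd,
                                n_samples=n_particles, group_ndims=2))
        # ... forward pass and the Normal observation on 'y' as in bayesian_nn.py ...
        return model, ws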
Below is the code of the BNN:
X_train, Y_train = Variable(torch.tensor(X)), Variable(torch.tensor(Y)).reshape(200,1)
X_test, Y_test = Variable(torch.tensor(X)), Variable(torch.tensor(Y)).reshape(200,1)
print(X_train.shape,Y_train.shape)
data = torch.cat((X_train, Y_train), 1)
def get_batch_indices(N, batch_size):
all_batches = np.arange(0, N, batch_size)
if all_batches[-1] != N:
all_batches = list(all_batches) + [N]
return all_batches
class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden):
super(Net, self).__init__()
self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer
self.predict = torch.nn.Linear(n_hidden, 1) # output layer
def forward(self, x):
x = self.hidden(x)
x = self.predict(x)
return x
first_layer = len(X_train.data.numpy()[0])
second_layer = 25
softplus = nn.Softplus()
regression_model = Net(first_layer, second_layer)
def model(data):
mu = Variable(torch.zeros(second_layer, first_layer)).type_as(data)
sigma = Variable(torch.ones(second_layer, first_layer)).type_as(data)
bias_mu = Variable(torch.zeros(second_layer)).type_as(data)
bias_sigma = Variable(torch.ones(second_layer)).type_as(data)
w_prior, b_prior = Normal(mu, sigma), Normal(bias_mu, bias_sigma)
mu2 = Variable(torch.zeros(1, second_layer)).type_as(data)
sigma2 = Variable(torch.ones(1, second_layer)).type_as(data)
bias_mu2 = Variable(torch.zeros(1)).type_as(data)
bias_sigma2 = Variable(torch.ones(1)).type_as(data)
w_prior2, b_prior2 = Normal(mu2, sigma2), Normal(bias_mu2, bias_sigma2)
priors = {'hidden.weight': w_prior,
'hidden.bias': b_prior,
'predict.weight': w_prior2,
'predict.bias': b_prior2}
lifted_module = pyro.random_module("module", regression_model, priors)
lifted_reg_model = lifted_module()
with pyro.iarange("map", N, subsample=data):
x_data = data[:, :-1]
y_data = data[:, -1]
# run the regressor forward conditioned on inputs
prediction_mean = lifted_reg_model(x_data).squeeze()
pyro.sample("obs",
Normal(prediction_mean, Variable(torch.ones(data.size(0))).type_as(data)),
obs=y_data.squeeze())
def guide(data):
#print(data.data)
w_mu = Variable(torch.randn(second_layer, first_layer).type(torch.float64), requires_grad=True)
w_log_sig = Variable(0.1 * torch.ones(second_layer, first_layer).type_as(data.data), requires_grad=True)
b_mu = Variable(torch.randn(second_layer).type_as(data.data), requires_grad=True)
b_log_sig = Variable(0.1 * torch.ones(second_layer).type_as(data.data), requires_grad=True)
# register learnable params in the param store
mw_param = pyro.param("guide_mean_weight", w_mu)
sw_param = softplus(pyro.param("guide_log_sigma_weight", w_log_sig))
mb_param = pyro.param("guide_mean_bias", b_mu)
sb_param = softplus(pyro.param("guide_log_sigma_bias", b_log_sig))
# gaussian guide distributions for w and b
w_dist = Normal(mw_param, sw_param)
b_dist = Normal(mb_param, sb_param)
w_mu2 = Variable(torch.randn(1, second_layer).type_as(data.data), requires_grad=True)
w_log_sig2 = Variable(0.1 * torch.randn(1, second_layer).type_as(data.data), requires_grad=True)
b_mu2 = Variable(torch.randn(1).type_as(data.data), requires_grad=True)
b_log_sig2 = Variable(0.1 * torch.ones(1).type_as(data.data), requires_grad=True)
# register learnable params in the param store
mw_param2 = pyro.param("guide_mean_weight2", w_mu2)
sw_param2 = softplus(pyro.param("guide_log_sigma_weight2", w_log_sig2))
mb_param2 = pyro.param("guide_mean_bias2", b_mu2)
sb_param2 = softplus(pyro.param("guide_log_sigma_bias2", b_log_sig2))
# gaussian guide distributions for w and b
w_dist2 = Normal(mw_param2, sw_param2)
b_dist2 = Normal(mb_param2, sb_param2)
dists = {'hidden.weight': w_dist,
'hidden.bias': b_dist,
'predict.weight': w_dist2,
'predict.bias': b_dist2}
# overloading the parameters in the module with random samples from the guide distributions
lifted_module = pyro.random_module("module", regression_model, dists)
# sample a regressor
return lifted_module()
# instantiate optim and inference objects
optim = Adam({"lr": 0.001})
elbo = Trace_ELBO()
svi = SVI(model, guide, optim, loss=elbo)
N = len(X_train)
for j in range(10000):
epoch_loss = 0.0
perm = torch.randperm(N)
# shuffle data
data = data[perm]
# get indices of each batch
all_batches = get_batch_indices(N, 64)
for ix, batch_start in enumerate(all_batches[:-1]):
batch_end = all_batches[ix + 1]
batch_data = data[batch_start: batch_end]
epoch_loss += svi.step(batch_data)
if j % 100 == 0:
print(j, "avg loss {}".format(epoch_loss/float(N)))
preds = []
for i in range(10000):
sampled_reg_model = guide(X_test)
pred = sampled_reg_model(X_test).data.numpy().flatten()
preds.append(pred)
preds = np.array(preds)
mean = np.mean(preds, axis=0)
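# note: the predictive std below is scaled down by a factor of 10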
std = np.std(preds, axis=0)/10
y_test = Y_test.data.numpy()
x = np.arange(len(y_test))
plt.xlim((0, y_test.shape[0]))
sm = np.array([x for x in range(y_test.shape[0])])
plt.plot(sm, y_test[:y_test.shape[0]])
plt.fill_between(x, mean-std, mean+std, alpha = 0.3, color = 'orange')
plt.show() |
And this is the code I use with ZhuSuan now:
def main():
#tf.set_random_seed(1237)
#np.random.seed(1234)
# Load UCI Boston housing data
#data_path = os.path.join(conf.data_dir, 'housing.data')
#x_train, y_train, x_valid, y_valid, x_test, y_test = \
#dataset.load_uci_boston_housing(data_path)
#x_train = np.vstack([x_train, x_valid])
#y_train = np.hstack([y_train, y_valid])
x_train,y_train = rollout(policy=random_policy, timesteps=STEP)
for i in range(1,50):
X_, Y_ = rollout(policy=random_policy, timesteps=STEP)
x_train = np.vstack((x_train, X_)).astype('float32')
y_train = np.vstack((y_train, Y_)).astype('float32')
N, n_x = x_train.shape
x_test,y_test1 = rollout(policy=random_policy, timesteps=STEP)
for i in range(1,50):
X_, Y_ = rollout(policy=random_policy, timesteps=STEP)
x_test = np.vstack((x_test, X_)).astype('float32')
y_test1 = np.vstack((y_test1, Y_)).astype('float32')
# Standardize data
x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
y_train, y_test1)
#print(x_train.shape)
#print(y_train.shape)
#print(mean_y_train)
#print(std_y_train)
# Define model parameters
#std_y_train = np.std(y_train)
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)
#print(y_train.shape)
n_hiddens = [50]
print(N,n_x)
# Build the computation graph
n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
x = tf.placeholder(tf.float32, shape=[None, n_x])
y = tf.placeholder(tf.float32, shape=[None])
layer_sizes = [n_x] + n_hiddens + [1]
print(layer_sizes)
w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]
def log_joint(observed):
model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
log_pws = model.local_log_prob(w_names)
log_py_xw = model.local_log_prob('y')
return tf.add_n(log_pws) + log_py_xw * N
variational,y_qw = mean_field_variational(x,layer_sizes, n_particles)
qw_outputs = variational.query(w_names, outputs=True, local_log_prob=True)
latent = dict(zip(w_names, qw_outputs))
lower_bound = zs.variational.elbo(
log_joint, observed={'y': y}, latent=latent, axis=0)
cost = tf.reduce_mean(lower_bound.sgvb())
lower_bound = tf.reduce_mean(lower_bound)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
infer_op = optimizer.minimize(cost)
# prediction: rmse & log likelihood
observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
observed.update({'y': y})
model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
#y_output = variational.outputs('y')
y_pred = tf.reduce_mean(y_mean, 0)
rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
log_py_xw = model.local_log_prob('y')
log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
tf.log(std_y_train)
# Define training/evaluation parameters
lb_samples = 10
ll_samples = 5000
epochs = 500
batch_size = 10
iters = int(np.floor(x_train.shape[0] / float(batch_size)))
test_freq = 10
# Run the inference
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(1, epochs + 1):
lbs = []
for t in range(iters):
x_batch = x_train[t * batch_size:(t + 1) * batch_size]
#print(x_batch.shape)
y_batch = y_train[t * batch_size:(t + 1) * batch_size]
#print(y_batch.shape)
_, lb,cost_ = sess.run(
[infer_op, lower_bound,cost],
feed_dict={n_particles: lb_samples,
x: x_batch, y: y_batch})
lbs.append(lb)
print('Epoch {}: Lower bound = {}:cost={}'.format(epoch, np.mean(lbs),cost_))
if epoch % test_freq == 0:
test_lb, test_rmse, test_ll = sess.run(
[lower_bound, rmse, log_likelihood],
feed_dict={n_particles: ll_samples,
x: x_test, y: y_test})
print('>> TEST')
print('>> Test lower bound = {}, rmse = {}, log_likelihood = {}'
.format(test_lb, test_rmse, test_ll))
y_pred1 = []
y_pred_ = sess.run([y_qw],feed_dict={n_particles: ll_samples,x: x_test, y: y_test})
y_pred_ = np.array(y_pred_[0])
preds = np.array(y_pred_*std_y_train+mean_y_train)
mean = np.mean(preds,axis=0)
print(mean.shape)
std = np.std(preds,axis = 0)
print(std.shape)
#for i in range(100):
#y_pred_ = sess.run([y_qw],feed_dict={n_particles: ll_samples,x: x_test, y: y_test})
#y_pred_ = np.array(y_pred_[0]).reshape(-1,1)
#y_pred1.append(y_pred_)
#print(np.array(y_pred_).shape)
#print(y_pred1.shape)
#preds = np.array(y_pred1*std_y_train+mean_y_train)
#print(preds.shape)
#mean = np.mean(preds, axis=0).reshape(200)
#std = np.std(preds, axis=0).reshape(200)
x = np.arange(200).reshape(200)
#print(y_test.shape)
#print(preds.shape)
#print(mean.shape)
#print(std)
#print(x.shape)
#plt.figure()
#plt.plot(data[:,0:-1])
#plt.plot(data[:,-1], linestyle = '--')
#plt.show()
plt.figure()
plt.plot(x, y_test1)
plt.plot(x, mean, linestyle = '--')
#plt.plot(x, mean-std, linestyle = '--')
#plt.plot(x, mean+std, linestyle = '--')
plt.fill_between(x, mean-std, mean+std, alpha = 0.3, color = 'orange')
plt.show() |
I changed the code rather than just reusing it. The original code is:
def mean_field_variational(x, layer_sizes, n_particles):
with zs.BayesianNet() as variational:
ws = []
for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
w_mean = tf.get_variable('w_mean_' + str(i), shape=[1, n_out, n_in + 1],
initializer=tf.constant_initializer(0.))
w_logstd = tf.get_variable('w_logstd_' + str(i), shape=[1, n_out, n_in + 1],
initializer=tf.constant_initializer(0.))
ws.append(zs.Normal('w' + str(i), w_mean, logstd=w_logstd,
n_samples=n_particles, group_ndims=2))
return variational
There is no forward pass in the original mean_field_variational, so I added the forward part myself. |
It seems your torch code implements a deep linear network, while the Zhusuan model has ReLU nonlinearity. In that case you should certainly expect different results... |
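Just for reference, a minimal sketch of the torch network with a ReLU between the two linear layers, so that its forward pass matches the ZhuSuan model's (this is only an illustration of the point above, not your original code):
import torch
import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)  # hidden layer
        self.predict = torch.nn.Linear(n_hidden, 1)  # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))  # nonlinearity matching the ZhuSuan model
        return self.predict(x)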
Hi, thank you for your great work first! When I test examples/bayesian_neural_nets/bayesian_nn.py, I find that the std of y_mean, which is what I want to predict, is too small. On the same data, when I tested with Pyro (PyTorch), the std looks good.
I also read the code and found that y_mean is computed through the likelihood function; shouldn't it be computed from q(w)?