In [1]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os

import tensorflow as tf
from six.moves import range, zip
import numpy as np
import zhusuan as zs

In [None]:
def standardize_data(X_train, X_test, X_valid):
    X_mean = np.mean(X_train, axis=0)
    X_std = np.std(X_train, axis=0)

    X_train -= X_mean
    X_train /= X_std
    X_test -= X_mean
    X_test /= X_std
    X_valid -= X_mean
    X_valid /= X_std

    return X_train, X_test, X_valid

def standardize_data_with_std(y_train, y_test):
    y_mean = np.mean(y_train, axis=0)
    y_std = np.std(y_train, axis=0)

    y_train -= y_mean
    y_train /= y_std
    y_test -= y_mean
    y_test /= y_std
    
    return y_train, y_test, y_mean, y_std

In [2]:
@zs.meta_bayesian_net(scope="bnn", reuse_variables=True)
def build_bnn(x, layer_sizes, n_particles):
    bn = zs.BayesianNet()
    h = tf.tile(x[None, ...], [n_particles, 1, 1])
    for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        w = bn.normal("w" + str(i), tf.zeros([n_out, n_in + 1]), std=1.,
                      group_ndims=2, n_samples=n_particles)
        h = tf.concat([h, tf.ones(tf.shape(h)[:-1])[..., None]], -1)
        h = tf.einsum("imk,ijk->ijm", w, h) / tf.sqrt(
            tf.cast(tf.shape(h)[2], tf.float32))
        if i < len(layer_sizes) - 2:
            h = tf.nn.relu(h)

    y_mean = bn.deterministic("y_mean", tf.squeeze(h, 2))
    y_logstd = tf.get_variable("y_logstd", shape=[],
                               initializer=tf.constant_initializer(0.))
    bn.normal("y", y_mean, logstd=y_logstd)
    return bn

In [3]:
@zs.reuse_variables(scope="variational")
def build_mean_field_variational(layer_sizes, n_particles):
    bn = zs.BayesianNet()
    for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        w_mean = tf.get_variable(
            "w_mean_" + str(i), shape=[n_out, n_in + 1],
            initializer=tf.constant_initializer(0.))
        w_logstd = tf.get_variable(
            "w_logstd_" + str(i), shape=[n_out, n_in + 1],
            initializer=tf.constant_initializer(0.))
        bn.normal("w" + str(i), w_mean, logstd=w_logstd,
                  n_samples=n_particles, group_ndims=2)
    return bn




In [4]:
def main():
    tf.set_random_seed(10)
    np.random.seed(90)

    #Data loading
    dataset = tf.data.experimental.CsvDataset(
    "High_Tc_train.csv",
    [tf.float32,  # Required field, use dtype or empty tensor
     tf.constant([0.0], dtype=tf.float32),  # Optional field, default to 0.0
     tf.int32,  # Required field, use dtype or empty tensor
     ],
    )
    
    dataset_size = tf.size(dataset)
    
    #Divide into features and labels
    dataset_size = tf.size(dataset)
    train_size = int(0.7 * dataset_size)
    test_size = int(0.3 * dataset_size)

    dataset = dataset.shuffle()
    train_dataset = full_dataset.take(train_size)
    test_dataset = full_dataset.skip(train_size)       

    
    x_train = train_dataset.iloc[:,:-1]
    y_train = train_dataset.iloc[:,:-1]
    x_test = test_dataset.iloc[:,:-1]
    y_test = test_dataset.iloc[:,:-1]
    
    
    n_train, x_dim = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [100]

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x = tf.placeholder(tf.float32, shape=[None, x_dim])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [x_dim] + n_hiddens + [1]
    w_names = ["w" + str(i) for i in range(len(layer_sizes) - 1)]

    model = build_bnn(x, layer_sizes, n_particles)
    variational = build_mean_field_variational(layer_sizes, n_particles)

    def log_joint(bn):
        log_pws = bn.cond_log_prob(w_names)
        log_py_xw = bn.cond_log_prob('y')
        return tf.add_n(log_pws) + tf.reduce_mean(log_py_xw, 1) * n_train

    model.log_joint = log_joint

    lower_bound = zs.variational.elbo(
        model, {'y': y}, variational=variational, axis=0)
    cost = lower_bound.sgvb()

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    infer_op = optimizer.minimize(cost)

    # prediction: rmse & log likelihood
    y_mean = lower_bound.bn["y_mean"]
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    
    total_error = tf.reduce_sum(tf.square(tf.subtract(y, tf.reduce_mean(y))))
    unexplained_error = tf.reduce_sum(tf.square(tf.subtract(y, y_pred)))
    r_squared = tf.subtract(tf.div(total_error, unexplained_error),1.0)
    
    log_py_xw = lower_bound.bn.cond_log_prob("y")
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - tf.log(
        std_y_train)

    # Define training/evaluation parameters
    lb_samples = 100
    ll_samples = 10000
    epochs = 1000
    batch_size = 10
    iters = (n_train-1) // batch_size + 1
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            perm = np.random.permutation(x_train.shape[0])
            x_train = x_train[perm, :]
            y_train = y_train[perm]
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={n_particles: lb_samples,
                               x: x_batch, y: y_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_rmse, test_ll, test_r = sess.run(
                    [rmse, log_likelihood, r_squared],
                    feed_dict={n_particles: ll_samples,
                               x: x_test, y: y_test})
                print('>> TEST')
                print('>> Test rmse = {}, log_likelihood = {}, r_squared = {}'
                      .format(test_rmse, test_ll, test_r))

In [5]:
if __name__ == "__main__":
    main()




Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1: Lower bound = -725.4192504882812
Epoch 2: Lower bound = -697.8917236328125
Epoch 3: Lower bound = -653.2720947265625
Epoch 4: Lower bound = -592.192626953125
Epoch 5: Lower bound = -554.4517822265625
Epoch 6: Lower bound = -544.1273193359375
Epoch 7: Lower bound = -546.5131225585938
Epoch 8: Lower bound = -533.01953125
Epoch 9: Lower bound = -533.8604736328125
Epoch 10: Lower bound = -526.9624633789062
>> TEST
>> Test rmse = 5.130523204803467, log_likelihood = -3.022861957550049, r_squared = -1.6757659912109375
Epoch 11: Lower bound = -538.0867309570312
Epoch 12: Lower bound = -527.0652465820312
Epoch 13: Lower bound = -523.5552978515625
Epoch 14: Lower bound = -524.8297119140625
Epoch 15: Lower bound = -520.8697509765625
Epoch 16: Lower bound = -524.641845703125
Epoch 17: Lower bound = -521.964599609375
Epoch 18: Lower bound = -516.8428344726562
Epoch 19: Lower bound = -519.8618774414062
Epoch 20

Epoch 140: Lower bound = -483.237548828125
>> TEST
>> Test rmse = 4.786281108856201, log_likelihood = -2.9032163619995117, r_squared = -1.1803951263427734
Epoch 141: Lower bound = -473.9273986816406
Epoch 142: Lower bound = -467.49945068359375
Epoch 143: Lower bound = -474.6827392578125
Epoch 144: Lower bound = -472.995849609375
Epoch 145: Lower bound = -469.80596923828125
Epoch 146: Lower bound = -469.29327392578125
Epoch 147: Lower bound = -471.953125
Epoch 148: Lower bound = -471.98309326171875
Epoch 149: Lower bound = -471.3066711425781
Epoch 150: Lower bound = -467.8601379394531
>> TEST
>> Test rmse = 4.543425559997559, log_likelihood = -2.8733348846435547, r_squared = -0.7611498832702637
Epoch 151: Lower bound = -475.7891540527344
Epoch 152: Lower bound = -480.5811767578125
Epoch 153: Lower bound = -465.5335388183594
Epoch 154: Lower bound = -475.64898681640625
Epoch 155: Lower bound = -469.3809509277344
Epoch 156: Lower bound = -466.1527404785156
Epoch 157: Lower bound = -468.97

Epoch 289: Lower bound = -457.9539794921875
Epoch 290: Lower bound = -458.2725830078125
>> TEST
>> Test rmse = 3.920166254043579, log_likelihood = -2.7528133392333984, r_squared = 0.6938486099243164
Epoch 291: Lower bound = -461.2083435058594
Epoch 292: Lower bound = -457.7524719238281
Epoch 293: Lower bound = -458.2931823730469
Epoch 294: Lower bound = -457.51727294921875
Epoch 295: Lower bound = -459.35296630859375
Epoch 296: Lower bound = -460.389404296875
Epoch 297: Lower bound = -459.087890625
Epoch 298: Lower bound = -463.639404296875
Epoch 299: Lower bound = -460.33184814453125
Epoch 300: Lower bound = -465.447265625
>> TEST
>> Test rmse = 3.8978304862976074, log_likelihood = -2.7530899047851562, r_squared = 0.7592902183532715
Epoch 301: Lower bound = -461.4994812011719
Epoch 302: Lower bound = -456.76971435546875
Epoch 303: Lower bound = -460.1501770019531
Epoch 304: Lower bound = -462.47088623046875
Epoch 305: Lower bound = -459.5576171875
Epoch 306: Lower bound = -464.6168518

Epoch 438: Lower bound = -463.65191650390625
Epoch 439: Lower bound = -461.147216796875
Epoch 440: Lower bound = -475.72784423828125
>> TEST
>> Test rmse = 3.9857189655303955, log_likelihood = -2.7551639080047607, r_squared = 0.5080962181091309
Epoch 441: Lower bound = -461.6631164550781
Epoch 442: Lower bound = -465.19891357421875
Epoch 443: Lower bound = -457.3375549316406
Epoch 444: Lower bound = -460.6043701171875
Epoch 445: Lower bound = -462.529052734375
Epoch 446: Lower bound = -460.5952453613281
Epoch 447: Lower bound = -457.7446594238281
Epoch 448: Lower bound = -461.0580749511719
Epoch 449: Lower bound = -461.85113525390625
Epoch 450: Lower bound = -457.7628173828125
>> TEST
>> Test rmse = 3.876331090927124, log_likelihood = -2.747105121612549, r_squared = 0.8233532905578613
Epoch 451: Lower bound = -462.0819396972656
Epoch 452: Lower bound = -456.166748046875
Epoch 453: Lower bound = -463.56121826171875
Epoch 454: Lower bound = -457.9732360839844
Epoch 455: Lower bound = -45

Epoch 586: Lower bound = -464.4961242675781
Epoch 587: Lower bound = -460.08831787109375
Epoch 588: Lower bound = -459.0848388671875
Epoch 589: Lower bound = -463.5626220703125
Epoch 590: Lower bound = -459.3152160644531
>> TEST
>> Test rmse = 3.8129396438598633, log_likelihood = -2.7418160438537598, r_squared = 1.0185937881469727
Epoch 591: Lower bound = -459.3729248046875
Epoch 592: Lower bound = -462.66583251953125
Epoch 593: Lower bound = -460.8326721191406
Epoch 594: Lower bound = -464.99542236328125
Epoch 595: Lower bound = -461.3978576660156
Epoch 596: Lower bound = -459.75067138671875
Epoch 597: Lower bound = -462.60418701171875
Epoch 598: Lower bound = -469.15191650390625
Epoch 599: Lower bound = -462.78973388671875
Epoch 600: Lower bound = -464.18402099609375
>> TEST
>> Test rmse = 3.9636764526367188, log_likelihood = -2.7582831382751465, r_squared = 0.5695290565490723
Epoch 601: Lower bound = -456.93206787109375
Epoch 602: Lower bound = -465.48760986328125
Epoch 603: Lower b

Epoch 734: Lower bound = -465.8554382324219
Epoch 735: Lower bound = -460.67523193359375
Epoch 736: Lower bound = -461.4127502441406
Epoch 737: Lower bound = -458.8741149902344
Epoch 738: Lower bound = -465.1227111816406
Epoch 739: Lower bound = -461.806396484375
Epoch 740: Lower bound = -459.66644287109375
>> TEST
>> Test rmse = 3.850789785385132, log_likelihood = -2.7439517974853516, r_squared = 0.9008593559265137
Epoch 741: Lower bound = -460.0365295410156
Epoch 742: Lower bound = -456.98992919921875
Epoch 743: Lower bound = -459.9307861328125
Epoch 744: Lower bound = -460.69671630859375
Epoch 745: Lower bound = -464.4455261230469
Epoch 746: Lower bound = -463.5025329589844
Epoch 747: Lower bound = -458.8150329589844
Epoch 748: Lower bound = -462.4783020019531
Epoch 749: Lower bound = -461.1524353027344
Epoch 750: Lower bound = -461.6094970703125
>> TEST
>> Test rmse = 3.9002490043640137, log_likelihood = -2.7463226318359375, r_squared = 0.7521500587463379
Epoch 751: Lower bound = -

Epoch 883: Lower bound = -459.6554870605469
Epoch 884: Lower bound = -462.487548828125
Epoch 885: Lower bound = -457.5318908691406
Epoch 886: Lower bound = -454.6274719238281
Epoch 887: Lower bound = -464.19195556640625
Epoch 888: Lower bound = -462.89874267578125
Epoch 889: Lower bound = -460.3133544921875
Epoch 890: Lower bound = -464.08343505859375
>> TEST
>> Test rmse = 3.881465435028076, log_likelihood = -2.739084482192993, r_squared = 0.807957649230957
Epoch 891: Lower bound = -465.520751953125
Epoch 892: Lower bound = -464.4068908691406
Epoch 893: Lower bound = -462.9688415527344
Epoch 894: Lower bound = -459.6605529785156
Epoch 895: Lower bound = -461.37542724609375
Epoch 896: Lower bound = -461.754150390625
Epoch 897: Lower bound = -462.9823303222656
Epoch 898: Lower bound = -458.3606872558594
Epoch 899: Lower bound = -461.4613952636719
Epoch 900: Lower bound = -459.74957275390625
>> TEST
>> Test rmse = 3.972175121307373, log_likelihood = -2.7628049850463867, r_squared = 0.545