In [1]:
import tensorflow as tf
from tensorflow.contrib.losses import mean_squared_error
import numpy as np
import noisy_mark_dataset
from math import floor
from datetime import datetime


In [2]:
start = datetime.now()
print('Loading data...')

data_sets = noisy_mark_dataset.load()

elapsed = datetime.now() - start

print('Data loaded in {} seconds'.format(elapsed))

Loading data...
Data loaded in 0:00:07.564910 seconds


In [3]:
SIZE = 100 * 100
PART_SIZE = 50
x = tf.placeholder(tf.float32, [None, SIZE])

W1 = tf.Variable(tf.zeros([SIZE, PART_SIZE]))
b1 = tf.Variable(tf.zeros([PART_SIZE]))
h = tf.sigmoid(tf.matmul(x, W1) + b1)

W2 = tf.Variable(tf.zeros([PART_SIZE, SIZE]))
b2 = tf.Variable(tf.zeros([SIZE]))
y = tf.sigmoid(tf.matmul(h, W2) + b2)

y_ = tf.placeholder(tf.float32, [None, SIZE])

In [4]:
beta = 1.0
rho = 0.1
lmbd = 0.03


def squared_frobenius_norm(tensor):
    with tf.name_scope("squared_frobenius_norm"):
        squares = tf.square(tensor)
        squared_frobenius = tf.reduce_sum(squares)
        return squared_frobenius
    
def kullback_leibler(tensor):
    with tf.name_scope("kullback_leibler_divergence"):
        shape = tensor.get_shape()
        N = shape[0]
        K = shape[1]
        
        total = 0
        for k in range(K):
            rho_j = tf.reduce_mean(tf.slice(tensor, [0, k], [-1, 1]))
            kl = rho * tf.log(rho / rho_j) + (1 - rho) * tf.log((1 - rho) / (1 - rho_j))
            total += kl
            
        return total

In [5]:
mean_squared_loss = mean_squared_error(labels=y_, predictions=y)
regularization = (lmbd / 2) * (squared_frobenius_norm(W1) + squared_frobenius_norm(W2))
kl_divergence = kullback_leibler(h)

loss = mean_squared_loss + beta * kl_divergence + regularization

In [6]:
train_step = tf.train.GradientDescentOptimizer(9.0).minimize(loss)

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

train_marks = data_sets.train.marks()

# Train
epochs = 15
batch_size = 50
batches = floor(len(data_sets.train.marks()) / batch_size)

train = data_sets.train
test_noises = data_sets.test.noisy()
test_marks = data_sets.test.marks()

for epoch in range(epochs):
    for batch in range(batches):
        batch_noises, batch_marks = train.next_batch(batch_size)
        sess.run(train_step, feed_dict={ x: batch_noises, y_: batch_marks })
    
    print('Epoch {}'.format(epoch))
    print(sess.run(loss, feed_dict={ x: test_noises, y_: test_marks }))
    

# print(sess.run(y, feed_dict={ x: test_noises, y_: test_marks }))

Epoch 0
673.662
Epoch 1
3990.55
Epoch 2
308.311
Epoch 3
1826.42
Epoch 4
147.969
Epoch 5
302.485
Epoch 6
164.565
Epoch 7
161.948
Epoch 8
173.255
Epoch 9
3647.67
Epoch 10
269.646
Epoch 11
2567.66
Epoch 12
416.074
Epoch 13
325.777
Epoch 14
1792.4


In [None]:
###### W/ KL Divergence
#### Learning Rate 9.0
### 0 Noise, SIZE 100 * 100
## lambda 0.03
# 26682.3 - 5119.43, 50 * 50

In [7]:
###### No KL Divergence in Loss
#### Learning Rate 9.0
### 0 Noise, SIZE 100 * 100
## lambda 0.3
# 6.45e+06 - 6.33e+12, 50 * 50
## lambda 0.03
# 131.964 - 131.85, 50 * 50

#### Learning Rate 3.0
### 0 Noise, SIZE 100 * 100
## lambda 1.0
# 1.818e+07 - 1.22e+15, 50 * 50
## lambda 0.3
# 132.386 - 132.668, 50 * 50

#### Learning Rate 1.0
### Noise 1/20, 0-54 values, SIZE 100 * 100
## lambda 0.5
# 132.262 - 132.231, no constriction
# 132.21 - 132.2, 50 * 50
# 132.158 - 132.157, 50
# 132.159 - 132.16, 5
## lambda 0.1
# 131.964 - 131.976, 50 * 50
### Noise 1/3, 0-150 values, SIZE 100 * 100
## lambda 0.1
# 131.97 - 131.97, 50 * 50
### 0 Noise, SIZE 100 * 100
## lambda 1.0
# 132.259 - 132.246, 50 * 50
## lambda 0.1
# 131.797 - 131.802, no constriction
# 131.97 - 131.977, 50 * 50
# 132.036 - 132.045, 50 * 50
## lambda 0.03
# 131.569 - 131.558, no constriction
# 131.809 - 131.751, 50 * 50
# 131.808 - 131.748, 50 * 50