# Prepare data for input

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import edward as ed

In [2]:
# Fix the random seed up front so that the stochastic parts of the notebook
# (variable initialisation, sampling during inference) are repeatable.
ed.set_seed(12227)

In [3]:
# Load the prepared spike-count table, then show its dimensions and first few
# rows as a quick sanity check.
dat = pd.read_csv('data/prepared_data.csv')
print(dat.shape)
dat.head()

(687276, 7)


Unnamed: 0,stim,unit,isfirst,isrewarded,count,trial,time
0,0,1068,0,0,4,57277,0
1,0,1069,0,0,1,57277,0
2,0,1070,0,0,6,57277,0
3,0,1071,0,0,0,57277,0
4,0,1072,1,0,3,60250,0


Columns are:
- stim: original stimulus number
- unit: recorded neuron number
- isfirst, isrewarded: potential regressors of interest
- count: spike count during stimulus display period
- trial: original trial number
- time: unique stimulus code in trimmed dataset (very rare stims removed)

We need to do a couple of things to prep the data:
- turn unit codes into a 0-based unit index (as needed for indexing with `tf.gather`)
- turn time into a 0-based stim index

In [4]:
# Work on a small slice while testing: keep only rows whose unit code is
# below 1050 and whose stimulus code (`time`) is below 300.
dat = dat[(dat['unit'] < 1050) & (dat['time'] < 300)]
dat.shape

(27038, 7)

In [5]:
# Re-code the raw unit and stimulus identifiers as contiguous 0-based integer
# indices: the inverse mapping returned by np.unique gives, for every row, the
# position of its code among the sorted unique codes.
_, unit = np.unique(dat['unit'], return_inverse=True)
_, stim = np.unique(dat['time'], return_inverse=True)

# Sanity check for each index: smallest value, largest value, distinct count.
print(unit.min(), unit.max(), np.unique(unit).size)

print(stim.min(), stim.max(), np.unique(stim).size)

0 48 49
0 283 284


In [6]:
# Observed spike counts, one per row. Bracket access is required here because
# `dat.count` would resolve to the DataFrame.count method, not the column.
count = dat['count'].values
# Design matrix of the specified regressors, one row per observation.
Xdat = dat[['isfirst', 'isrewarded']].values
Xdat.shape

(27038, 2)

# Define the model

In [7]:
# Sizes used to shape the model's tensors
N = len(dat)  # number of observations (rows of the data table)
NU = np.unique(unit).size  # number of distinct units
NS = np.unique(stim).size  # number of distinct stimuli
P = Xdat.shape[1]  # number of specified regressors (columns of Xdat)
K = 5  # number of binary latent features

## Generative (p) model

In [8]:
# Generative model for the spike counts. Each row's latent log-rate-like
# quantity `lam` has a mean built from a per-unit baseline (A), per-unit
# weights on the specified regressors (B), and per-unit loadings (C) on K
# binary latent features (Z) attached to each stimulus.
# NOTE(review): statement order is kept as-is on purpose; with a graph-level
# seed, the order in which ops are created can affect the random streams.
with tf.variable_scope("pmodel"):
    # Standard-normal priors: A has one entry per unit, B is (NU, P), C is (NU, K).
    A = ed.models.Normal(mu=tf.zeros(NU), sigma=tf.ones(NU), name='A')
    B = ed.models.Normal(mu=tf.zeros((NU, P)), sigma=tf.ones((NU, P)), name='B')
    C = ed.models.Normal(mu=tf.zeros((NU, K)), sigma=tf.ones((NU, K)), name='C')

    # Placeholder for the (N, P) regressor matrix; fed with Xdat at inference time.
    X = ed.placeholder(tf.float32, (N, P), name='X')

    # delta_k ~ Beta(3, 1), one per latent feature; summaries are logged for TensorBoard.
    delta = ed.models.Beta(a=3 * tf.ones(K), b=tf.ones(K), name='delta')
    tf.scalar_summary('mean_delta', tf.reduce_mean(delta))
    log_delta = tf.log(delta)
    tf.scalar_summary('min_log_delta', tf.reduce_min(log_delta))
    tf.scalar_summary('mean_log_delta', tf.reduce_mean(log_delta))

    # pi_k = delta_1 * ... * delta_k, computed in log space as exp(cumsum(log delta)).
    # Since each delta lies in (0, 1), pi is non-increasing in k.
    pi = tf.exp(tf.cumsum(log_delta), name='pi')
    tf.scalar_summary('min_pi', tf.reduce_min(pi))

    # Z is (NS, K): feature k is switched on for each stimulus with probability pi_k
    # (the same pi row is tiled across all NS stimuli).
    Z = ed.models.Bernoulli(p=tf.tile(tf.expand_dims(pi, 0), [NS, 1]), name='Z')
    tf.scalar_summary('mean_Z', tf.reduce_mean(tf.to_float(Z)))

    # sig is the log of lam's standard deviation (it enters below as exp(sig)).
    sig = ed.models.Normal(mu=[-0.1], sigma=[0.1], name='sig')

    # Per-row mean of lam: A[unit] + sum_p B[unit, p] * X[p] + sum_k C[unit, k] * Z[stim, k],
    # where `unit` and `stim` are the 0-based index arrays computed earlier.
    lam = ed.models.Normal(mu=(tf.gather(A, unit) + tf.reduce_sum(tf.gather(B, unit) * X, 1) + 
           tf.reduce_sum(tf.gather(C, unit) * tf.gather(tf.to_float(Z), stim), 1)), 
                           sigma=tf.exp(sig), name='lam')
    tf.scalar_summary('mean_lam', tf.reduce_mean(lam))


    # Observed counts: Poisson with rate softplus(lam), which keeps the rate positive.
    # NOTE(review): `value=tf.ones(N)` presumably fixes the variable's tensor value
    # instead of drawing a sample -- confirm against the Edward version in use.
    cnt = ed.models.Poisson(lam=tf.nn.softplus(lam), value=tf.ones(N), name='cnt')

## Recognition (q) model

In [9]:
# Recognition (variational) model: one factor per latent variable of the
# generative model, each with its own trainable parameters. The shapes mirror
# the corresponding prior so the two can be paired up in ed.KLqp.
# Statement order is kept as in the original cell, because with a graph-level
# seed the order in which ops are created can affect the random streams.
with tf.variable_scope("qmodel"):
    # Gaussian factors for the per-unit baseline, regressor weights and latent
    # loadings. As the class name indicates, the `sigma` variable is
    # unconstrained and is mapped through a softplus to obtain the scale.
    q_A = ed.models.NormalWithSoftplusSigma(mu=tf.Variable(tf.random_normal((NU,))), 
                                            sigma=tf.Variable(tf.random_uniform((NU,))),
                                            name='A')
    q_B = ed.models.NormalWithSoftplusSigma(mu=tf.Variable(tf.random_normal((NU, P))), 
                                            sigma=tf.Variable(tf.random_uniform((NU, P))),
                                            name='B')
    q_C = ed.models.NormalWithSoftplusSigma(mu=tf.Variable(tf.random_normal((NU, K))), 
                                            sigma=tf.Variable(tf.random_uniform((NU, K))),
                                            name='C')
    # Bernoulli factor for the (NS, K) binary features; `p` holds unconstrained
    # logits that the class squashes with a sigmoid.
    q_Z = ed.models.BernoulliWithSigmoidP(p=tf.Variable(tf.random_normal((NS, K))), name='Z')
    # Log the mean of the *variational* sample q_Z. The original summarised
    # the prior's Z here, so 'mean_q_Z' just duplicated 'mean_Z'.
    tf.scalar_summary('mean_q_Z', tf.reduce_mean(tf.to_float(q_Z)))

    # Beta factor for delta; both shape parameters are initialised in [1, 2)
    # before the softplus transform.
    q_delta = ed.models.BetaWithSoftplusAB(a=tf.Variable(1 + tf.random_uniform((K,))),
                                           b=tf.Variable(1 + tf.random_uniform((K,))),
                                           name='delta')
    tf.scalar_summary('mean_q_delta', tf.reduce_mean(q_delta))

    # One Gaussian factor per observation for the latent lam.
    q_lam = ed.models.NormalWithSoftplusSigma(mu=tf.Variable(tf.random_normal((N,))),
                                              sigma=tf.Variable(tf.random_uniform((N,))),
                                              name='lam')
    tf.scalar_summary('mean_q_lam', tf.reduce_mean(q_lam))

    # Gaussian factor for sig (the log standard deviation of lam); its mean is
    # initialised in (-0.1, 0], near the prior mean of -0.1.
    q_sig = ed.models.NormalWithSoftplusSigma(mu=tf.Variable(-0.1 * tf.random_uniform((1,))),
                                              sigma=tf.Variable(tf.random_uniform((1,))),
                                              name='sig')
    tf.scalar_summary('mean_q_sig', tf.reduce_mean(q_sig))


# Do variational inference

In [10]:
# Bind the observed quantities: spike counts for `cnt`, regressors for the
# placeholder `X`.
data = {
    cnt: count,
    X: Xdat,
}
# Pair every latent variable of the generative model with its variational
# factor and set up KL(q || p) variational inference.
inference = ed.KLqp(
    {
        A: q_A,
        B: q_B,
        C: q_C,
        Z: q_Z,
        sig: q_sig,
        delta: q_delta,
        lam: q_lam,
    },
    data,
)

In [11]:
# Build the op that initialises every tf.Variable created so far.
# NOTE(review): `init` is not referenced in the cells visible here -- confirm
# whether it is still needed or whether inference.run() handles initialisation.
init = tf.initialize_all_variables()

# Notes before inference:

- The `logdir` keyword specifies the place to put the log file (assuming you've instrumented the code to save events, etc.). If a subdirectory is given, pointing Tensorboard at the parent directory allows you to compare across subdirectories (runs).
    - I'm using the `jmp/instrumented` branch of the `jmxpearson/edward` fork
- I had to lower the learning rate in Adam to avoid NaNs early on in learning. Gradient clipping might solve the same problem.
- I'm currently feeding the entire (subsetted) dataset as a single batch on every iteration; this should probably be switched to minibatches.
- I've used `n_samples` = 1, 5, 10, and 25, which all seem pretty similar after 10k iterations. 

In [12]:
# Optimise the variational objective: 10k iterations, progress printed every
# 100, 25 samples per gradient estimate, events written under data/run1, and
# Adam with a small step size (see the note above about early NaNs).
inference.run(
    n_iter=10000,
    n_print=100,
    n_samples=25,
    logdir='data/run1',
    optimizer=tf.train.AdamOptimizer(1e-4)
)

Iteration     1 [  0%]: Loss = 415586.750
Iteration   100 [  1%]: Loss = 664088.250
Iteration   200 [  2%]: Loss = 609563.062
Iteration   300 [  3%]: Loss = 396827.719
Iteration   400 [  4%]: Loss = 304928.750
Iteration   500 [  5%]: Loss = 393331.438
Iteration   600 [  6%]: Loss = 293944.188
Iteration   700 [  7%]: Loss = 632169.188
Iteration   800 [  8%]: Loss = 334810.625
Iteration   900 [  9%]: Loss = 379632.156
Iteration  1000 [ 10%]: Loss = 491948.125
Iteration  1100 [ 11%]: Loss = 377430.438
Iteration  1200 [ 12%]: Loss = 626805.875
Iteration  1300 [ 13%]: Loss = 454571.719
Iteration  1400 [ 14%]: Loss = 345653.094
Iteration  1500 [ 15%]: Loss = 248194.922
Iteration  1600 [ 16%]: Loss = 301853.438
Iteration  1700 [ 17%]: Loss = 272089.375
Iteration  1800 [ 18%]: Loss = 375434.562
Iteration  1900 [ 19%]: Loss = 318505.281
Iteration  2000 [ 20%]: Loss = 433494.469
Iteration  2100 [ 21%]: Loss = 296638.406
Iteration  2200 [ 22%]: Loss = 335983.094
Iteration  2300 [ 23%]: Loss = 198