In [2]:
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

init_notebook_mode(connected=True)

import pickle_utils as pu
import numpy as np
from scipy.ndimage import convolve1d
from scipy.optimize import minimize
import tensorflow as tf

In [36]:
X, y = pu.load('clean_ventilation/interpolation/cat_108.pkl.gz')
# The last item in `total_counts` is the number of missing
total_counts = pu.load('clean_ventilation/interpolation/counts_cat_108.pkl.gz')
len_t = max(t for _, t in X)+1
N_cats = max(x for x, _ in X)+1
counts = np.zeros([N_cats, N_cats, len_t], dtype=np.float)

for (cat, t), c in zip(X, y):
    counts[cat,c,t] += 1
counts.shape, total_counts.shape
total_counts

array([4747726,   26269,    2079,    3120,     944,   19719,     314,
             3,       4,       4,       1,      12,       1,   13669,
        352874])

In [26]:
import pandas as pd

csv = pd.read_csv('../mimic.csv.gz', header=0, index_col='icustay_id',
                  usecols=['icustay_id', 'C Assistance Device'], engine='c')


elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



In [33]:
l = list(csv['C Assistance Device'].value_counts().items())
l.sort()
l

[(0.0, 456386),
 (1.0, 116282),
 (2.0, 45385),
 (3.0, 33026),
 (4.0, 2518),
 (5.0, 39497),
 (6.0, 945),
 (7.0, 121),
 (8.0, 125),
 (9.0, 27),
 (10.0, 13),
 (11.0, 36),
 (12.0, 32),
 (13.0, 35837),
 (14.0, 1)]

We create the training and test sets by drawing some examples at random

In [11]:
def make_training_test(counts, proportion):
    counts_ = np.reshape(counts, [-1])
    total_examples = np.sum(counts_)
    counts_ = counts_ / total_examples

    test_set = np.random.multinomial(int(total_examples*proportion),
                                        pvals=counts_)
    test_set = np.reshape(test_set, counts.shape)
    training_set = counts.astype(np.int) - test_set
    test_set += np.clip(training_set, -1000000000, 0)
    training_set = counts.astype(np.int) - test_set
    assert np.all(training_set >= 0)
    return (test_set, training_set)
test_set, training_set = make_training_test(counts, .3)

Now we normalize `counts` to produce `p`, and plot it.

In [12]:
norm = np.sum(counts+1, axis=1, keepdims=True)
norm = np.tile(norm, [1,N_cats,1])
counts[norm==0] += 1
counts[norm==0] += counts.shape[0]
p = counts/norm

plots = []
for i in range(p.shape[1]):
    plots.append({'y': p[0, i, :]})
    
iplot(plots)

We "compact" the training and test set so that only non-zero entries are represented, and then create a matrix with the distances of every test point to every time step: `filter_matrix`

In [13]:
np_filter = np.zeros([len_t*2+1], dtype=np.float)
np_filter[:len_t] = np.arange(len_t, 0, -1)
np_filter[len_t+1:] = np.arange(len_t)+1

training_time_filter = (np.sum(np.sum(training_set, axis=0), axis=0) != 0)
test_time_filter = (np.sum(np.sum(test_set, axis=0), axis=0) != 0)

compact_training = training_set[:,:,training_time_filter]
assert np.sum(training_set[:,:,~training_time_filter]) == 0.0
filter_matrix = np.zeros([np.sum(test_time_filter), compact_training.shape[2]], dtype=np.float32)
for i, t in enumerate(test_time_filter.nonzero()[0]):
    filter_matrix[i,:] = np_filter[len_t-t:len_t*2-t][training_time_filter]
    
np_smoothing = np.reshape(total_counts, [1,1,N_cats])/np.sum(total_counts)

filter_matrix = -filter_matrix
compact_training = np.transpose(compact_training, [0,2,1])
compact_test = np.transpose(test_set[:,:,test_time_filter], [0,2,1])

def probas(scale):
    f_mat = -np.array(list(np_filter[len_t-t:len_t*2-t] for t in range(len_t)))
    n = np.matmul(np.exp(f_mat*scale), np.transpose(training_set, [0,2,1])) + np_smoothing
    p = n/np.sum(n, axis=2, keepdims=True)
    return p

def log_likelihood(scale):
    n = (np.matmul(np.exp(filter_matrix*scale), compact_training) +
         np_smoothing)
    p = n/np.sum(n, axis=2, keepdims=True)
    return np.sum(compact_test*np.log(p))

In [14]:
def plot_probas(scale):
    plots = []
    ps = probas(scale)
    for i in range(ps.shape[2]):
        plots.append({'y': ps[0, :, i]})
    iplot(plots)

This is the length scale without being optimised

In [17]:
plot_probas()

In [103]:
ans = minimize(log_likelihood, 1)
ans

      fun: -319.222058720084
 hess_inv: array([[ 153276.83309858]])
      jac: array([ 0.])
  message: 'Optimization terminated successfully.'
     nfev: 66
      nit: 21
     njev: 22
   status: 0
  success: True
        x: array([ 24.16643107])

In [104]:
plot_probas(ans.x)

In [105]:
with tf.variable_scope("cat_0l"):
    scale = tf.get_variable("scale", shape=[], dtype=tf.float32,
                            trainable=True,
                            initializer=tf.constant_initializer(1))
    m1 = tf.exp(filter_matrix.astype(np.float32)*scale)
    m1 = tf.tile(tf.expand_dims(m1, 0), [N_cats, 1, 1])
    m2 = compact_training.astype(np.float32)
    n = (tf.matmul(m1, m2) + np_smoothing.astype(np.float32))
    p = n/tf.reduce_sum(n, axis=2, keep_dims=True)
    tf_ll = tf.reduce_sum(compact_test*tf.log(p))


In [106]:
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(-tf_ll)
patience = 5
times_waiting = 0
prev_ll = -np.inf
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100000):
        sess.run(train_op)
        if i%100 == 0:
            _, ll, s = sess.run([train_op, tf_ll, scale])
            print("step", i, ll, ", scale =", s)
            if ll > prev_ll:
                times_waiting = 0
            else:
                times_waiting += 1
                if times_waiting > patience:
                    break
            prev_ll = ll
        
    s = sess.run([scale])

step 0 -272.79 , scale = 0.998
step 100 -271.348 , scale = 0.896393
step 200 -269.849 , scale = 0.793593
step 300 -268.311 , scale = 0.690135
step 400 -266.792 , scale = 0.587614
step 500 -265.506 , scale = 0.492658
step 600 -264.76 , scale = 0.418723
step 700 -264.515 , scale = 0.374801
step 800 -264.469 , scale = 0.355068
step 900 -264.463 , scale = 0.347955
step 1000 -264.463 , scale = 0.345793
step 1100 -264.463 , scale = 0.345226
step 1200 -264.463 , scale = 0.345097
step 1300 -264.463 , scale = 0.345072
step 1400 -264.463 , scale = 0.345068
step 1500 -264.463 , scale = 0.345068
step 1600 -264.463 , scale = 0.345068
step 1700 -264.463 , scale = 0.345068
step 1800 -264.463 , scale = 0.345068
step 1900 -264.463 , scale = 0.345068
step 2000 -264.463 , scale = 0.345068


In [107]:
print(log_likelihood(0.936437))
print(log_likelihood(0.345071))

-271.907992455
-264.463278991
