In [None]:
from Thesis_PreProcessing import trials_note as tn
from Thesis_PreProcessing import encoder_train_ico as eti
from Thesis_PreProcessing import ico_codes as ic
from Thesis_PreProcessing import more_io as mi
import collections
import numpy as np
import psycopg2
import matplotlib.pyplot as plt
import tensorflow as tf
import pickle


import hashlib
import numbers

from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.layers import base as base_layer
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import variables as tf_variables
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest
from tensorflow.python.util.tf_export import tf_export




In [None]:
# From TensorFlow package
class LayerRNNCell(tf.nn.rnn_cell.RNNCell):
  """`RNNCell` variant that follows the `tf.Layer` build/call protocol.

  Plain `RNNCell` instances may create their variables lazily inside `call`
  through `tf.get_variable`, relying on the surrounding variable scope to
  cache them.  Subclasses of this class instead create their variables in a
  `build` method and keep `call` free of `tf.get_variable` lookups, exactly
  as ordinary `Layer` objects do.
  """

  def __call__(self, inputs, state, scope=None, *args, **kwargs):
    """Apply the cell to one step of input.

    Args:
      inputs: `2-D` tensor of shape `[batch_size, input_size]`.
      state: a `2-D` tensor of shape `[batch_size, self.state_size]` when
        `self.state_size` is an integer; otherwise a tuple with one
        `[batch_size, s]` tensor per entry `s` of `self.state_size`.
      scope: optional cell scope.
      *args: Additional positional arguments.
      **kwargs: Additional keyword arguments.

    Returns:
      A pair `(output, new_state)`: `output` is a `2-D` tensor of shape
      `[batch_size, self.output_size]`, and `new_state` mirrors the
      structure and shapes of `state`.
    """
    # Dispatch straight to Layer.__call__ so that RNNCell's own
    # variable-capturing __call__ is skipped; subclasses are expected to
    # supply a proper `build` method instead.
    layer_call = base_layer.Layer.__call__
    return layer_call(self, inputs, state, *args, scope=scope, **kwargs)


In [None]:
# Build RRA cell

# Names under which RRA_Cell.build registers its variables.
_WEIGHTS_VARIABLE_NAME = "W_r"  # gate kernel applied to concat([inputs, h_1])
_BIAS_VARIABLE_NAME = "b"  # gate bias, zero-initialised
_ATTENTION_VARIABLE_NAME = "W_a"  # [1, 4] attention kernel over h_2..h_5

# State layout: the cell state `c` followed by the five most recent hidden
# states, newest (`h_1`) first.
_RRAStateTuple = collections.namedtuple(
    "RRAStateTuple", ("c", "h_1", "h_2", "h_3", "h_4", "h_5"))


# Inspired by TensorFlow implementation
class RRAStateTuple(_RRAStateTuple):
  """Named tuple `(c, h_1, h_2, h_3, h_4, h_5)` carried between RRA steps.

  Behaves exactly like the underlying namedtuple and adds a `dtype`
  property that reports the dtype shared by all six entries.
  """

  __slots__ = ()

  @property
  def dtype(self):
    """Return the dtype common to every field.

    Raises:
      TypeError: if any hidden state's dtype differs from that of `c`.
    """
    reference = self.c.dtype
    # Compare every hidden state against `c`, not just `h_1`, so that a
    # mismatch in the older states is reported as well.
    for tensor in self[1:]:
      if tensor.dtype != reference:
        raise TypeError("Inconsistent internal state: %s vs %s" %
                        (str(reference), str(tensor.dtype)))
    return reference


class RRA_Cell(LayerRNNCell):
    """LSTM-style cell whose output also mixes in older hidden states.

    The state is an `RRAStateTuple` holding the cell state `c` and the five
    most recent hidden states (`h_1` is the newest).  Each step computes
    LSTM-like gates from `[inputs, h_1]`, forms a weighted combination `a`
    of `h_2..h_5`, and emits `o * activation(new_c + a)`.
    """

    def __init__(self, num_units, batchsize, forget_bias=1.0,
                 activation=None, reuse=None, name=None):
        """Create the cell.

        Args:
          num_units: int, width of the cell state, hidden states and output.
          batchsize: int, batch size used when reshaping the attention term
            in `call`.
          forget_bias: float added to the forget gate before the sigmoid.
          activation: activation applied to the candidate and to the output;
            defaults to `tanh`.
          reuse: forwarded to the base class as `_reuse`.
          name: forwarded to the base class.
        """
        super(RRA_Cell, self).__init__(_reuse=reuse, name=name)

        # Inputs must be 2-dimensional.
        self.input_spec = base_layer.InputSpec(ndim=2)
        self._num_units = num_units
        self._forget_bias = forget_bias
        self._activation = activation or math_ops.tanh
        self.batch_size = batchsize

    @property
    def output_size(self):
        """Width of the per-step output."""
        return self._num_units

    @property
    def state_size(self):
        """Per-field sizes of the state: `num_units` for each of the six fields."""
        return RRAStateTuple(self._num_units, self._num_units, self._num_units,
                             self._num_units, self._num_units, self._num_units)

    def build(self, inputs_shape):
        """Create the gate kernel, attention kernel and bias.

        Args:
          inputs_shape: shape of the step input; its second dimension must
            be statically known.

        Raises:
          ValueError: if the input depth is unknown.
        """
        if inputs_shape[1].value is None:
            raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                             % inputs_shape)

        input_depth = inputs_shape[1].value
        h_depth = self._num_units
        # One kernel produces all four gates (i, j, f, o) at once.
        self._kernel = self.add_variable(
            _WEIGHTS_VARIABLE_NAME,
            shape=[input_depth + h_depth, 4 * self._num_units])
        # One scalar weight per attended hidden state (h_2..h_5).
        self._attention_kernel = self.add_variable(
            _ATTENTION_VARIABLE_NAME,
            shape=[1, 4])
        self._bias = self.add_variable(
            _BIAS_VARIABLE_NAME,
            shape=[4 * self._num_units],
            initializer=init_ops.zeros_initializer(dtype=self.dtype))

        self.built = True

    def call(self, inputs, state):
        """Run one step of the cell.

        Args:
          inputs: `[batch_size, input_depth]` tensor.
          state: `RRAStateTuple` `(c, h_1, h_2, h_3, h_4, h_5)`.

        Returns:
          `(new_h, new_state)` where `new_state` is
          `RRAStateTuple(new_c, new_h, h_1, h_2, h_3, h_4)`; the oldest
          hidden state `h_5` is dropped.
        """
        sigmoid = math_ops.sigmoid
        add = math_ops.add
        multiply = math_ops.multiply
        divide = math_ops.div

        one = constant_op.constant(1, dtype=dtypes.int32)
        c, h_1, h_2, h_3, h_4, h_5 = state

        gate_inputs = math_ops.matmul(array_ops.concat([inputs, h_1], 1), self._kernel)
        gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

        # i = input gate, j = candidate, f = forget gate, o = output gate
        i, j, f, o = array_ops.split(value=gate_inputs, num_or_size_splits=4, axis=one)
        forget_bias_tensor = constant_op.constant(self._forget_bias, dtype=f.dtype)

        new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))),
                    multiply(sigmoid(i), self._activation(j)))

        # Normalise the four attention weights by their sum.
        # NOTE(review): the weights are not constrained to be positive, so the
        # sum can be close to zero -- confirm this normalisation is intended.
        attention_weights = divide(self._attention_kernel,
                                   tf.reduce_sum(self._attention_kernel, 1))
        h_stack = array_ops.stack([h_2, h_3, h_4, h_5], 0)
        h_stack = tf.reshape(h_stack, [4, -1])
        # Reshape back to [batch, num_units].  The width used to be the
        # literal 300, which only worked when num_units == 300.
        a = tf.reshape(math_ops.matmul(attention_weights, h_stack),
                       [self.batch_size, self._num_units])

        # NOTE(review): `o` is used without a sigmoid here, unlike `i` and `f`
        # -- confirm whether the output gate should be squashed as well.
        new_h = multiply(o, self._activation(add(new_c, a)))
        new_state = RRAStateTuple(new_c, new_h, h_1, h_2, h_3, h_4)

        return new_h, new_state
        


In [None]:
# Build training graph

def Encoder_RNN_graph(feature_size, class_size = 943, state_size = 300, batch_size = 30,
    max_steps = 100, learning_rate = 1e-3, test=False, LSTM=False):
    """Build the encoder graph on a freshly reset default graph.

    A fully connected layer projects each step of the input to `state_size`,
    a recurrent cell (an `RRA_Cell`, or a `tf.nn.rnn_cell.LSTMCell` when
    `LSTM` is true) consumes the projected sequence, and a sigmoid output
    layer on the final hidden state predicts `class_size` labels.

    Args:
      feature_size: size of each input step.
      class_size: number of output labels.
      state_size: width of the projection and of the recurrent cell.
      batch_size: batch size baked into the initial state.
      max_steps: padded sequence length of the input placeholder.
      learning_rate: learning rate for the Adam optimizer.
      test: when true no optimizer is created and `train_step` is `[]`.
      LSTM: selects the LSTM cell instead of the RRA cell.

    Returns:
      dict with keys `inputs`, `targets`, `total_loss`, `train_step`,
      `initial`, `final`, `predictions` and `saver`.
    """
    reset_g()

    sequence_ph = tf.placeholder(tf.float32, [None, max_steps, feature_size], name="input_sequence_ph")
    label_ph = tf.placeholder(tf.float32, [None, class_size], name="target_sequence_ph")
    projected = tf.contrib.layers.fully_connected(sequence_ph, state_size)

    if LSTM:
        cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    else:
        cell = RRA_Cell(state_size, batch_size)
    zero_state = cell.zero_state(batch_size, tf.float32)

    # Sequence lengths are derived from the raw (unprojected) inputs.
    step_counts = length(sequence_ph)
    _, last_state = tf.nn.dynamic_rnn(cell=cell, inputs=projected,
                                      initial_state=zero_state,
                                      sequence_length=step_counts,
                                      dtype=tf.float32)

    # Both state tuple types keep the most recent hidden state at index 1.
    final_hidden = last_state[1]

    with tf.variable_scope('sigmoid'):
        W = tf.get_variable('W', [state_size, class_size])
        b = tf.get_variable('b', [class_size], initializer=tf.constant_initializer(0.0))

    logits = tf.matmul(final_hidden, W) + b
    probabilities = tf.nn.sigmoid(logits)

    per_label_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=label_ph)
    mean_loss = tf.reduce_mean(per_label_loss)

    if test:
        optimize_op = []
    else:
        optimize_op = tf.train.AdamOptimizer(learning_rate).minimize(mean_loss)

    return dict(
        inputs = sequence_ph,
        targets = label_ph,
        total_loss = mean_loss,
        train_step = optimize_op,
        initial = zero_state,
        final = last_state,
        predictions = probabilities,
        saver = tf.train.Saver()
    )
    

In [None]:
def last_relevant(output, length):
    """Pick, for every sequence in the batch, the output at step `length - 1`.

    Args:
      output: 3-D tensor indexed as `[batch, step, feature]`; the feature
        dimension must be statically known.
      length: per-sequence step counts.

    Returns:
      Tensor with one row of `output` per sequence.
    """
    n_sequences = tf.shape(output)[0]
    n_steps = tf.shape(output)[1]
    feature_width = int(output.get_shape()[2])
    # Row offset of each sequence's last step in the flattened view below.
    last_rows = tf.range(0, n_sequences) * n_steps + (length - 1)
    flattened = tf.reshape(output, [-1, feature_width])
    return tf.gather(flattened, last_rows)

def length(inputs):
    """Count the steps of each sequence that contain a non-zero entry.

    Args:
      inputs: 3-D tensor; axis 1 is summed over after axis 2 is reduced.

    Returns:
      int32 tensor with one count per sequence.
    """
    step_peak = tf.reduce_max(tf.abs(inputs), reduction_indices=2)
    # sign() maps every step with a non-zero entry to 1 and all-zero steps to 0.
    step_is_used = tf.sign(step_peak)
    used_steps = tf.reduce_sum(step_is_used, reduction_indices=1)
    return tf.cast(used_steps, tf.int32)

def encoder_dict_to_paddedXY(note_dict, ico_dict, keys, test=False, max_length = 100, 
                             note_feats=3145 ,ico_feats=943):
    """Turn nested subject -> visit dictionaries into zero-padded arrays.

    Args:
      note_dict: `{subject: {visit: matrix}}`; each matrix has `note_feats`
        columns and at most `max_length` rows.
      ico_dict: `{subject: {visit: label_vector}}` with vectors of length
        `ico_feats`.
      keys: subjects to include.
      test: when true the number of rows is the number of visits of the
        selected subjects; otherwise it is `len(keys)`, which is only large
        enough when the selected subjects average at most one visit each.
      max_length: padded number of rows per visit.
      note_feats: number of columns of each note matrix.
      ico_feats: length of each label vector.

    Returns:
      `(X, Y)` float16 arrays of shape `(dim, max_length, note_feats)` and
      `(dim, ico_feats)`.  Visits without a label keep an all-zero `Y` row.
    """
    # Build the membership set once instead of scanning `keys` per subject.
    wanted = set(keys)

    dim = len(keys)
    if test:
        print("TEST!")
        dim = sum(len(visit_dict)
                  for subject, visit_dict in note_dict.items()
                  if subject in wanted)
        print(dim)

    X = np.zeros((dim, max_length, note_feats), dtype=(np.float16))
    Y = np.zeros((dim, ico_feats), dtype=(np.float16))

    i = 0
    for subject, subject_notes in note_dict.items():
        if subject not in wanted:
            continue

        if subject in ico_dict:
            subject_icos = ico_dict[subject]
        else:
            print("WRONG!")
            # No labels for this subject: leave its Y rows at zero rather
            # than reusing the previous subject's labels.
            subject_icos = {}

        for visit, mat in subject_notes.items():
            depth = mat.shape[0]
            X[i, 0: depth, :] = mat
            if visit in subject_icos:
                Y[i, :] = subject_icos[visit]
            i += 1

    return (X,Y)

def encoder_dict_test_XY(visit_notes, visit_ico, visit, max_length = 100, 
        note_feats=3145 ,ico_feats=943):
    """Build a single-example `(X, Y)` pair for one visit.

    Args:
      visit_notes: `{visit: matrix}` with matrices of `note_feats` columns.
      visit_ico: `{visit: label_vector}` with vectors of length `ico_feats`.
      visit: key of the visit to encode.
      max_length: padded number of rows.
      note_feats: number of columns of the note matrix.
      ico_feats: length of the label vector.

    Returns:
      `(X, Y)` float16 arrays of shape `(1, max_length, note_feats)` and
      `(1, ico_feats)`.

    Raises:
      KeyError: if `visit` is missing from either dictionary.
    """
    dim = 1
    X = np.zeros((dim, max_length, note_feats), dtype=(np.float16))
    Y = np.zeros((dim, ico_feats), dtype=(np.float16))
    mat = visit_notes[visit]
    # Matrices that are too long are cut down to `max_length` rows
    # (previously a literal 100, which ignored the parameter).  Rows are
    # drawn by np.random.choice with its default settings, i.e. with
    # replacement and without preserving the original order.
    if mat.shape[0] > max_length:
        idx = list(np.random.choice(mat.shape[0], max_length))
        mat = mat[idx]

    depth = mat.shape[0]
    X[0, 0:depth, :] = mat
    Y[0, :] = visit_ico[visit]

    return X,Y
        
    

def reset_g():
    """Close the module-level `sess`, if there is one, and reset the default graph."""
    open_session = globals().get('sess')
    if open_session:
        open_session.close()
    tf.reset_default_graph()



In [None]:

def train_encoder(g, inputs, targets, epoch_nums=2, batch_size=30, verbose=True, save=False):
    """Train the graph `g` on consecutive mini-batches of `inputs`/`targets`.

    Within an epoch the final recurrent state of one batch is fed as the
    initial state of the next one.  Examples beyond the last full batch are
    not used.

    Args:
      g: graph dictionary with `inputs`, `targets`, `initial`, `final`,
        `total_loss`, `train_step` and `saver` entries.
      inputs: array whose first axis indexes training examples.
      targets: array of labels aligned with `inputs`.
      epoch_nums: number of passes over the data.
      batch_size: examples per batch.
      verbose: print the mean loss after every epoch.
      save: checkpoint path; the model is only saved when this is a `str`.

    Returns:
      List with the mean training loss of each epoch.

    Raises:
      ValueError: if there are fewer than `batch_size` examples.
    """
    # NOTE(review): `g` has already been built when this seed is set, so it
    # may not influence ops that exist by now -- confirm.
    tf.set_random_seed(1234)
    train_size = inputs.shape[0]
    num_iters = train_size // batch_size
    if num_iters == 0:
        # Without this check the epoch average below would divide by zero.
        raise ValueError("Need at least batch_size=%d training examples, got %d"
                         % (batch_size, train_size))

    training_losses = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(epoch_nums):
            training_loss = 0
            training_state = None
            for j in range(num_iters):
                X = inputs[j*batch_size:(j+1)*batch_size]
                Y = targets[j*batch_size:(j+1)*batch_size]
                feed_dict = {g['inputs']: X, g['targets']: Y}

                # Carry the recurrent state over from the previous batch.
                if training_state is not None:
                    feed_dict[g['initial']] = training_state

                cur_training_loss, training_state, _ = sess.run([g['total_loss'],
                                                                 g['final'],
                                                                 g['train_step']],
                                                                feed_dict)
                training_loss += cur_training_loss

            epoch_loss = training_loss/num_iters
            if verbose:
                # Report the epoch number; this used to print an unrelated
                # array of random indices.
                print("Average training loss for Epoch", epoch, ":", epoch_loss)
            training_losses.append(epoch_loss)

        if isinstance(save, str):
            g['saver'].save(sess, save)

    return training_losses


In [None]:
# Inference/Testing
def test_encoder(g, checkpoint, X_test, Y_test, N=20):
    """Evaluate a restored model one example at a time.

    The final recurrent state of each example is fed as the initial state
    of the next one.

    Args:
      g: graph dictionary with `inputs`, `targets`, `initial`, `final`,
        `total_loss`, `predictions` and `saver` entries.  Examples are fed
        singly, so presumably the graph must be built with `batch_size=1`
        -- confirm against the caller.
      checkpoint: path of the checkpoint to restore.
      X_test: array whose first axis indexes test examples.
      Y_test: array of labels aligned with `X_test`.
      N: cutoff passed to `recall_at_n`.

    Returns:
      `[avg_test_loss, recall_accs]`: the mean loss over all examples and
      the list of per-example recall values.
    """
    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        g['saver'].restore(sess, checkpoint)

        num_iters = X_test.shape[0]
        state = None
        total_test_loss = 0
        recall_accs = []
        for i in range(num_iters):
            X = X_test[i:i+1,:,:]
            targets = Y_test[i:i+1, :]

            feed_dict = {g['inputs']: X, g['targets']: targets}
            if state is not None:
                feed_dict[g['initial']] = state

            # Deliberately do not fetch g['train_step']: running it on a
            # graph built with test=False would update the weights on
            # test data.
            cur_test_loss, state, predictions = sess.run([g['total_loss'],
                                                          g['final'],
                                                          g['predictions']],
                                                         feed_dict)
            recall_acc = recall_at_n(predictions, targets, n=N)
            recall_accs.append(recall_acc)
            total_test_loss += cur_test_loss

        avg_test_loss = total_test_loss/num_iters

    return [avg_test_loss, recall_accs]
     

    
def recall_at_n(predictions, targets, n=20):
    """Recall of the `n` highest-scoring predictions for the first example.

    Only row 0 of both arrays is used.  A label counts as positive when its
    target equals 1 (within 1e-9).  The denominator is the number of
    positives, capped at `n`.

    Args:
      predictions: 2-D array of scores, `[example, label]`.
      targets: 2-D array of 0/1 labels with the same layout.
      n: number of top-scoring labels to inspect; may exceed the number of
        labels.

    Returns:
      Fraction of the (capped) positives found among the top `n`
      predictions, or 0.0 when there are no positives.
    """
    eps = .000000001

    valid_targets = np.asarray(targets)[0, :]
    valid_preds = np.asarray(predictions)[0, :]

    is_positive = np.abs(valid_targets - 1.) <= eps
    pos_count = min(int(np.count_nonzero(is_positive)), n)
    if pos_count <= 0:
        return 0.0

    # Rank once; slicing (rather than indexing up to n) keeps this valid
    # when n is larger than the number of labels.
    top_n = np.argsort(-valid_preds)[:n]
    # Count a hit for every top-n prediction that is a positive label, so
    # the result does not depend on how a sort orders tied targets.
    pred_pos = int(np.count_nonzero(is_positive[top_n]))

    return pred_pos/float(pos_count)


    

In [None]:
def dense_matrix(g, checkpoint, note_visits, ico_visits, LSTM):
    """Compute the final hidden state of the encoder for every visit.

    Args:
      g: graph dictionary with `inputs`, `targets`, `final` and `saver`
        entries.
      checkpoint: path of the checkpoint to restore.
      note_visits: `{visit: note_matrix}` for one subject.
      ico_visits: `{visit: label_vector}` for the same subject.
      LSTM: true when `g` uses the LSTM cell (two-field state), false for
        the RRA cell (six-field state).

    Returns:
      List of final hidden states, one per visit, in the iteration order
      of `note_visits`.
    """
    final_state_vec = []
    if not note_visits:
        return final_state_vec

    # Open one session and restore the checkpoint once for all visits
    # instead of repeating both for every single visit.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        g['saver'].restore(sess, checkpoint)

        for visit in note_visits.keys():
            X, Y = encoder_dict_test_XY(note_visits, ico_visits, visit)
            feed_dict = {g['inputs']: X, g['targets']: Y}
            final_states = sess.run(g['final'], feed_dict)
            if not LSTM:
                c, final_state, h_2, h_3, h_4, h_5 = final_states
            else:
                c, final_state = final_states
            final_state_vec.append(final_state)

    return final_state_vec
            
    

In [None]:
def dense_representation_creator(g, checkpoint, ico_dict, note_dict, keys, LSTM=False):
    """Build `{subject: {visit: embedding}}` for the selected subjects.

    Args:
      g: graph dictionary passed through to `dense_matrix`.
      checkpoint: path of the checkpoint to restore.
      ico_dict: `{subject: {visit: label_vector}}`.
      note_dict: `{subject: {visit: note_matrix}}`.
      keys: subjects to include.
      LSTM: passed through to `dense_matrix`.

    Returns:
      Nested dictionary mapping each selected subject in `ico_dict` to its
      per-visit embeddings.  Subjects without any embedding are omitted.

    Raises:
      KeyError: if a selected subject is missing from `note_dict`.
    """
    # Build the membership set once instead of scanning `keys` per subject.
    wanted = set(keys)
    dense_dict = {}
    for subject, visit_dict in ico_dict.items():
        if subject not in wanted:
            continue
        note_visits = note_dict[subject]
        dense_reps = dense_matrix(g, checkpoint, note_visits, visit_dict, LSTM)
        # dense_matrix returns one embedding per key of `note_visits`, in
        # that dict's iteration order, so pair them with those keys rather
        # than with the positions of the ICO visit dict.
        if dense_reps:
            dense_dict[subject] = dict(zip(note_visits, dense_reps))
    return dense_dict



In [None]:
# Driver cell: load the data, train the LSTM encoder, evaluate it and dump
# the dense visit embeddings.
current_connection = tn.connect()
current_cur = current_connection[1]

# ICD9_Data_first
train_ico_dict = eti.ico_dict_pop_encoder(current_cur)
train_ico_dict_keys = list(train_ico_dict.keys())
# ico_dict_keys = np.array(ico_dict_keys)
tr_idx = int(.18*len(train_ico_dict_keys))
val_idx = int(.23*len(train_ico_dict_keys))
train_keys = train_ico_dict_keys[:tr_idx]
# NOTE(review): starting at tr_idx+1 leaves the key at position tr_idx in
# neither split -- confirm whether that gap is intended.
test_keys = train_ico_dict_keys[tr_idx+1: val_idx]
#test_keys = ico_dict_keys[val_idx:]

#ICD9_Data_final
embed_ico_dict = ic.ico_dict_populator(current_cur)
embed_ico_dict_keys = list(embed_ico_dict.keys())
samp = int(.18*len(embed_ico_dict_keys))
sample_keys = embed_ico_dict_keys[:samp]


# Note Data
# NOTE: pickle.load executes arbitrary code from the file; only load
# pickles produced by this project.
with open('Thesis_PreProcessing/cui_map_train.pickle', 'rb') as cui_map_train, open('Thesis_PreProcessing/cui_map_test.pickle', 'rb') as cui_map_test:
    train_map = pickle.load(cui_map_train)
    test_map = pickle.load(cui_map_test)
with open('Thesis_PreProcessing/cui_dict_train.pickle', 'rb') as cui_dict_train, open('Thesis_PreProcessing/cui_dict_test.pickle', 'rb') as cui_dict_test:
    train_dict = pickle.load(cui_dict_train)
    test_dict = pickle.load(cui_dict_test)
    
merged_map = mi.map_merge(train_map, test_map)
train_vectorized = mi.cui_vectorizer(train_dict, merged_map)
test_vectorized = mi.cui_vectorizer(test_dict, merged_map)

old_X_Train = {}
old_Y_Train = {}


X_train, Y_train = encoder_dict_to_paddedXY(train_vectorized, train_ico_dict, train_keys)
X_test, Y_test = encoder_dict_to_paddedXY(train_vectorized, train_ico_dict, test_keys)


g_1 = Encoder_RNN_graph(3145, LSTM=True)
train_encoder(g_1, X_train, Y_train, epoch_nums=4, save='/tmp/encoder_LSTM_4.ckpt')
g_2 = Encoder_RNN_graph(3145, batch_size=1, test=True, LSTM=True)
# Evaluate the LSTM checkpoint saved above against the test labels.  This
# line used to reference the undefined name `Y_tt` and the RRA checkpoint
# '/tmp/encoder_RRA_2.ckpt', which does not match the LSTM graph g_2.
loss, recalls = test_encoder(g_2, '/tmp/encoder_LSTM_4.ckpt', X_test, Y_test)
dense_embedding = dense_representation_creator(g_2, '/tmp/encoder_LSTM_4.ckpt', embed_ico_dict, test_vectorized, embed_ico_dict_keys, LSTM=True)

with open('Thesis_PreProcessing/dense_reps_LSTM.pickle','wb') as dense_reps:
    pickle.dump(dense_embedding, dense_reps, protocol=pickle.HIGHEST_PROTOCOL)
