# Learn representation

Trains a simple fully-connected neural network on electronic
health records, using treatment assignment as the prediction target, and
extracts the activations of the last layer. This notebook also performs some
hyperparameter tuning on the network.

In [1]:
%matplotlib inline
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from utils.data import load_data
from utils.nn import (
    add_fully_connected, 
    precision_recall,
    add_criteria,
    extract_activations
)

In [2]:
# Load data
# `load_data` is the project-local loader imported from utils.data. The object
# it returns is used further down through its `dimension` attribute and its
# `train`, `val1`, `val2` and `test` members.
datasets = load_data()
# Width of one input row; sizes the input placeholder and the first layer.
dim = datasets.dimension

In [3]:
# Open an interactive session and declare the graph inputs.
sess = tf.InteractiveSession()

# x: float32 input batch, `dim` columns per row, batch size left open.
x = tf.placeholder(dtype=tf.float32, shape=(None, dim))
# y_: float32 target batch with 2 columns per row
# (presumably one-hot treatment assignment -- TODO confirm against load_data).
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 2))

In [27]:
# Layer widths: dim -> hidden1 -> hidden2 -> 2 output scores.
hidden1, hidden2 = 50, 5

# Stack three fully-connected layers; each one is built with relu=False.
# NOTE(review): if `relu` toggles the non-linearity inside
# utils.nn.add_fully_connected, this stack contains no non-linear activation
# at all -- confirm that this is the intended configuration.
h1 = add_fully_connected(x, dim, hidden1, relu=False)
h2 = add_fully_connected(h1, hidden1, hidden2, relu=False)
y_scores = add_fully_connected(h2, hidden2, 2, relu=False)

# Unpack the four objects returned by add_criteria. Their names suggest loss,
# training op, per-example correctness and accuracy.
# NOTE(review): the meaning of 2.3e-6 is defined by add_criteria, which is not
# visible in this notebook -- TODO document what this constant controls.
criteria_ops = add_criteria(y_, y_scores, 2.3e-6)
cross_entropy, train_step, correct_prediction, accuracy = criteria_ops

In [None]:
# Training schedule. Metrics are recorded every EVAL_EVERY steps, so each
# history list below holds one entry per evaluation checkpoint (not per epoch).
NUM_STEPS = 100000
BATCH_SIZE = 512
EVAL_EVERY = 1000

# Keep history to plot accuracies and losses
eval_steps = []    # training step at which each checkpoint was taken
train_acc = []     # accuracy on the current mini-batch
val_acc = []       # accuracy on the val1 split
train_losses = []  # cross-entropy on the current mini-batch
val_losses = []    # cross-entropy on the val1 split

# The whole val1 split is evaluated at every checkpoint; build its feed once.
# (Assumes the val1 arrays are not modified during training -- TODO confirm.)
val_feed = {x: datasets.val1._patients, y_: datasets.val1._labels}

# Actually train the network.
# NOTE(review): tf.initialize_all_variables() is deprecated in later
# TensorFlow 1.x releases in favour of tf.global_variables_initializer();
# it is kept here to stay compatible with the TF version this notebook targets.
sess.run(tf.initialize_all_variables())

for i in range(NUM_STEPS):
    batch = datasets.train.next_batch(BATCH_SIZE)
    batch_feed = {x: batch[0], y_: batch[1]}

    if i % EVAL_EVERY == 0:
        # Fetch accuracy and loss in a single run per dataset instead of
        # evaluating the graph once per metric.
        train_accuracy, train_loss = sess.run(
            [accuracy, cross_entropy], feed_dict=batch_feed
        )
        val_accuracy, val_loss = sess.run(
            [accuracy, cross_entropy], feed_dict=val_feed
        )

        eval_steps.append(i)
        train_acc.append(train_accuracy)
        val_acc.append(val_accuracy)
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # A single pre-formatted string prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Step %d: %s" % (i, val_accuracy))

    # Metrics above are measured before the update on this batch.
    train_step.run(feed_dict=batch_feed)

print("Validation accuracy %g" % accuracy.eval(feed_dict=val_feed))

# Plot learning curves against the training step at which they were measured
plt.figure(1)
plt.plot(eval_steps, train_acc, label="Train (mini-batch)")
plt.plot(eval_steps, val_acc, label="Validation (val1)")
plt.xlabel("Training step")
plt.ylabel("Accuracy")
plt.legend()

plt.figure(2)
plt.plot(eval_steps, train_losses, label="Train (mini-batch)")
plt.plot(eval_steps, val_losses, label="Validation (val1)")
plt.xlabel("Training step")
plt.ylabel("Loss")
plt.legend()

# TODO: optionally report precision/recall on val1 via
#       precision_recall(x, y_, y_scores, datasets.val1)

Step 0: 0.499538
Step 1000: 0.557863
Step 2000: 0.598943
Step 3000: 0.627602
Step 4000: 0.647029


In [22]:
# Extract activations for the val2 and test splits, in that order. Each call
# receives the split object and the split's name as its last two arguments.
# Run this only after the training cell has finished on the current graph.
# NOTE(review): the tensor passed here is `y_scores` (the 2-unit output layer).
# If the representation of interest is the last hidden layer, `h2` may be the
# intended tensor -- confirm against utils.nn.extract_activations.
for split_name in ("val2", "test"):
    extract_activations(x, y_, y_scores, getattr(datasets, split_name), split_name)