Based on TensorFlow-Examples by aymericdamien
https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py

In [None]:
import tensorflow as tf
import numpy as np
import os
import time 
import datetime
import re
import itertools
from collections import Counter
from tensorflow.contrib import learn
from sklearn import metrics
from tensorflow.python.ops import rnn, rnn_cell

In [None]:
# Optimisation hyper-parameters
learning_rate = 0.001    # step size handed to the optimizer
batch_size = 100         # rows per gradient step
num_epochs = 40          # full passes over the training data
display_step = 100       # report minibatch loss/accuracy every this many steps
training_iters = 100000  # not referenced by the code visible in this file

# Input encoding: every sequence position holds an integer code in
# [min_char, max_char] (presumably printable ASCII -- confirm against the data).
min_char = 33
max_char = 126
sequence_length = 300               # positions per input row
n_input = max_char - min_char + 1   # one-hot width: 94

# Network parameters
n_steps = 50     # not referenced by the visible code; the graph unrolls over sequence_length
n_hidden = 128   # LSTM state size
n_classes = 2

In [None]:
# Graph inputs.
# x: one-hot encoded sequences, shape [batch, sequence_length, n_input]
# y: two-column label rows, shape [batch, n_classes]
x = tf.placeholder(tf.float32, shape=[None, sequence_length, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_classes])

# Output projection applied to the last LSTM output: n_hidden -> n_classes
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]), name="weights"),
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]), name="biases"),
}

# Plain module-level flags. The GPUOptions/ConfigProto calls visible in this
# file pass literal True values and do not read these names.
allow_soft_placement = True
log_device_placement = True
allow_growth = True

In [None]:
# Paths of the training and test data files (read with np.loadtxt)
train_FILE, test_FILE = 'train.txt', 'test.txt'

In [None]:
# Load data
print("Loading data...")
# Each file is parsed as a whitespace-delimited integer matrix, one sample per
# row. (The previous unpack=True followed by .T transposed the array twice and
# produced exactly this layout.)
xy_train = np.loadtxt(train_FILE, dtype='int')
xy_test = np.loadtxt(test_FILE, dtype='int')
print("Data loaded!")

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("Training data shape: " + str(xy_train.shape))
print("Test data shape: " + str(xy_test.shape))

# Split each row into features and label: the first 300 columns are the
# character codes (300 matches sequence_length), column 300 is the label.
x_train = xy_train[:, 0:300]
y_train = xy_train[:, 300]
x_test = xy_test[:, 0:300]
y_test = xy_test[:, 300]

In [None]:
# Work on a random subset: shuffle the row indices and keep at most the first
# 50000 training rows and 10000 test rows.
tr_idx = np.random.permutation(len(y_train))[:50000]
x_train = x_train[tr_idx]
y_train = y_train[tr_idx]

t_idx = np.random.permutation(len(y_test))[:10000]
x_test, y_test = x_test[t_idx], y_test[t_idx]

In [None]:
# change x to one-hot
def embed_x(x,max_char,min_char):
    """One-hot encode a 2-D array of integer character codes.

    Args:
        x: integer NumPy array of shape (m, n); entries are expected to lie
            in [min_char, max_char].
        max_char: largest code that can occur.
        min_char: smallest code that can occur; it maps to one-hot index 0.

    Returns:
        Float array of shape (m, n, max_char - min_char + 1) with a single 1
        per (row, position) at index ``code - min_char``.

    NOTE(review): codes are not range-checked. A code below min_char yields a
    negative index, which NumPy wraps around to the end of the one-hot axis
    instead of rejecting; a code above max_char raises IndexError.
    """
    m, n = x.shape
    depth = max_char - min_char + 1
    # Flatten to one code per (row, position) and shift so min_char -> 0.
    flat_codes = np.reshape(x, m * n) - min_char
    out = np.zeros((m * n, depth))
    # np.arange replaces the Python-2-only xrange and works on both versions.
    out[np.arange(m * n), flat_codes] = 1
    # Reshape with the explicit depth (not -1) so empty input is handled too.
    return np.reshape(out, (m, n, depth))

In [None]:
# change y to 2 classes
def embed_y(y):
    """Expand binary labels into two-column rows ``[1 - y, y]``.

    Args:
        y: array-like of labels, either a column of shape (m, 1) or a flat
            vector of shape (m,), which is treated as a column.

    Returns:
        Array whose first column is ``1 - y`` and whose second column is
        ``y``; for 0/1 labels this is the one-hot encoding with class 1 in
        column 1.
    """
    y = np.asarray(y)
    if y.ndim == 1:
        # Accept a flat label vector so callers need not reshape first.
        y = y.reshape(-1, 1)
    return np.concatenate([1 - y, y], axis=1)

In [None]:
# RNN model
def RNN(x, weights, biases,visualize=False):
    """Build a single-layer LSTM classifier on top of the input tensor.

    Args:
        x: float tensor of shape (batch, sequence_length, n_input).
        weights: dict whose 'out' entry is the (n_hidden, n_classes)
            projection matrix.
        biases: dict whose 'out' entry is the (n_classes,) bias vector.
        visualize: when True, also return the per-step LSTM outputs.

    Returns:
        The logits tensor of shape (batch, n_classes). When ``visualize`` is
        True, a tuple ``(logits, outputs)`` where ``outputs`` is the list of
        per-step LSTM outputs (one (batch, n_hidden) tensor per step).

    The module-level globals n_input, n_hidden and sequence_length are read
    here. Progress messages are printed while the graph is being built;
    single-argument print(...) is used so the code parses on Python 2 and 3.
    """
    with tf.device('/gpu:0'):
        # rnn.rnn expects a list with one (batch, n_input) tensor per step,
        # while the input arrives as (batch, sequence_length, n_input).

        # Move the step axis to the front: (sequence_length, batch, n_input)
        print('Permuting batch size and number of steps...')
        x = tf.transpose(x, [1, 0, 2])
        # Collapse to (sequence_length * batch, n_input)
        print('Reshaping to (n_steps * batch_size, n_input)...')
        x = tf.reshape(x, [-1, n_input])
        # Cut along axis 0 into sequence_length tensors of (batch, n_input)
        print('Splitting to get a list of n_step tensors of shape (batch_size, n_input)...')
        x = tf.split(0, sequence_length, x)

        # Define a lstm cell with tensorflow
        lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0,state_is_tuple=True)
        print(lstm_cell)

        # outputs: one (batch, n_hidden) tensor per step.
        # states: the final state only, as a (c, h) pair.
        outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)

        # Linear activation on the output of the last step
        out = tf.matmul(outputs[-1], weights['out']) + biases['out']
        if visualize:
            return out,outputs
        else:
            return out

In [None]:
# Build the model: `pred` holds the logits, `vis` the per-step LSTM outputs.
pred, vis = RNN(x, weights, biases, visualize=True)
# pred: Tensor("add:0", shape=(?, 2), dtype=float32, device=/device:GPU:0)

# Loss: mean softmax cross-entropy between the logits and the label rows
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(pred, y)
cost = tf.reduce_mean(cross_entropy)

# Optimizer: Adam on the mean loss
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)
#optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of rows whose arg-max logit matches the arg-max label
predicted_class = tf.argmax(pred, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that initializes every variable created so far
init = tf.initialize_all_variables()

In [None]:
# Saver used to write and read checkpoints of the graph variables
saver = tf.train.Saver()

# Session options: cap this process at a 0.333 fraction of GPU memory with
# allow_growth enabled, log device placement, and allow soft placement.
gpu_options = tf.GPUOptions(
    per_process_gpu_memory_fraction=0.333,
    allow_growth=True,
)
config = tf.ConfigProto(
    allow_soft_placement=True,
    log_device_placement=True,
    gpu_options=gpu_options,
)

In [None]:
# RUN!
# Open an interactive session using the `config` options built earlier, then
# run the initializer op so every graph variable has a value before training.
sess = tf.InteractiveSession(config=config)
sess.run(init)

In [None]:
# Mini-batch training. Rows left over after the last full batch are skipped in
# each epoch because only `num_batches` full batches are visited.
# Floor division keeps num_batches an integer on Python 3 as well.
num_batches = len(y_train) // batch_size
steps = 1
for i in range(num_epochs):
    # Reshuffle the training rows at the start of every epoch
    tr_idx = np.random.permutation(len(y_train))
    x_train = x_train[tr_idx,:]
    y_train = y_train[tr_idx]

    for j in range(num_batches):
        batch_x = x_train[j*batch_size:(j+1)*batch_size]
        batch_y = y_train[j*batch_size:(j+1)*batch_size]
        # One-hot encode the inputs and expand labels to two columns
        batch_x = embed_x(batch_x,max_char,min_char)
        batch_y = embed_y(np.reshape(batch_y,(batch_y.shape[0],1)))
        feed = {x: batch_x, y: batch_y}
        sess.run(optimizer, feed_dict=feed)
        if steps % display_step == 0:
            # Fetch both metrics in a single forward pass instead of two
            acc, loss = sess.run([accuracy, cost], feed_dict=feed)
            print("Iter " + str(steps) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        steps += 1
    print("End of Epoch %d........\n" % (i + 1))

In [None]:
# Write the trained variables to a checkpoint. print(...) with a single
# argument behaves the same on Python 2 and Python 3.
print("Optimization Finished! Saving variables...")
save_path = saver.save(sess,"model.ckpt")
print("Model saved in file: %s" % save_path)

In [None]:
# Reload the variables from the checkpoint into the current session.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
saver.restore(sess,"model.ckpt")
print("Model restored")

In [None]:
# Evaluate on the test set
test_len = 128  # not referenced by the visible code
original_x = x_test # version without embedding
x_test_embed = embed_x(x_test, max_char,min_char)
y_test_embed = embed_y(np.reshape(y_test,(y_test.shape[0],1)))
test_feed = {x: x_test_embed, y: y_test_embed}
# Fetch accuracy and logits in one pass over the test set instead of two
test_acc, test_pred = sess.run([accuracy, pred], feed_dict=test_feed)
print("Testing Accuracy: %s" % test_acc)

In [None]:
# Hard class predictions (arg-max over the two logits)
test_pred_val = np.argmax(test_pred, 1)
# roc_auc_score expects (y_true, y_score): ground-truth labels first, then a
# continuous score for the positive class. The logit margin
# (class-1 logit minus class-0 logit) ranks samples exactly like the softmax
# probability of class 1, which embed_y places in column 1.
positive_score = test_pred[:, 1] - test_pred[:, 0]
auc = metrics.roc_auc_score(y_test, positive_score)
print("Testing AUC: %1.4f" % auc)

In [None]:
# Fetch the per-step LSTM outputs for the whole test set. `vis` is a list with
# one (num_samples, n_hidden) array per step, so stacking it yields
# (num_steps, num_samples, n_hidden).
layer = sess.run(vis, feed_dict={x: x_test_embed, y: y_test_embed})
layer = np.asarray(layer)
print(layer.shape)
# Permute to (num_samples, n_hidden, num_steps)
layer = np.transpose(layer, [1, 2, 0])
print(layer.shape)
print(layer[0].shape)
# layer has shape [num_samples, n_hidden, num_steps]; each layer[i] is an
# (n_hidden, num_steps) matrix.
# NOTE(review): a [num_samples, num_steps, n_hidden] layout would need the
# permutation [1, 0, 2]; the code is left as is because the files written from
# `layer` depend on the current layout.

In [None]:
# Write the first (up to) 10 test samples and their LSTM activations to text
# files: the raw integer codes in one file, the activation matrix in another.
# range replaces the Python-2-only xrange; the bound avoids an IndexError when
# fewer than 10 test samples are available.
for i in range(min(10, len(original_x))):
    a = original_x[i]
    b = layer[i]
    np.savetxt('test_sample_'+str(i)+'.txt',a,delimiter=',',fmt="%d")
    np.savetxt('test_sample_filter_'+str(i)+'.txt',b,delimiter=',',fmt='%1.4f')