## Recurrent Embedding Kernel implementation

In [1]:
#import libraries
import os
# THEANO_FLAGS has to be in the environment before theano is first imported
os.environ["THEANO_FLAGS"] = "device=cuda0,floatX=float32,exception_verbosity='high'"

import theano
from theano import *
import theano.tensor as T

from sklearn.preprocessing import normalize
from scipy.spatial import distance_matrix
from scipy.stats import mode
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

Using cuDNN version 6021 on context None
Mapped name None to device cuda0: GeForce GTX 1080 Ti (0000:01:00.0)


### Loading data

In [2]:
# raw table as read from disk
tsx = pd.read_csv('SPY.csv')

# features: every column except the first and the last two, first-differenced
# along the time axis
X = tsx.values[:,1:-2]
Xd = np.diff(X,axis=0)
# z-score each column (statistics are taken over all rows of Xd), then drop
# the final row and keep the last 5365 rows
Xd_mean = np.mean(Xd,axis=0)
Xd_std = np.std(Xd.astype(np.float32),axis=0)
Xsd = ((Xd - Xd_mean) / Xd_std).astype(np.float32)
Xsd = Xsd[:-1][-5365:]

# labels: 1 where 'Adj Close' goes up, 0 otherwise.
# np.diff(yt)[1:] skips the first difference, so Y[j] is the move that comes
# one step after the feature difference in row j.
yt = tsx['Adj Close'].values.astype(np.float32)
Y = (np.diff(yt)[1:] > 0) * 1
Y = Y[-5365:]

### Building REK computational flow

In [3]:
# Architecture: `hiddens` is a list of three lists of layer widths, in order:
#   1. recurrent block
#   2. embedding block
#   3. kernel block
hiddens = [[36,36,36],[36,36],[36,36]]

# non-linearity used by the dense layers built below
activation = T.nnet.relu
# number of input features per time step
n_in = Xsd.shape[1]


### REK flow: random state and symbolic inputs
seed = 132345
rng = np.random.RandomState(seed)
x = T.matrix('x')     # input sequence, scanned row by row
y = T.imatrix('y')    # integer labels, indexed by i1 / i2 further down


def genW(size):
    """Draw an initial weight array.

    size: shape (tuple or list) of the array to create.
    Returns an ndarray of dtype theano.config.floatX whose entries are
    sampled uniformly from [-0.1, 0.1) with the module-level `rng`.
    """
    draw = rng.uniform(low=-0.1, high=0.1, size=size)
    return np.asarray(draw, dtype=theano.config.floatX)


# layer widths of the recurrent, embedding and kernel blocks
rl, el, kl = hiddens
# number of layers in each block
nr, nl, nh = len(rl), len(el), len(kl)
M = n_in      # input width
N = rl[-1]    # width of the recurrent block's last layer (= state width)
######
# Recurrent block
# W0 maps the input, Wu maps the previous state; both feed the first (tanh)
# layer. The remaining nr-1 layers are dense layers applied within the same
# time step.
W0 = theano.shared(value=genW((M,rl[0])),name="W0",borrow=True)
Wu = theano.shared(value=genW((rl[-1],rl[0])),name="Wu",borrow=True)
b0 = theano.shared(value=np.zeros(rl[0],dtype=theano.config.floatX),name="b0",borrow=True)
# initial state, shape (1, N)
u0 = theano.shared(value=np.zeros((1,N),dtype=theano.config.floatX),borrow=True)
params = [W0,Wu,b0]
W = []
b = []
for layer, (n_from, n_to) in enumerate(zip(rl[:-1], rl[1:]), 1):
    W_l = theano.shared(value=genW((n_from,n_to)),name="W"+str(layer),borrow=True)
    b_l = theano.shared(value=np.zeros(n_to,dtype=theano.config.floatX),name="b"+str(layer),borrow=True)
    W.append(W_l)
    b.append(b_l)
    params += [W_l, b_l]


## theano scan over the rows of x
def recurrence(xt,u_t1):
    """One scan step.

    xt:   current row of x.
    u_t1: state returned by the previous step (u0 on the first step).
    Returns the new state and an update dict that maps u0 to that state.
    """
    state = T.tanh(T.dot(xt,W0) + T.dot(u_t1,Wu) + b0)
    for W_l, b_l in zip(W, b):
        state = activation(T.dot(state,W_l) + b_l)
    return state, OrderedUpdates({u0: state})


u_t, update_u0 = theano.scan(fn=recurrence, sequences=[x], outputs_info=[u0])
# the state keeps u0's leading axis of length 1, so u_t is 3-d;
# index 0 on the middle axis leaves one row per time step
rnn_out = u_t[:,0,:]


######
# Embedding block: dense layers shared by both data streams
LLayer_W = []
LLayer_b = []
for i in range(nl):
    # the first embedding layer reads the recurrent output (width N)
    size_in = N if i == 0 else el[i-1]
    LLayer_W.append(theano.shared(genW([size_in,el[i]]),name='W_e'+str(i),borrow=True))
    LLayer_b.append(theano.shared(np.zeros(el[i],dtype=theano.config.floatX),name='b_e'+str(i),borrow=True))
    params += [LLayer_W[i],LLayer_b[i]]


def _embed(h):
    """Run h through every embedding layer in order and return the result."""
    for W_e, b_e in zip(LLayer_W, LLayer_b):
        h = activation(T.dot(h,W_e) + b_e)
    return h


# row indices into rnn_out for the two members of each pair
i1 = T.ivector('i1')
i2 = T.ivector('i2')
# data stream 1
u_x = _embed(rnn_out[i1])
# data stream 2
u_y = _embed(rnn_out[i2])

# combine the two embeddings: absolute difference and elementwise product,
# each with its own weight matrix, summed into one layer of width kl[0]
u1 = T.abs_(u_x - u_y)
u2 = u_x * u_y
MW1 = theano.shared(genW([el[-1],kl[0]]),name='WM1',borrow=True)
MW2 = theano.shared(genW([el[-1],kl[0]]),name='WM2',borrow=True)
Mb = theano.shared(np.zeros(kl[0],dtype=theano.config.floatX),name='bM',borrow=True)
u = activation(T.dot(u1,MW1) + T.dot(u2,MW2) + Mb)
params.extend([MW1,MW2,Mb])

#########
# Kernel block: dense layers applied to the combined pair representation
HLayer_W = []
HLayer_b = []
for i in range(nh):
    # layer 0 takes kl[0] inputs (the width of the combine layer `u`);
    # layer i > 0 takes the previous kernel layer's width
    size_in = kl[max(i-1, 0)]
    HLayer_W.append(theano.shared(genW([size_in,kl[i]]),name='W_k'+str(i+nl),borrow=True))
    HLayer_b.append(theano.shared(np.zeros(kl[i],dtype=theano.config.floatX),name='b_k'+str(i+nl),borrow=True))
    params += [HLayer_W[i],HLayer_b[i]]

#########
# Output layer: a single unit, bias initialised to one
outLayer_W = theano.shared(genW([kl[-1],1]),name='W_out',borrow=True)
outLayer_b = theano.shared(np.ones(1,dtype=theano.config.floatX),name='b_out',borrow=True)
params += [outLayer_W,outLayer_b]

# forward pass through the kernel layers
K = u
for W_k, b_k in zip(HLayer_W, HLayer_b):
    K = activation(T.dot(K,W_k) + b_k)

#########
# Output kernel: sigmoid of the single output unit, one value per pair
K = T.nnet.sigmoid(T.dot(K,outLayer_W) + outLayer_b)

#########
# Pair target: 1. where the labels at i1 and i2 are equal, 0. otherwise
kY = T.eq(y[i1],y[i2])*1.

#########
# Loss: binary cross entropy between kernel output and pair target,
# averaged over all pairs
CE = T.mean(T.nnet.binary_crossentropy(K,kY))

########
def gen_gram(oD, data):
    """Build a symmetric gram matrix from upper-triangle values.

    oD:   side length of the square matrix.
    data: values for the upper triangle (diagonal included), in the order
          produced by np.triu_indices(oD).
    Returns an (oD, oD) float32 array, mirrored across the diagonal, with
    every diagonal entry set to 1.
    """
    upper = np.zeros((oD, oD), dtype=np.float32)
    rows, cols = np.triu_indices(oD)
    upper[rows, cols] = data
    # mirroring doubles the diagonal; it is overwritten with 1 right after
    full = upper + upper.T
    np.fill_diagonal(full, 1.)
    return full

## Training
### Parameters

In [None]:
# number of nearest neighbors
k = 5
# step size of the gradient updates
learning_rate = 0.1
# number of training epochs
n_epochs = 1000
# together with n_epochs this sets how often training pairs are re-selected
n_shuffle = 10
# training length (first 5000 time points)
tlength = 5000

### Training

In [4]:
# Training pairs are re-selected every ep_step epochs.
# NOTE(review): ep_step is computed from the n_epochs value that is in scope
# when this line runs, i.e. before n_epochs is re-assigned further down in
# this cell. Behavior is left unchanged here; confirm the intended cadence.
ep_step = n_epochs // n_shuffle
# training inputs / labels held in shared variables and substituted for the
# symbolic x / y through `givens`
sX = theano.shared(np.asarray(Xsd[:tlength], dtype=theano.config.floatX))
sY = theano.shared(np.asarray(Y[:tlength].reshape(-1,1), dtype=np.int32))

####
#theano function to train models: one plain gradient-descent step on CE
cost = CE
gparams = [T.grad(cost,param) for param in params]
updates = [(param, param - learning_rate * gparam) for param,gparam in zip(params,gparams)]
train_rnn = theano.function(
    inputs=[i1,i2],
    outputs=[cost],
    updates=updates,
    givens={
        x: sX,
        y: sY
    },
)
####
#theano function to predict similarity for the index pairs (i1, i2)
predictK = theano.function(
    inputs=[i1,i2],
    outputs=K,
    givens={x: sX}
)

n_epochs = 10000
testlength = 5365
trainY = Y[:tlength]
testY = Y[tlength:testlength]

print("...training")
for epoch in range(n_epochs):
    if epoch % ep_step == 0:
        # kernel value for every pair of training points under the current weights
        pair_i, pair_j = np.triu_indices(n=tlength)
        curK = gen_gram(tlength,
                        predictK(pair_i.astype(np.int32), pair_j.astype(np.int32)).flatten())
        # NOTE(review): argsort is ascending, so the first k columns are the k
        # smallest kernel values in each row; the test cell sorts on -gram
        # instead. Kept as in the original -- confirm this is intended.
        sorted_ind = np.argsort(curK,axis=1)
        # labels of the k selected points per row; indexing trainY directly
        # yields the same values as tiling it tlength times and indexing the
        # flat copy, without allocating the tiled array
        knn_Y = trainY[sorted_ind[:,:k]]
        # keep the rows whose k selected labels are not all 1
        flt_indx = np.argwhere(np.mean(knn_Y,axis=1) < 1)[:,0]
        ind1 = np.repeat(flt_indx,repeats=k).astype(np.int32)
        ind2 = sorted_ind[flt_indx,:k].flatten().astype(np.int32)
    # train_rnn returns a one-element list holding the cost
    epoch_cost = train_rnn(ind1,ind2)[0]

    print('Training epoch %d, cost %f' % (epoch, float(epoch_cost)))

## Applying to test data

In [23]:
#training and testing gram matrices
# NOTE(review): no cell in this notebook defines `gram`, so this cell raised a
# NameError on a fresh kernel. It is rebuilt here by scoring every pair among
# the first `testlength` rows with the trained kernel. Confirm this matches
# how the reported accuracy was originally produced.
predictK_all = theano.function(
    inputs=[i1,i2],
    outputs=K,
    givens={x: theano.shared(np.asarray(Xsd[:testlength], dtype=theano.config.floatX))}
)
all_i1, all_i2 = np.triu_indices(n=testlength)
gram = gen_gram(testlength,
                predictK_all(all_i1.astype(np.int32), all_i2.astype(np.int32)).flatten())
train_gram = gram[:tlength,:tlength]
# rows: test points, columns: training points
test_gram = gram[tlength:,:tlength]

#KNN
# order training points from most to least similar for every test point
sorted_ind = np.argsort(-test_gram,axis=1)
# training labels in that order; indexing trainY directly gives the same
# values as tiling it once per test row and indexing the flat copy
sorted_y = trainY[sorted_ind]
# majority label among the k most similar training points
y_p = mode(sorted_y[:,:k],axis=1)[0]
#test accuracy
print(np.mean(y_p.flatten()==testY))

0.710816438356
