In [1]:
# theano imports
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from theano.tensor.nnet.conv import conv2d
# from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.pool import pool_2d as max_pool_2d

# other imports
from sklearn.cross_validation import train_test_split
import numpy as np
import pandas as pd
import pickle

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the label table and the pickled feature tensor from disk.
labels = pd.read_csv('one_hot_labels.csv')

# NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
with open('conv_data/5_tensor.p', 'rb') as tensor_file:
    loaded_data = pickle.load(tensor_file)

In [4]:
# Pull the label table out of the DataFrame as a plain NumPy array
# (presumably one-hot rows, going by the CSV file name -- confirm).
one_hot = labels.values

In [5]:
# Hold out a test set using train_test_split's default split size.  Pinning
# random_state makes the shuffle -- and therefore every accuracy figure
# printed further down -- reproducible after a kernel restart.
xTrain, xTest, yTrain, yTest = train_test_split(loaded_data, one_hot, random_state=0)

In [6]:
# Give every sample an explicit singleton channel axis: (N, 1, 5, 21).
conv_input_shape = (-1, 1, 5, 21)
xTrain, xTest = (split.reshape(conv_input_shape) for split in (xTrain, xTest))

In [7]:
print xTrain.shape, xTest.shape, yTrain.shape, yTest.shape

(101110, 1, 5, 21) (33704, 1, 5, 21) (101110, 6) (33704, 6)


In [9]:
# Symbolic random stream; dropout() draws its binomial keep-masks from it.
srng = RandomStreams()

def floatX(X):
    """Return X as a NumPy array whose dtype is Theano's configured floatX."""
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)

def glorot_init_weights(shape):
    """Create a shared weight matrix with a scaled, Glorot-style uniform init.

    Parameters
    ----------
    shape : tuple of two ints
        (h, w) of the matrix.  Only 2-D shapes are supported because the
        tuple is unpacked into exactly two values.

    Returns
    -------
    Theano shared variable holding a floatX array of the requested shape.
    """
    (h, w) = shape
    # random_sample() draws from U[0, 1); subtracting 0.5 centres it.  Per the
    # original author's note, the factors are: 2.0 to correct for the
    # half-width uniform, sqrt(6)/sqrt(h + w) for the Glorot bound, and an
    # extra 0.1 for the softmax derivative.
    # np.sqrt is used because no bare `sqrt` is imported in this notebook.
    normalizer = 2.0 * np.sqrt(6) / np.sqrt(h + w) * 0.1
    return theano.shared(floatX((np.random.random_sample(shape) - 0.5) * normalizer))

def init_weights(shape):
    """Return a Theano shared variable of `shape` filled with small Gaussian noise.

    Values are standard-normal draws scaled by 0.01 and cast with floatX().
    """
    gaussian = np.random.randn(*shape)
    scaled = gaussian * 0.01
    return theano.shared(floatX(scaled))

def activate(X):
    """Apply Theano's built-in ReLU (T.nnet.relu) to X."""
    relu = T.nnet.relu
    return relu(X)

def rectify(X):
    """Rectified linear unit: element-wise maximum of X and zero."""
    floor = 0.
    rectified = T.maximum(X, floor)
    # Leaky variant the author experimented with: T.maximum(X, 0.01*X)
    return rectified

def softmax(X):
    """Numerically stable softmax along axis 1.

    The maximum along axis 1 is subtracted before exponentiating so the
    largest exponent is 0.  keepdims=True leaves the reduced axis in place
    with length 1, so the result broadcasts against X for any rank >= 2.
    The previous dimshuffle(0, 1, 'x', 'x') referred to a dimension that no
    longer exists after reducing a 2-D matrix of logits.

    Parameters
    ----------
    X : Theano tensor with at least two dimensions.

    Returns
    -------
    Theano tensor of the same shape as X whose entries sum to 1 along axis 1.
    """
    shifted = X - X.max(axis=1, keepdims=True)
    e_x = T.exp(shifted)
    return e_x / e_x.sum(axis=1, keepdims=True)

def dropout(X, p=0.0):
    """Inverted dropout: zero entries of X with probability p and rescale.

    When p is not greater than zero, X is returned untouched.  Otherwise a
    binomial keep-mask is drawn from the module-level `srng` stream and the
    surviving entries are divided by the retain probability (1 - p).
    """
    if not (p > 0):
        return X

    keep_fraction = 1 - p
    keep_mask = srng.binomial(X.shape, p=keep_fraction, dtype=theano.config.floatX)
    return X * keep_mask / keep_fraction

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build the RMSprop update list for theano.function(updates=...).

    Parameters
    ----------
    cost : symbolic scalar to minimise.
    params : list of shared variables to optimise.
    lr : step size.
    rho : decay rate of the running average of squared gradients.
    epsilon : added under the square root for numerical stability.

    Returns
    -------
    List of (shared_variable, new_expression) pairs: for each parameter,
    first its accumulator update and then the parameter update.
    """
    gradients = T.grad(cost=cost, wrt=params)
    updates = []

    for param, grad in zip(params, gradients):
        # Accumulator starts as zeros with the parameter's shape and dtype.
        mean_square = theano.shared(param.get_value() * 0.)
        mean_square_next = rho * mean_square + (1 - rho) * grad ** 2
        scaled_grad = grad / T.sqrt(mean_square_next + epsilon)
        updates.extend([
            (mean_square, mean_square_next),
            (param, param - lr * scaled_grad),
        ])

    return updates

def model(X, wi, wh, wo, p_drop_conv, p_drop_hidden):
    """Build the conv -> hidden -> softmax graph.

    Parameters
    ----------
    X : symbolic 4-D input batch.
    wi : convolution filters.
    wh : hidden-layer weight matrix.
    wo : output-layer weight matrix.
    p_drop_conv : dropout probability applied after the convolution layer.
    p_drop_hidden : dropout probability applied after the hidden layer.

    Returns
    -------
    (layer_1, layer_2, pyx): the flattened (and possibly dropped-out)
    convolution features, the hidden activations, and the softmax output.
    """
    layer_1 = rectify(conv2d(X, wi, border_mode='valid'))
    # flatten(2) keeps the batch axis and collapses everything else, so the
    # feature width follows the number of filters in `wi` instead of being
    # hard-coded to 256.
    layer_1 = layer_1.flatten(2)
    layer_1 = dropout(layer_1, p_drop_conv)

    layer_2 = rectify(T.dot(layer_1, wh))
    layer_2 = dropout(layer_2, p_drop_hidden)

    pyx = T.nnet.softmax(T.dot(layer_2, wo))
    return layer_1, layer_2, pyx

In [10]:
# Symbolic inputs: a 4-D float32 batch and a float32 matrix of targets.
X = T.ftensor4()
Y = T.fmatrix()

# mini-batch size used by the training loop
mbs = 128

# number of convolution filters (feature maps produced by the first layer)
n_conv = 256

# number of units in the hidden layer
h_depth = 600

# The filters have the same 5 x 21 spatial size the inputs were reshaped to,
# and model() convolves in 'valid' mode.
wi = init_weights((n_conv, 1, 5, 21))
wh = init_weights((n_conv, h_depth))
wo = init_weights((h_depth, 6))

# Two graphs over the same weights: one with dropout (0.2 after the
# convolution, 0.5 after the hidden layer) used for the training cost, and
# one without dropout used for prediction.
noise_l1, noise_l2, noise_py_x = model(X, wi, wh, wo, 0.2, 0.5)
l1, l2, py_x = model(X, wi, wh, wo, 0., 0.)
# predicted class = index of the largest softmax output
y_x = T.argmax(py_x, axis=1)


# Mean categorical cross-entropy of the dropout graph against the targets.
cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [wi, wh, wo]
updates = RMSprop(cost, params, lr=1e-7)

# train: one RMSprop step on a mini-batch, returns that batch's cost.
# predict: class indices from the dropout-free graph.
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

# number of training iterations to perform
n_train = 101

costs = []
# performing our training
for i in xrange(n_train):
    for start, end in zip(xrange(0, len(xTrain), mbs), 
                          xrange(mbs, len(xTrain), mbs)):
        cost = train(xTrain[start:end], yTrain[start:end])
#         a, b, c = model(floatX(xTrain[start:end]), wi, wh, wo, 0., 0.)
#         print a.eval().shape, b.eval().shape, c.eval().shape,
#         print i, np.mean(np.argmax(yTest, axis=1) == predict(xTest))
    costs.append(cost)
    print i, np.mean(np.argmax(yTest, axis=1) == predict(xTest))

0 0.157043674341
1 0.37295276525
2 0.375178020413
3 0.376898884405


KeyboardInterrupt: 

In [None]:
__author__ = 'mike.bowles'
#based on  code from https://gist.github.com/tmramalho/5e8fda10f99233b2370f
import theano
import theano.tensor as T
import numpy as np
import cPickle as pickle
import matplotlib.pyplot as plt


def inspect_inputs(i, node, fn):
    """Debug hook: print `i`, `node`, and the first item of each entry in fn.inputs."""
    values = [cell[0] for cell in fn.inputs]
    print(i, node, "input(s) value(s):", values)

def inspect_outputs(i, node, fn):
    """Debug hook: print the first item of each entry in fn.outputs."""
    values = [cell[0] for cell in fn.outputs]
    print(" output(s) value(s):", values)

# layer sizes: input features, hidden units, outputs
nin = 5
n_hidden = 40
nout = 1


def _small_random_shared(rows, cols, name):
    """Named shared (rows, cols) matrix of standard-normal draws scaled by 0.001."""
    values = np.asarray(np.random.randn(rows, cols) * 0.001, dtype=theano.config.floatX)
    return theano.shared(values, name)


def _zero_shared(length, name):
    """Named shared vector of `length` zeros in floatX."""
    return theano.shared(np.zeros((length,), dtype=theano.config.floatX), name)


# Candidate-state / output weights and their biases.  The random matrices are
# drawn in the same order as before so a seeded NumPy RNG gives the same values.
W_uh = _small_random_shared(nin, n_hidden, 'W_uh')
W_hh = _small_random_shared(n_hidden, n_hidden, 'W_hh')
W_hy = _small_random_shared(n_hidden, nout, 'W_hy')
b_hh = _zero_shared(n_hidden, 'b_hh')
b_hy = _zero_shared(nout, 'b_hy')

# Weights of the two gates (r and z) used in recurrent_fn; the gates have no
# bias terms yet ("add bias terms later").
Wr_uh = _small_random_shared(nin, n_hidden, 'Wr_uh')
Wr_hh = _small_random_shared(n_hidden, n_hidden, 'Wr_hh')
Wz_uh = _small_random_shared(nin, n_hidden, 'Wz_uh')
Wz_hh = _small_random_shared(n_hidden, n_hidden, 'Wz_hh')

# NOTE(review): this symbolic matrix is not referenced by any expression
# visible in this cell, and `x` is rebound to a NumPy array in the
# __main__ section below.
x = T.matrix('x')
#activ = T.nnet.sigmoid
def activ(x, f=None):
    """Squashing non-linearity: tanh when f == 'tan', logistic sigmoid otherwise."""
    # A leaky rectifier, T.maximum(x, -0.1*x), was tried here at one point.
    squash = T.tanh if f == 'tan' else T.nnet.sigmoid
    return squash(x)

float_dtype = theano.config.floatX

# Symbolic inputs of the training function: learning rate, input sequence
# and target sequence.
lr = T.scalar('lr', dtype=float_dtype)
u = T.matrix('u', dtype=float_dtype)
t = T.matrix('t', dtype=float_dtype)

# Initial hidden state handed to scan: a shared vector of n_hidden zeros.
h0_tm1 = theano.shared(np.zeros(n_hidden, dtype=float_dtype))

def recurrent_fn(u_t, h_tm1, W_hh, W_uh, W_hy, Wr_uh, Wr_hh, Wz_uh, Wz_hh, b_hh, b_hy):
    """One gated recurrent step, written as the inner function of theano.scan.

    Parameters
    ----------
    u_t : input at the current step.
    h_tm1 : hidden state from the previous step.
    W_* / Wr_* / Wz_* / b_* : weights and biases, passed as non-sequences.

    Returns
    -------
    (h_t, y_t): the new hidden state and the linear read-out for this step.
    """
    # Two sigmoid gates computed from the current input and previous state.
    reset_gate = activ(T.dot(u_t, Wr_uh) + T.dot(h_tm1, Wr_hh))
    update_gate = activ(T.dot(u_t, Wz_uh) + T.dot(h_tm1, Wz_hh))

    # tanh candidate state; the reset gate scales the recurrent contribution.
    recurrent_drive = reset_gate * T.dot(h_tm1, W_hh)
    candidate = activ(T.dot(u_t, W_uh) + recurrent_drive, f='tan')

    # Blend previous state and candidate; the hidden bias is added after the blend.
    carried = (1 - update_gate) * h_tm1
    refreshed = update_gate * candidate
    h_t = carried + refreshed + b_hh

    y_t = T.dot(h_t, W_hy) + b_hy
    return h_t, y_t

# Every trainable shared variable, in the order recurrent_fn receives them
# after (u_t, h_tm1).
rnn_weights = [W_hh, W_uh, W_hy, Wr_uh, Wr_hh, Wz_uh, Wz_hh, b_hh, b_hy]

# Unroll recurrent_fn over the rows of u.  Only h is fed back (seeded with
# h0_tm1); y has no initial state because it is output-only.
scan_outputs, _ = theano.scan(recurrent_fn,
                              sequences=u,
                              outputs_info=[h0_tm1, None],
                              non_sequences=list(rnn_weights))
h, y = scan_outputs

# Mean squared error between the targets and the per-step outputs.
cost = T.mean((t - y) ** 2)

rnn_grads = T.grad(cost, rnn_weights)
(gW_hh, gW_uh, gW_hy,
 gWr_uh, gWr_hh,
 gWz_uh, gWz_hh,
 gb_hh, gb_hy) = rnn_grads

# Plain gradient descent: one (variable, new_value) pair per weight, in the
# same order as rnn_weights.
update = list((weight, weight - lr * grad)
              for weight, grad in zip(rnn_weights, rnn_grads))

# For debugging, a mode can be passed here, e.g. mode='DebugMode' or
# mode=theano.compile.MonitorMode(pre_func=inspect_inputs, post_func=inspect_outputs)
train_step = theano.function([u, t, lr], cost,
                             on_unused_input='warn',
                             updates=update,
                             allow_input_downcast=True)




if __name__ == '__main__':

    (xlist, ylist) = pickle.load(open('stockTT.bin', 'rb'))

    x = np.array(xlist, dtype = theano.config.floatX)
    y = np.array(ylist, dtype = theano.config.floatX)

    lr = 0.01
    e = 1.0
    nPasses = 1
    vals = []
    sqErr = []
    # best = 2
    batchSize = 2
    
    for i in range(nPasses):
        # for start, end in zip(range(0, len(x), batchSize), range(batchSize, len(x), batchSize)):
        #     u = np.asarray(x[start:end], dtype = theano.config.floatX).reshape((batchSize,nin))
        #     t = np.array(y[start:end], dtype=theano.config.floatX).reshape(batchSize, 1)
        #
        #     c = train_step(u, t, lr)
        #     print "iteration {0}: {1}".format(start, np.sqrt(c))
        #     e = 0.1*np.sqrt(c) + 0.9*e
        #     vals.append(e)
        #     sqErr.append(c)
        for istart in range(1001): #len(x) - batchSize
            u = np.asarray(x[istart:istart + batchSize], dtype=theano.config.floatX).reshape((batchSize, nin))
            t = np.array(y[istart:istart + batchSize], dtype=theano.config.floatX).reshape(batchSize, 1)

            c = train_step(u, t, lr)
            if istart % 100 == 0:
                print "iteration {0}: {1}".format(istart, np.sqrt(c))
            e = 0.1 * np.sqrt(c) + 0.9 * e
            vals.append(e)
            sqErr.append(c)

    errStatStart=500
    aStd = 3.55611944113  #standard deviation of labels
    errStd = np.sqrt(np.mean(sqErr[errStatStart:]))
    print 'RSS error =   ', errStd
    print 'R-squared =   ', 1 - (errStd/aStd) * (errStd/aStd)
    plt.plot(vals)
    plt.show()