# Recurrent Network for Character Prediction

In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import caffe, string, os
from caffe import Net, layers as Lr, params as Pr, NetSpec
from caffe.proto.caffe_pb2 import NetParameter, BlobShape, SolverParameter, EltwiseParameter
import cPickle as pickle
from glob import glob

In [3]:
def save_pkl(file, obj):
    """Pickle `obj` to the path `file` using the highest available protocol."""
    # The context manager flushes and closes the handle even if dumping fails.
    with open(file, 'wb') as handle:
        pickle.dump(obj, handle, protocol=-1)


def load_pkl(file):
    """Return the object unpickled from the path `file`.

    NOTE: unpickling can execute arbitrary code; only load files you trust.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle)


def sf(*x):
    """Join the string form of every argument with '_', e.g. sf('h', 0, 1) -> 'h_0_1'."""
    return '_'.join(str(i) for i in x)

## Load Data

In [4]:
# Hyper-parameters shared by the data pipeline and the network definition.
hypes = dict(
    sequence_length=100,  # number of time steps the network is unrolled for
    layers_num=1,         # number of stacked recurrent layers
    state_dim=256,        # width of each layer's state blob
    batch_size=200,       # sequences processed in parallel
)

# Short aliases used throughout the rest of the notebook.
T, L, d, b = (hypes[key] for key in
              ('sequence_length', 'layers_num', 'state_dim', 'batch_size'))

In [5]:
# Read the whole training corpus; the context manager closes the handle promptly.
with open('rawtxt/linux_kernel.txt', 'r') as corpus:
    txt = corpus.read()

# C is the sorted character vocabulary, n the corpus length, k the vocabulary size.
C = sorted(set(txt))
n, k = len(txt), len(C)

# Smallest unsigned integer type able to hold every character index (0 .. k-1).
if k <= 2**8: uintn = uint8
else: uintn = uint16

# Encode each character as its position in C. A dict lookup is O(1) per
# character, whereas C.index(c) rescans the vocabulary for every character.
char_to_index = dict((ch, idx) for idx, ch in enumerate(C))
X = array([char_to_index[c] for c in txt], dtype=uintn)

# Next-character targets: Y[i] is the character that follows X[i].
Y = X[1:].copy()
X = X[:-1]

def chop(x, n=None, m=None):
    """Split `x` along its first axis into equal pieces, dropping any remainder.

    Args:
        x: sequence or array to split.
        n: number of pieces wanted; each piece then holds len(x)//n items.
            Takes precedence over `m` when both are given.
        m: length of each piece; used only when `n` is not given, in which
            case len(x)//m pieces are produced.

    Returns:
        An array whose first axis indexes the pieces (n pieces of m items).

    Raises:
        ValueError: if neither `n` nor `m` is a non-zero value.
    """
    if n:
        m = len(x)//n
    elif m:
        # Derive the piece count only when the caller did not fix it.
        # Recomputing it after n was given silently changed the requested
        # number of pieces (e.g. len(x)=10, n=4 used to yield 5 pieces).
        n = len(x)//m
    else:
        raise ValueError('chop() needs a non-zero n (piece count) or m (piece length)')
    pieces = split(array(x[:m*n]), n)
    return array(pieces)

def rshape(A):
    """Arrange a flat sequence into blocks shaped (num_blocks, T, b)."""
    streams = chop(A, n=b)                # b parallel streams: (b, steps)
    time_major = rollaxis(streams, 1, 0)  # put the time axis first: (steps, b)
    return chop(time_major, m=T)          # windows of T consecutive steps

X = rshape(X)
Y = rshape(Y)

# Persist the prepared arrays to disk.
save('X.npy', X)
save('Y.npy', Y)
print(X.shape)

(310, 100, 200)


## Deploy Saved Model

In [6]:
seed_chars = '#i'
temperature = 0.7
num_chars = 500

# NOTE(review): load_pkl unpickles these files; only load checkpoints you trust.
params = load_pkl('save/params.pkl')
# caffe.TEST names the phase that used to be passed as the bare literal 1.
net = caffe.Net('save/deploy.prototxt', caffe.TEST)
PC = load_pkl('save/param_corresp.pkl')

# Copy the saved weights into the deploy net: each PC entry pairs a key of
# `params` with the name of the net layer that receives those blobs.
for i, j in PC:
    for ip, p in enumerate(net.params[j]):
        p.data[...] = params[i][ip]

# Prime the recurrent state by feeding the seed characters one at a time.
for c in seed_chars:
    x = C.index(c)
    # One-hot encode the current character in the input blob.
    net.blobs[sf('x',0)].data[...] = 0
    net.blobs[sf('x',0)].data[0, x] = 1
    # Carry the state forward: the previous output h_1_l becomes the input h_0_l.
    for l in range(L):
        state_i = net.blobs[sf('h',0,l)].data
        state_f = net.blobs[sf('h',1,l)].data
        state_i[...] = state_f
    net.forward()

# Sample num_chars characters, feeding every sampled character back as input.
gen_chars = []
for t in range(num_chars):
    z = net.blobs[sf('z',0)].data[0].copy().astype(float)
    # Temperature-scaled softmax. Shifting by the largest value leaves the
    # distribution unchanged but keeps exp() from overflowing to inf, which
    # would turn p into NaNs and make the sampling call fail.
    z = z/temperature
    z -= z.max()
    p = exp(z)
    p = p/p.sum()
    # `random` is numpy.random here (pulled in by %pylab); an int first
    # argument samples from range(k).
    x = random.choice(k, p=p)
    gen_chars.append(C[x])
    net.blobs[sf('x',0)].data[...] = 0
    net.blobs[sf('x',0)].data[0, x] = 1
    for l in range(L):
        state_i = net.blobs[sf('h',0,l)].data
        state_f = net.blobs[sf('h',1,l)].data
        state_i[...] = state_f
    net.forward()

print(seed_chars + ''.join(gen_chars))

#ifdef CONFIG_PGR(struct module *module_noter_prev_id(&cmd_info("set_file("op"),
			     int enabled = 1;

	if (preempt_to__state(&devid - interval) {
		if (count[2] = {
	{ CTL_INT,	NET_NES_LIOK:
		kdb_prio(tick);
	if (bm_sen are done the last to the before tched start to can the user no pointer to the interrupts */
	return 0;
}

/**
 * Returns;
	struct perf_event *end of the GNUTA the compated signal to usermode to be the task want to code chip interrupts to the object the unsigned long ip = (uns


## Create Model

In [7]:
def rnn_step(z, h, t, l):
    """Build one tanh recurrent step for layer `l` at time step `t`.

    The input `z` and the previous state `h` are concatenated, passed through
    a fully connected layer with `d` outputs and squashed with tanh. The
    layer is named per time step (fc_<t>_<l>) while its weight and bias blobs
    are named per layer only (W_<l>, b_<l>), so every time step refers to the
    same parameter names. Returns the top of the tanh layer.
    """
    weight_spec = dict(lr_mult=1, decay_mult=1, name=sf('W', l))
    bias_spec = dict(lr_mult=2, decay_mult=0, name=sf('b', l))

    joined = Lr.Concat(z, h)
    projected = Lr.InnerProduct(
        joined,
        num_output=d,
        param=[weight_spec, bias_spec],
        weight_filler=dict(type='uniform', min=-0.01, max=0.01),
        name=sf('fc', t, l)
    )
    return Lr.TanH(projected)

In [8]:
PROD = EltwiseParameter.PROD

def lstm_step(z, h, t, l):
    """Build one LSTM step for layer `l` at time step `t`.

    The state `h` packs the cell state and the hidden output side by side:
    it is sliced at d//2 into `c` (first part) and the hidden part. The
    hidden part is concatenated with the input `z` and projected to 2*d
    values, which are sliced into the gates i, f, o and the candidate g.
    The new cell state is f*c + i*g, the new hidden output is o*tanh(c), and
    the two are concatenated again so the returned top has the same layout
    as the incoming state.

    The slice points assume the state width `d` is even.
    """

    kwargs_fc = {
        'num_output': 2*d,
        'param': [{'lr_mult': 1, 'decay_mult': 1, 'name': sf('W', l)},
                  {'lr_mult': 2, 'decay_mult': 0, 'name': sf('b', l)}],
        'weight_filler': {'type': 'uniform', 'min': -0.01, 'max': 0.01},
        'name': sf('fc', t, l)
    }

    # Floor division keeps the slice points integral even when `/` means true
    # division (Python 3 or `from __future__ import division`); with classic
    # Python 2 integer division the values are unchanged.
    c, h = Lr.Slice(h, slice_point=d//2, ntop=2)
    h = Lr.Concat(z, h)
    h = Lr.InnerProduct(h, **kwargs_fc)
    i, f, o, g = Lr.Slice(h, slice_point=[d//2, d, 3*d//2], ntop=4)
    i = Lr.Sigmoid(i)
    f = Lr.Sigmoid(f)
    o = Lr.Sigmoid(o)
    g = Lr.TanH(g)
    # The outer Eltwise is left at its default operation and combines the two products.
    c = Lr.Eltwise(Lr.Eltwise(f, c, operation=PROD), Lr.Eltwise(i, g, operation=PROD))
    h = Lr.Eltwise(o, Lr.TanH(c), operation=PROD)
    h = Lr.Concat(c, h)

    return h

In [9]:
def rnn_param(T, b, drop=True):
    """Describe the recurrent network unrolled for `T` steps at batch size `b`.

    Layer 0 is a plain tanh step (rnn_step); every further layer is an LSTM
    step (lstm_step), followed by Dropout when `drop` is true. Each time step
    ends in a fully connected layer with `k` outputs and a SoftmaxWithLoss.

    Tops registered on the net: h_0_<l> (initial states), and for every step
    t: x_<t>, y_<t>, z_<t>, h_<t+1>_<l> and loss_<t>.

    Returns the net definition produced by NetSpec.to_proto().
    """
    def blob_shape(*dims):
        # Build a BlobShape message holding the given dimensions.
        shape = BlobShape()
        shape.dim.extend(dims)
        return shape

    spec = NetSpec()

    # Initial state of every layer, one (b, d) blob each.
    state_shape = blob_shape(b, d)
    h = []
    for l in range(L):
        initial = Lr.DummyData(shape=state_shape)
        h.append(initial)
        setattr(spec, sf('h', 0, l), initial)

    input_shape = blob_shape(b, k)
    label_shape = blob_shape(b)

    for t in range(T):

        x = Lr.DummyData(shape=input_shape)
        y = Lr.DummyData(shape=label_shape)

        # Advance every layer by one step; h[l] always holds the latest state.
        h[0] = rnn_step(x, h[0], t, 0)
        for l in range(1, L):
            stepped = lstm_step(h[l-1], h[l], t, l)
            h[l] = Lr.Dropout(stepped) if drop else stepped

        # Output projection; its blobs use the index L, one past the recurrent layers.
        z = Lr.InnerProduct(
            h[-1],
            num_output=k,
            param=[dict(lr_mult=1, decay_mult=1, name=sf('W', L)),
                   dict(lr_mult=2, decay_mult=0, name=sf('b', L))],
            weight_filler=dict(type='uniform', min=-0.01, max=0.01),
            name=sf('fc', t, L)
        )
        loss = Lr.SoftmaxWithLoss(z, y)

        # Registration order is kept as x, y, z, states, loss.
        for label, top in (('x', x), ('y', y), ('z', z)):
            setattr(spec, sf(label, t), top)
        for l in range(L):
            setattr(spec, sf('h', t+1, l), h[l])
        setattr(spec, sf('loss', t), loss)

    return spec.to_proto()

In [10]:
# Optimiser settings; each key is set as an attribute on the SolverParameter below.
solver_hypes = dict(
    base_lr=5e-3,
    weight_decay=1e-4,
    solver_type=SolverParameter.RMSPROP,
    rms_decay=0.8,
    lr_policy='exp',
    gamma=0.9999,
    solver_mode=SolverParameter.GPU,
    device_id=2
)

solver_param = SolverParameter()
solver_param.net_param.CopyFrom(rnn_param(T, b))
# The test net is the same architecture built with drop=False.
test_net = solver_param.test_net_param.add()
test_net.CopyFrom(rnn_param(T, b, drop=False))
solver_param.test_iter.extend([1])
# Very large interval; presumably so the solver never triggers its own test pass.
solver_param.test_interval = 10**9
for field in solver_hypes:
    setattr(solver_param, field, solver_hypes[field])

with open('solver.prototxt', 'w') as out:
    out.write(str(solver_param))

# Pairs of (fc_<l>, fc_0_<l>) for every layer index including the output layer.
# The deploy cells use the first item as a key into the saved params and the
# second as the layer name in the one-step net.
PC = [(sf('fc', l), sf('fc', 0, l)) for l in range(L+1)]
save_pkl('PC.pkl', PC)

### Better to run this from the command line

In [None]:
!python train_recurrent.py

## Deploy (run this while training)

In [None]:
seed_chars = '#inc'
temperature = 1
num_chars = 500

# Load the last checkpoint in lexicographic filename order.
# NOTE(review): that is only the newest checkpoint if the iteration numbers in
# the filenames are zero-padded - confirm against train_recurrent.py.
# NOTE(review): load_pkl unpickles the file; only load checkpoints you trust.
params = load_pkl(sorted(glob('params/iter*.pkl'))[-1])

# Write the one-step, batch-size-1 net definition. The with-block guarantees
# the file is flushed and closed before caffe.Net reads it back.
with open('deploy.prototxt', 'w') as proto_file:
    proto_file.write(str(rnn_param(1, 1, drop=False)))
# caffe.TEST names the phase that used to be passed as the bare literal 1.
net = caffe.Net('deploy.prototxt', caffe.TEST)

# Copy the checkpoint weights into the deploy net: each PC entry pairs a key
# of `params` with the name of the net layer that receives those blobs.
for i, j in PC:
    for ip, p in enumerate(net.params[j]):
        p.data[...] = params[i][ip]

# Prime the recurrent state by feeding the seed characters one at a time.
for c in seed_chars:
    x = C.index(c)
    # One-hot encode the current character in the input blob.
    net.blobs[sf('x',0)].data[...] = 0
    net.blobs[sf('x',0)].data[0, x] = 1
    # Carry the state forward: the previous output h_1_l becomes the input h_0_l.
    for l in range(L):
        state_i = net.blobs[sf('h',0,l)].data
        state_f = net.blobs[sf('h',1,l)].data
        state_i[...] = state_f
    net.forward()

# Sample num_chars characters, feeding every sampled character back as input.
gen_chars = []
for t in range(num_chars):
    z = net.blobs[sf('z',0)].data[0].copy().astype(float)
    # Temperature-scaled softmax. Shifting by the largest value leaves the
    # distribution unchanged but keeps exp() from overflowing to inf, which
    # would turn p into NaNs and make the sampling call fail.
    z = z/temperature
    z -= z.max()
    p = exp(z)
    p = p/p.sum()
    # `random` is numpy.random here (pulled in by %pylab); an int first
    # argument samples from range(k).
    x = random.choice(k, p=p)
    gen_chars.append(C[x])
    net.blobs[sf('x',0)].data[...] = 0
    net.blobs[sf('x',0)].data[0, x] = 1
    for l in range(L):
        state_i = net.blobs[sf('h',0,l)].data
        state_f = net.blobs[sf('h',1,l)].data
        state_i[...] = state_f
    net.forward()

print(seed_chars + ''.join(gen_chars))