# DyNet Mini-batches RNN Example

Simple example of how to use mini-batches for RNNs in DyNet

In [1]:
import dynet as dn
from random import randint
from time import time
from numpy import mean, argmax, ceil

# Number of distinct token ids; random sequences draw tokens from [0, VOCAB_SIZE-1].
# It is incremented later in the notebook to make room for a padding token.
VOCAB_SIZE = 2
# Width of each row of the embedding lookup table (and the LSTM input size).
EMBEDDINGS_SIZE = 10
# Number of stacked layers passed to the LSTM builder.
LSTM_NUM_OF_LAYERS = 1
# Size of the LSTM state, which is also the input width of the output layer.
STATE_SIZE = 10
# Number of labels; random labels are drawn from [0, NUM_OF_CLASSES-1].
NUM_OF_CLASSES = 2
# Number of full passes over the training batches performed by train().
REPEATS = 1000
# Maximum number of sequences grouped into one mini-batch.
BATCH_SIZE = 2

Create a random sequence-to-label dataset

In [2]:
def gen_random_dataset(num_of_examples, max_seq):
    """Build a toy dataset of random token sequences with random labels.

    Args:
        num_of_examples: how many (sequence, label) pairs to generate.
        max_seq: exclusive upper bound on sequence length; each sequence
            holds between 1 and max_seq-1 tokens.

    Returns:
        A pair (X, Y): X is a list of token-id lists with ids in
        [0, VOCAB_SIZE-1], Y is a list of labels in [0, NUM_OF_CLASSES-1].
    """
    sequences, labels = [], []
    for _ in range(num_of_examples):
        # Draw order (length, then tokens, then label) matters for seeded runs.
        length = randint(1, max_seq-1)
        sequences.append([randint(0, VOCAB_SIZE-1) for _ in range(length)])
        labels.append(randint(0, NUM_OF_CLASSES-1))
    return sequences, labels

def print_dataset(X, Y):
    """Print every input next to its label, one pair per line.

    X and Y are walked in lockstep; extra items in the longer one are ignored.
    """
    for pair in zip(X, Y):
        print(*pair)

# Ten random examples, each between 1 and 9 tokens long.
X, Y = gen_random_dataset(num_of_examples=10, max_seq=10)
print_dataset(X, Y)

[0] 1
[0, 1, 1, 1, 1, 1, 0] 0
[0, 1, 0, 1, 0, 0, 1, 0, 1] 1
[0, 1, 1, 0, 0, 1, 1, 0, 0] 1
[0, 1, 0, 0, 0, 0] 0
[1] 0
[1, 1, 0, 0, 0, 0, 1] 0
[1, 1, 1, 0, 0, 1, 0, 0, 0] 0
[1, 1, 0, 0, 0, 1, 0, 0] 0
[1, 1, 1] 1


We will split our dataset into batches. Before splitting, we sort the dataset by sequence length so that the length variance within each batch is minimal.

In [3]:
def to_batch(X, Y, batch_size=None):
    """Sort the examples by sequence length and split them into mini-batches.

    Sorting first keeps sequences of similar length together, so little
    padding is needed inside each batch. The sort is stable, so examples of
    equal length keep their original relative order.

    Args:
        X: list of sequences.
        Y: list of labels, aligned with X. If the two differ in length, only
            the leading pairs that exist in both are used.
        batch_size: maximum number of examples per batch. Defaults to the
            module-level BATCH_SIZE.

    Returns:
        A pair (batched_X, batched_Y) of lists. Each entry is a tuple holding
        the sequences (or labels) of one batch; the final batch may be
        smaller than batch_size. No empty batch is ever produced.

    Raises:
        ValueError: if batch_size is smaller than 1.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    # Keep each label attached to its sequence while sorting by length.
    pairs = sorted(zip(X, Y), key=lambda pair: len(pair[0]))

    batched_X = []
    batched_Y = []
    # Stepping over the paired examples (rather than len(X)) guarantees
    # every emitted batch is non-empty.
    for start in range(0, len(pairs), batch_size):
        chunk = pairs[start:start + batch_size]
        batched_X.append(tuple(seq for seq, _ in chunk))
        batched_Y.append(tuple(label for _, label in chunk))
    return batched_X, batched_Y

# Each printed line is one batch: a tuple of sequences followed by the tuple of their labels.
batched_X, batched_Y = to_batch(X, Y)
print_dataset(batched_X, batched_Y)

([0], [1]) (1, 0)
([1, 1, 1], [0, 1, 0, 0, 0, 0]) (1, 0)
([0, 1, 1, 1, 1, 1, 0], [1, 1, 0, 0, 0, 0, 1]) (0, 0)
([1, 1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 1, 0, 0, 1, 0, 1]) (0, 1)
([0, 1, 1, 0, 0, 1, 1, 0, 0], [1, 1, 1, 0, 0, 1, 0, 0, 0]) (1, 0)


In each batch, all sequences must have the same length, so we pad the beginning of each shorter sequence with a new vocabulary token

In [4]:
def pad_batch(batch, pad_token=None):
    """Left-pad every sequence in a batch to the length of the longest one.

    Args:
        batch: iterable of sequences (lists or tuples of token ids). The
            sequences may be in any order; the longest one sets the target
            length.
        pad_token: token id used for padding. Defaults to VOCAB_SIZE - 1,
            read at call time.

    Returns:
        A list of new lists, one per input sequence, all of equal length.
        An empty batch yields an empty list.
    """
    if pad_token is None:
        pad_token = VOCAB_SIZE - 1
    sequences = list(batch)
    # Use the true maximum rather than trusting the last item to be longest.
    max_len = max((len(seq) for seq in sequences), default=0)
    return [[pad_token] * (max_len - len(seq)) + list(seq) for seq in sequences]

# Reserve one extra token id for padding: after this increment,
# VOCAB_SIZE-1 is an id that no generated sequence contains.
# NOTE: this cell is not idempotent -- re-running it bumps VOCAB_SIZE again,
# so restart the kernel and run all cells instead of re-executing it.
VOCAB_SIZE = VOCAB_SIZE + 1
batched_X_padded = [pad_batch(batch) for batch in batched_X]
print_dataset(batched_X_padded, batched_Y)

[[0], [1]] (1, 0)
[[2, 2, 2, 1, 1, 1], [0, 1, 0, 0, 0, 0]] (1, 0)
[[0, 1, 1, 1, 1, 1, 0], [1, 1, 0, 0, 0, 0, 1]] (0, 0)
[[2, 1, 1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 1, 0, 0, 1, 0, 1]] (0, 1)
[[0, 1, 1, 0, 0, 1, 1, 0, 0], [1, 1, 1, 0, 0, 1, 0, 0, 0]] (1, 0)


Create DyNet model

In [5]:
# Parameter collection: every trainable parameter below is registered on it,
# and train() hands it to the optimizer.
model = dn.Model()
# Embedding table with one EMBEDDINGS_SIZE-wide row per token id. VOCAB_SIZE
# has already been incremented at this point, so the padding token has a row too.
input_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMBEDDINGS_SIZE))
# LSTM that consumes the embeddings and carries a STATE_SIZE-wide state.
lstm = dn.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model)
# Output layer mapping the final LSTM output to one score per class.
output_w = model.add_parameters((NUM_OF_CLASSES, STATE_SIZE))
# NOTE: (NUM_OF_CLASSES) is just a parenthesized int, not a one-element tuple.
output_b = model.add_parameters((NUM_OF_CLASSES))

def get_probs(batch):
    """Run one padded batch through the network and return its class scores.

    Despite the name, the result is the raw output-layer expression
    (w * h + b); no softmax is applied here. A fresh computation graph is
    started on every call.

    Args:
        batch: sequences of token ids, all of the same length.

    Returns:
        A batched DyNet expression with one score per class for every
        sequence in the batch.
    """
    dn.renew_cg()

    # zip(*batch) transposes the batch: step t yields the t-th token of every
    # sequence, and all of them are embedded together with one batched lookup.
    embedded_steps = []
    for tokens_at_step in zip(*batch):
        embedded_steps.append(dn.lookup_batch(input_lookup, tokens_at_step))

    # Only the LSTM output after the last time step feeds the classifier.
    final_output = lstm.initial_state().transduce(embedded_steps)[-1]
    weights = dn.parameter(output_w)
    bias = dn.parameter(output_b)
    return weights * final_output + bias

In [6]:
def train(trainX, trainY):
    """Fit the model with Adam, making REPEATS passes over the batches.

    Args:
        trainX: list of padded batches, as accepted by get_probs.
        trainY: list of label tuples, aligned with trainX.
    """
    print('starting train')
    optimizer = dn.AdamTrainer(model)
    for _epoch in range(REPEATS):
        for batch_inputs, batch_labels in zip(trainX, trainY):
            scores = get_probs(batch_inputs)
            # Per-example negative log-likelihood, summed over the batch.
            batch_loss = dn.sum_batches(
                dn.pickneglogsoftmax_batch(scores, batch_labels))
            # Evaluate the loss before back-propagating (result is not used).
            batch_loss.value()
            batch_loss.backward()
            optimizer.update()
    print('done training!')

def validate(testX, testY):
    """Print the model's classification accuracy over the given batches.

    Args:
        testX: list of padded batches, as accepted by get_probs.
        testY: list of label sequences, aligned with testX.

    The predicted class of each sequence is the index of its highest score.
    """
    print('starting validation')
    acc = []
    for X, Y in zip(testX, testY):
        scores = get_probs(X).npvalue()
        # npvalue() returns a 1-D array when the batch holds a single
        # sequence; force a (classes, batch) layout so such batches work too.
        scores = scores.reshape(scores.shape[0], -1)
        predictions = argmax(scores, axis=0)
        acc.extend(int(pred == label) for pred, label in zip(predictions, Y))
    print('accuracy: ', mean(acc))

In [7]:
# Accuracy of the untrained model, then training, then accuracy again.
# Both measurements use the training batches themselves (no held-out split),
# so the final number shows how well the model fits this data.
validate(batched_X_padded, batched_Y)
train(batched_X_padded, batched_Y)
validate(batched_X_padded, batched_Y)

starting validation
accuracy:  0.6
starting train
done training!
starting validation
accuracy:  1.0
