# Determine ADJ/Verb based on Suffix
We build a model that determines, based on its suffix, whether a word is an adjective or a verb.

Using just the suffix (e.g., the last 3 characters) gives us a fixed-size input, which is convenient.

In [None]:
# import statements for modules that we may need below.
import dynet as dy
import sys
from random import shuffle

# Read the word lists and set up the "input vocabulary" (here: all characters).
# Every entry of `data` is a (class_index, word) pair; the class index is the
# position of the class name in `classes` (0 = verb, 1 = adjective).
data = []
classes = []
# NOTE(review): the files are opened with the platform default encoding, as
# before -- pass encoding=... explicitly if the lists contain non-ASCII text.
with open('verben.words') as f:
    # Iterate the file lazily and skip blank lines; a blank line would
    # otherwise become an empty "word" that carries a class label.
    data.extend((0, word) for word in (line.strip() for line in f) if word)
    classes.append('verb')
with open('adjektive.words') as f:
    data.extend((1, word) for word in (line.strip() for line in f) if word)
    classes.append('adjective')

characters = {c for _, word in data for c in word}
characters.add("<NONE>")  # special tag used to pad words that are too short

# Sort so that the character <-> index mapping is the same on every run (set
# iteration order over strings is not stable across interpreter runs), and
# derive char2int from int2char so the two are inverses by construction.
int2char = sorted(characters)
char2int = {c: i for i, c in enumerate(int2char)}

VOCAB_SIZE = len(characters)
CLASSES_SIZE = len(classes)

In [None]:
# Let's take a look at the data we have just loaded:
data # this produces a lot of output: pairs of *class* and *example*
# From here on, each (class, example) pair is called an *instance*.

So far, the words have different lengths, which could make things difficult. In this example, we want to base our decisions on the last 3 characters of a word. We will therefore shorten the data so that it contains only the last three characters of each word.

In [None]:
N = 3  # suffix length; kept as a variable so it is easy to change
shortened_data = []
for cls, word in data:
    tail = list(word[-N:])
    # Words shorter than N characters are left-padded with '<NONE>' so that
    # every suffix ends up with exactly N entries.
    missing = N - len(tail)
    suffix = ['<NONE>'] * missing + tail
    shortened_data.append((cls, suffix))
data = shortened_data
# at this point it is worth taking another look at data

In [None]:
def compute(params, instance):
    """Score one instance with the linear model and return its loss.

    Starts a fresh computation graph, looks up one embedding per suffix
    entry, concatenates them and applies ``R * x + bias``.

    Args:
        params: dict holding the "lookup", "R" and "bias" parameters.
        instance: ``(cls, suffix)`` pair; every entry of ``suffix`` must be a
            key of ``char2int`` and ``cls`` is the gold class index.

    Returns:
        ``(loss, isCorrect)``: the ``pickneglogsoftmax`` loss expression for
        the gold class, and whether the highest-scoring class equals ``cls``.
    """
    dy.renew_cg()
    gold, chars = instance
    embeddings = params["lookup"]
    x = dy.concatenate([embeddings[char2int[ch]] for ch in chars])
    weights = dy.parameter(params["R"])
    offset = dy.parameter(params["bias"])
    scores = weights * x + offset
    loss = dy.pickneglogsoftmax(scores, gold)
    # Arg-max over the raw scores; ties go to the larger index, exactly as
    # comparing (value, index) tuples does.
    best_index, _ = max(
        enumerate(scores.value()),
        key=lambda pair: (pair[1], pair[0]),
    )
    return loss, best_index == gold

# train, and report correctness after each training iteration
def train(params, data):
    """Train the model on ``data`` and print the accuracy of every iteration.

    Relies on the module-level ``trainer_type``, ``pc`` and ``ITERATIONS``.
    The reported accuracy is collected while training: each prediction is
    made before the parameter update for that instance.

    Args:
        params: parameter dict that is passed through to ``compute``.
        data: non-empty list of instances; it is shuffled in place.

    Raises:
        ValueError: if ``data`` is empty.
    """
    if len(data) == 0:
        # Fail early with a clear message instead of a ZeroDivisionError when
        # the accuracy is computed below.
        raise ValueError("cannot train on an empty data set")
    trainer = trainer_type(pc)
    for i in range(ITERATIONS):
        # Reshuffle on every pass so the updates are not applied in the same
        # instance order in each iteration.
        shuffle(data)
        correct = 0
        for instance in data:
            loss, isCorrect = compute(params, instance)
            if isCorrect:
                correct += 1
            loss.value()  # evaluate the loss before back-propagating
            loss.backward()
            trainer.update()
        print("IT: {}, correct: {}".format(i, correct/len(data)))

In [None]:
# Model dimensions.
INPUT_DIM = 40               # size of one character embedding
HIDDEN_DIM = N * INPUT_DIM   # size of the concatenated suffix embedding

# Number of passes over the training data.
ITERATIONS = 10

# All trainable parameters are registered with this collection.
pc = dy.ParameterCollection()
params = {
    "lookup": pc.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM)),
    "R": pc.add_parameters((CLASSES_SIZE, HIDDEN_DIM)),
    "bias": pc.add_parameters((CLASSES_SIZE)),
}

# Trainer class that train() instantiates with `pc`.
trainer_type = dy.SimpleSGDTrainer

In [None]:
# Run the training loop; train() prints one accuracy line per iteration.
train(params, data)

In [None]:
# Try the model on a single hand-written suffix with gold class 0 ('verb').
# compute() returns (loss, isCorrect), so the second value says whether the
# model predicted class 0 -- it is not the predicted class itself.
loss, is_correct = compute(params, (0, list("ern")))
print(loss.value(), is_correct)