In [1]:
import os
import pickle
import random

import numpy as np

from pysem.corpora import SNLI
from pysem.networks import DependencyNetwork
from pysem.generatives import EmbeddingGenerator, TreeGenerator

# --- Corpus -----------------------------------------------------------------
# The corpus location defaults to the original local checkout but can be
# redirected without editing the notebook by exporting SNLI_PATH.
snli_path = os.environ.get('SNLI_PATH', '/Users/peterblouw/corpora/snli_1.0/')

snli = SNLI(snli_path)
snli.extractor = snli.get_xy_pairs
snli.load_vocab('snli_words.pickle')

# File name of pretrained embeddings; not consumed anywhere in this cell.
vectors = 'pretrained_snli_embeddings.pickle'

# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so 'depdict' must come from a trusted source.
with open('depdict', 'rb') as pfile:
    depsets = pickle.load(pfile)

# Keep only the sentence pairs labelled as entailments.
dev_data = [d for d in snli.dev_data if d.label == 'entailment']

# --- Reproducibility --------------------------------------------------------
# Seed before the networks are built and before any random.sample call so a
# Restart & Run All draws the same examples every time.
# NOTE(review): presumably pysem initialises its weights through NumPy's
# global RNG, which is why np.random is seeded too -- confirm.
seed = 0
random.seed(seed)
np.random.seed(seed)

# --- Hyperparameters --------------------------------------------------------
dim = 400        # passed as `dim` to both the encoder and the decoder
iters = 40       # number of passes of the outer training loop
rate = 0.05      # passed as `rate` to both backward passes
batchsize = 10   # number of pairs drawn per outer iteration

# --- Models -----------------------------------------------------------------
encoder = DependencyNetwork(dim=dim, vocab=snli.vocab)
decoder = TreeGenerator(dim=dim, vocab=snli.vocab)

# Train the encoder/decoder pair on a small pool of entailment examples.
# Each outer iteration draws `batchsize` distinct pairs from the pool and
# updates both networks once per pair.

# The pool is loop-invariant, so slice it once instead of on every iteration.
train_pool = dev_data[:15]

# Use a real name for the counter: `_` is IPython's "last output" variable,
# and assigning to it in a notebook shadows that behaviour.
for epoch in range(iters):
    print('On iteration ', epoch)
    for sample in random.sample(train_pool, batchsize):
        s1 = sample.sentence1
        s2 = sample.sentence2

        # Encode the first sentence, then run the decoder on the encoder's
        # root embedding together with the second sentence.
        encoder.forward_pass(s1)
        decoder.forward_pass(encoder.get_root_embedding(), s2)

        # Update the decoder first; the encoder update consumes the
        # decoder's `pass_grad`, so the order of these two calls matters.
        decoder.backward_pass(rate=rate)
        encoder.backward_pass(decoder.pass_grad, rate=rate)

On iteration  0
On iteration  1
On iteration  2
On iteration  3
On iteration  4
On iteration  5
On iteration  6
On iteration  7
On iteration  8
On iteration  9
On iteration  10
On iteration  11
On iteration  12
On iteration  13
On iteration  14
On iteration  15
On iteration  16
On iteration  17
On iteration  18
On iteration  19
On iteration  20
On iteration  21
On iteration  22
On iteration  23
On iteration  24
On iteration  25
On iteration  26
On iteration  27
On iteration  28
On iteration  29
On iteration  30
On iteration  31
On iteration  32
On iteration  33
On iteration  34
On iteration  35
On iteration  36
On iteration  37
On iteration  38
On iteration  39


In [11]:
# Spot-check two randomly drawn pairs from the first ten entailment examples.
# The decoder is run with the reference second sentence as an argument, and
# its output nodes are then lined up against the encoder's tree for that
# same sentence.
for sample in random.sample(dev_data[:10], 2):
    s1, s2 = sample.sentence1, sample.sentence2

    encoder.forward_pass(s1)
    root_embedding = encoder.get_root_embedding()
    decoder.forward_pass(root_embedding, s2)

    print('')
    print('Source Sentence: ', s1)
    print('Correct Entailment: ', s2)

    # Re-run the encoder on the second sentence so that `encoder.tree` is
    # presumably that sentence's parse (TODO confirm), then collect the
    # decoder's predicted word for every decoder node whose dependency label,
    # lower-cased form, and head's lower-cased form all match a tree node.
    encoder.forward_pass(s2)
    predicted = [
        generated.pw
        for reference in encoder.tree
        for generated in decoder.sequence
        if generated.dep_ == reference.dep_
        and generated.lower_ == reference.lower_
        and reference.head.lower_ == generated.head.lower_
    ]

    print('Predicted Entailment: ', ' '.join(predicted))


Source Sentence:  Two women are embracing while holding to go packages.
Correct Entailment:  Two woman are holding packages.
Predicted Entailment:  two woman are holding packages .

Source Sentence:  Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink.
Correct Entailment:  Two kids in numbered jerseys wash their hands.
Predicted Entailment:  two kids in numbered jerseys wash their hands .


In [16]:
# Generate from the source sentence alone: the decoder only receives the
# encoder's root embedding and the whitespace-token count of the reference
# sentence, then each generated node is printed as (pw, pd, ph).
for sample in random.sample(dev_data[:10], 2):
    s1, s2 = sample.sentence1, sample.sentence2

    encoder.forward_pass(s1)
    target_length = len(s2.split())
    decoder.predict(encoder.get_root_embedding(), target_length)

    # Blank separator line, then the source and reference sentences.
    print('', s1, s2, sep='\n')

    for generated in decoder.sequence:
        print(generated.pw, generated.pd, generated.ph)


Two young boys of opposing teams play football, while wearing full protection uniforms and helmets.
boys play football
play ROOT play
boys nsubj play
football dobj play

Two men on bicycles competing in a race.
People are riding bikes.
riding ROOT riding
people nsubj riding
are aux riding
bikes dobj riding
