In [48]:
from datetime import datetime
import os
import pickle
import math
import time
import argparse

from torch import nn, optim
import torch
from tqdm import tqdm

from parser_model import *
from utils.parser_utils import minibatches, load_and_preprocess_data, AverageMeter

from parser_transitions import *

## Parser Transitions

In [32]:
# Start a fresh partial parse over the example sentence.
sentence = "I attended lectures in the NLP class".split()
pp = PartialParse(sentence)

# Apply the first four transitions one at a time.
for transition in ("S", "S", "LA", "S"):
    pp.parse_step(transition)

# Display the resulting parser state: stack, buffer and dependencies.
[pp.stack, pp.buffer, pp.dependencies]

[['ROOT', 'attended', 'lectures'],
 ['in', 'the', 'NLP', 'class'],
 [('attended', 'I')]]

In [33]:
# Start again from an untouched partial parse of the same sentence.
sentence = "I attended lectures in the NLP class".split()
pp = PartialParse(sentence)

# Run the whole transition sequence in a single call.
transitions = "S S LA S RA S S S S LA LA LA RA RA".split()
pp.parse(transitions)

# Display the final parser state: stack, buffer and dependencies.
[pp.stack, pp.buffer, pp.dependencies]

[['ROOT'],
 [],
 [('attended', 'I'),
  ('attended', 'lectures'),
  ('class', 'NLP'),
  ('class', 'the'),
  ('class', 'in'),
  ('attended', 'class'),
  ('ROOT', 'attended')]]

## Minibatch parse

In [42]:
# Stand-in model plus two example sentences for the minibatch parse below.
model = DummyModel()
sentence = "I attended lectures in the NLP class".split()
sentences = [sentence, ["I", "love", "you"]]

# A single fresh partial parse over the longer sentence, used by the next cells.
pp = PartialParse(sentence)

In [43]:
# Call the model on a one-element list holding `pp` and keep the result.
# Presumably this yields one predicted transition per partial parse — confirm
# against DummyModel.predict. Note this rebinds the notebook-level `transitions`.
transitions = model.predict([pp])

In [44]:
# Parse both example sentences with the dummy model and display the result.
# The trailing 1 is presumably the batch size — confirm against minibatch_parse.
minibatch_parse(sentences, model, 1)

[[('class', 'NLP'),
  ('class', 'the'),
  ('class', 'in'),
  ('class', 'lectures'),
  ('class', 'attended'),
  ('class', 'I'),
  ('class', 'ROOT')],
 [('you', 'love'), ('you', 'I'), ('you', 'ROOT')]]

In [39]:
# Inspect the buffer of the current `pp`; the recorded output still lists all
# seven words of the sentence.
pp.buffer

['I', 'attended', 'lectures', 'in', 'the', 'NLP', 'class']

## Parser model

In [50]:
# Flag handed straight to the data loader.
# NOTE(review): presumably selects a reduced dataset for quick runs — confirm
# against utils.parser_utils.load_and_preprocess_data.
debug = True

# One call returns the parser, the embeddings and the three data splits.
(
    parser,
    embeddings,
    train_data,
    dev_data,
    test_data,
) = load_and_preprocess_data(debug)

# Build the parser model on top of the loaded embeddings.
model = ParserModel(embeddings)

Loading data...
took 1.21 seconds
Building parser...
took 0.02 seconds
Loading pretrained embeddings...
took 1.20 seconds
Vectorizing data...
took 0.03 seconds
Preprocessing training data...
took 0.63 seconds


In [118]:
# Check the dimensions of the embeddings returned by load_and_preprocess_data.
embeddings.shape

(5157, 50)

In [92]:
# Look at the last example in dev_data to see how a vectorized sentence is laid out.
dev_data[-1]

{'word': [5156, 85, 5154, 5077, 88, 339, 117, 5154, 85, 195, 112, 5154, 87],
 'pos': [84, 41, 49, 44, 40, 39, 58, 50, 41, 39, 65, 49, 46],
 'head': [-1, 3, 3, 7, 5, 3, 7, 0, 9, 7, 11, 7, 7],
 'label': [-1, 19, 10, 17, 31, 4, 36, 0, 19, 8, 23, 7, 20]}

In [111]:
# Reference annotation pasted as a bare string (it is not displayed, only the
# last expression is) for comparison with test_data[0]. The layout looks like
# CoNLL-style tab-separated columns — confirm against the data files.
# The 7th column carries the same head indices as the 'head' list in the
# recorded output, after that list's leading -1 entry.
"""
1	No	_	ADV	DT	_	7	discourse	_	_
2	,	_	PUNCT	,	_	7	punct	_	_
3	it	_	PRON	PRP	_	7	nsubj	_	_
4	was	_	VERB	VBD	_	7	cop	_	_
5	n't	_	PART	RB	_	7	neg	_	_
6	Black	_	PROPN	JJ	_	7	compound	_	_
7	Monday	_	PROPN	NNP	_	0	root	_	_
8	.	_	PUNCT	.	_	7	punct	_	_
"""
test_data[0]

{'word': [5156, 176, 86, 101, 103, 118, 841, 391, 87],
 'pos': [84, 41, 45, 54, 48, 47, 43, 42, 46],
 'head': [-1, 7, 7, 7, 7, 7, 7, 0, 7],
 'label': [-1, 6, 20, 17, 5, 32, 35, 0, 20]}

In [114]:
# Reference annotation pasted as a bare string (it is not displayed, only the
# last expression is) for comparison with test_data[7]. The layout looks like
# CoNLL-style tab-separated columns — confirm against the data files.
# The 7th column carries the same head indices as the 'head' list in the
# recorded output, after that list's leading -1 entry.
"""
1	The	_	DET	DT	_	2	det	_	_
2	finger-pointing	_	NOUN	JJ	_	5	nsubj	_	_
3	has	_	AUX	VBZ	_	5	aux	_	_
4	already	_	ADV	RB	_	5	advmod	_	_
5	begun	_	VERB	VBN	_	0	root	_	_
6	.	_	PUNCT	.	_	5	punct	_	_
"""
test_data[7]

{'word': [5156, 85, 5154, 115, 566, 5154, 87],
 'pos': [84, 41, 43, 55, 47, 53, 46],
 'head': [-1, 2, 5, 5, 5, 0, 5],
 'label': [-1, 19, 17, 36, 3, 0, 20]}

In [115]:
# Reference annotation pasted as a bare string (it is not displayed, only the
# last expression is) for comparison with test_data[8]. The layout looks like
# CoNLL-style tab-separated columns — confirm against the data files.
# The 7th column carries the same head indices as the 'head' list in the
# recorded output, after that list's leading -1 entry.
"""
1	``	_	PUNCT	``	_	6	punct	_	_
2	The	_	DET	DT	_	4	det	_	_
3	equity	_	NOUN	NN	_	4	compound	_	_
4	market	_	NOUN	NN	_	6	nsubj	_	_
5	was	_	VERB	VBD	_	6	cop	_	_
6	illiquid	_	ADJ	JJ	_	0	root	_	_
7	.	_	PUNCT	.	_	6	punct	_	_
"""
test_data[8]

{'word': [5156, 96, 85, 1166, 174, 103, 5154, 87],
 'pos': [84, 61, 41, 39, 39, 48, 43, 46],
 'head': [-1, 6, 4, 4, 6, 6, 0, 6],
 'label': [-1, 20, 19, 35, 17, 5, 0, 20]}

## Parser Transitions