# spaCy

In [10]:
import spacy
from nltk import Tree

def to_nltk_tree(node):
    """Recursively convert a token and its dependents into an nltk ``Tree``.

    Args:
        node: A token-like object exposing ``n_lefts``, ``n_rights``,
            ``children`` and ``orth_`` (e.g. a spaCy token).

    Returns:
        ``node.orth_`` itself when the token has no left or right children;
        otherwise a ``Tree`` labelled ``node.orth_`` whose children are the
        converted child tokens, in ``node.children`` order.
    """
    is_leaf = node.n_lefts + node.n_rights <= 0
    if is_leaf:
        # Leaves are returned as the bare token text, not wrapped in a Tree.
        return node.orth_

    label = node.orth_
    subtrees = []
    for child in node.children:
        subtrees.append(to_nltk_tree(child))
    return Tree(label, subtrees)

# Load the spaCy 'en_core_web_sm' model once; the cells below call it as `nlp_spacy`.
nlp_spacy = spacy.load('en_core_web_sm')

In [23]:
sample = 'The photo quality is amazing.'
doc = nlp_spacy(sample)

# Draw the dependency tree of every sentence in the document.
# A plain loop is used rather than a list comprehension: pretty_print() only
# prints and returns None, so a comprehension would echo a list of None values
# as the cell's result.
for sent in doc.sents:
    sent_tree = to_nltk_tree(sent.root)
    if isinstance(sent_tree, Tree):
        sent_tree.pretty_print()
    else:
        # to_nltk_tree returns the bare token text when the root has no
        # children; a str has no pretty_print(), so print it directly.
        print(sent_tree)

         is                  
    _____|_________           
   |     |      quality      
   |     |    _____|______    
amazing  .  The         photo



[None]

In [27]:
# One row per token: index, text, pos_, tag_, dep_ and the index of its head.
# In the output the ROOT token lists its own index as head.
for tok in doc:
    row = (tok.i, tok.text, tok.pos_, tok.tag_, tok.dep_, tok.head.i)
    print(*row)

0 The DET DT det 2
1 photo NOUN NN compound 2
2 quality NOUN NN nsubj 3
3 is VERB VBZ ROOT 3
4 amazing ADJ JJ acomp 3
5 . PUNCT . punct 3


# StanfordNLP

In [2]:
import stanfordnlp
# Build the StanfordNLP pipeline with its default settings. The log printed by
# this call shows it loading the English (en_ewt) tokenize, pos, lemma and
# depparse models on CPU.
nlp = stanfordnlp.Pipeline()

Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt_tokenizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt_tagger.pt', 'pretrain_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt.pretrain.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt_lemmatizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit classifier]
---
Loading: depparse
With settings: 
{'model_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt_parser.pt', 'pretrain_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt.pr

In [36]:
# Sentences containing hyphenated modifiers, to inspect how they are tokenized and parsed.
samples = [
    'although the software of the player is easy-to-use, it is simple.',
    'This is all-around.',
    'This is brand-new.',
    'This is eye-catching.',
]

# Parse each sentence and print its CoNLL-formatted table.
for sample in samples:
    doc = nlp(sample)  # after the loop, `doc` holds the parse of the last sample
    conll_text = doc.conll_file.conll_as_string()
    print(conll_text)

1	although	although	SCONJ	IN	_	8	mark	_	_
2	the	the	DET	DT	Definite=Def|PronType=Art	3	det	_	_
3	software	software	NOUN	NN	Number=Sing	8	nsubj	_	_
4	of	of	ADP	IN	_	6	case	_	_
5	the	the	DET	DT	Definite=Def|PronType=Art	6	det	_	_
6	player	player	NOUN	NN	Number=Sing	3	nmod	_	_
7	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	8	cop	_	_
8	easy	easy	ADJ	JJ	Degree=Pos	16	advcl	_	_
9	-	-	PUNCT	HYPH	_	12	punct	_	_
10	to	to	ADP	IN	_	12	mark	_	_
11	-	-	PUNCT	HYPH	_	12	punct	_	_
12	use	use	VERB	VB	VerbForm=Inf	8	conj	_	_
13	,	,	PUNCT	,	_	16	punct	_	_
14	it	it	PRON	PRP	Case=Nom|Gender=Neut|Number=Sing|Person=3|PronType=Prs	16	nsubj	_	_
15	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	16	cop	_	_
16	simple	simple	ADJ	JJ	Degree=Pos	0	root	_	_
17	.	.	PUNCT	.	_	16	punct	_	_


1	This	this	PRON	DT	Number=Sing|PronType=Dem	5	nsubj	_	_
2	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	5	cop	_	_
3	all	all	ADV	RB	_	5	advmod	_	_
4	-	-	PUNCT	HYPH	_	



In [3]:
# Two short review-style sentences to parse.
samples = [
    'The photo quality is amazing.',
    'I am not pleased with the picture quality.',
]

# Parse each sentence and print its CoNLL-formatted table.
for sample in samples:
    doc = nlp(sample)  # after the loop, `doc` holds the parse of the last sample
    conll_text = doc.conll_file.conll_as_string()
    print(conll_text)

1	The	the	DET	DT	Definite=Def|PronType=Art	3	det	_	_
2	photo	photo	NOUN	NN	Number=Sing	3	compound	_	_
3	quality	quality	NOUN	NN	Number=Sing	5	nsubj	_	_
4	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	5	cop	_	_
5	amazing	amazing	ADJ	JJ	Degree=Pos	0	root	_	_
6	.	.	PUNCT	.	_	5	punct	_	_


1	I	I	PRON	PRP	Case=Nom|Number=Sing|Person=1|PronType=Prs	4	nsubj	_	_
2	am	be	AUX	VBP	Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin	4	cop	_	_
3	not	not	PART	RB	_	4	advmod	_	_
4	pleased	pleased	ADJ	JJ	Degree=Pos	0	root	_	_
5	with	with	ADP	IN	_	8	case	_	_
6	the	the	DET	DT	Definite=Def|PronType=Art	8	det	_	_
7	picture	picture	NOUN	NN	Number=Sing	8	compound	_	_
8	quality	quality	NOUN	NN	Number=Sing	4	obl	_	_
9	.	.	PUNCT	.	_	4	punct	_	_






In [10]:
# Print one row per dependency entry of every sentence in `doc`:
# text, lemma, xpos, dependency relation and governor of the entry's word.
# NOTE(review): `doc` must already hold a StanfordNLP parse from a previously
# run cell. The recorded output is for a different sentence than the samples
# parsed in the cell just above — confirm which cell is meant to feed this one.
for sentence in doc.sentences:
    for dependency in sentence.dependencies:
        # Index 2 of each entry is the word whose fields are printed
        # (presumably the dependent of a (governor, relation, dependent)
        # triple — verify against the StanfordNLP docs).
        word = dependency[2]
        print(word.text, word.lemma, word.xpos, word.dependency_relation, word.governor)

there there EX expl 2
are be VBP root 0
a a DT det 5
couple couple NN compound 5
things thing NNS nsubj 2
i i PRP nsubj 9
did do VBD aux 9
nt not RB advmod 9
like like VB acl:relcl 5
though though RB advmod 9
but but CC cc 12
nothing nothing NN conj 2
serious serious JJ amod 12
: : : punct 2
a a DT det 2
little little JJ obl:npmod 3
larger larger JJR advmod 13
than than IN case 6
other other JJ amod 6
mp3s mp3 NNS obl 3
but but CC cc 9
still still RB advmod 9
light light JJ conj 6
, , , punct 13
the the DT det 12
software software NN nsubj 13
takes take VBZ root 0
some some DT det 15
time time NN obj 13
to to TO mark 18
get get VB aux:pass 18
used use VBN acl 15
to to IN obl 18
( ( -LRB- punct 25
maybe maybe RB advmod 25
10 10 CD nummod 25
- - SYM case 24
15 15 CD nmod 22
mins min NNS obl 18
) ) -RRB- punct 25
, , , punct 34
and and CC cc 34
this this DT det 30
thing thing NN nsubj:pass 34
would would MD aux 34
definitely definitely RB advmod 34
be be VB aux:pass 34
destroyed destroy V

In [45]:
# A long, informally written review sentence used to stress the tokenizer and parser.
sample = 'there are a couple things i didnt like though but nothing serious : a little larger than other mp3s but still light , the software takes some time to get used to ( maybe 10-15 mins ) , and this thing would definitely be destroyed with one easy-to-use .'
doc = nlp(sample)

# Full CoNLL-formatted dump of the parse.
print(doc.conll_file.conll_as_string())

# Compact per-word listing: text, lemma, xpos, dependency relation, governor.
# NOTE(review): only the first sentence is listed here, although the CoNLL dump
# shows the sample being split into more than one sentence (token ids restart
# after ':') — confirm whether the remaining sentences should be listed too.
parsed_doc = doc.sentences[0].dependencies
for dependency in parsed_doc:
    # Index 2 of each entry is the word whose fields are printed.
    word = dependency[2]
    print(word.text, word.lemma, word.xpos, word.dependency_relation, word.governor)

1	there	there	PRON	EX	_	2	expl	_	_
2	are	be	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
3	a	a	DET	DT	Definite=Ind|PronType=Art	5	det	_	_
4	couple	couple	NOUN	NN	Number=Sing	5	compound	_	_
5	things	thing	NOUN	NNS	Number=Plur	2	nsubj	_	_
6	i	i	PRON	PRP	Case=Nom|Number=Sing|Person=1|PronType=Prs	9	nsubj	_	_
7	did	do	AUX	VBD	Mood=Ind|Tense=Past|VerbForm=Fin	9	aux	_	_
8	nt	not	PART	RB	_	9	advmod	_	_
9	like	like	VERB	VB	VerbForm=Inf	5	acl:relcl	_	_
10	though	though	ADV	RB	_	9	advmod	_	_
11	but	but	CCONJ	CC	_	12	cc	_	_
12	nothing	nothing	PRON	NN	Number=Sing	2	conj	_	_
13	serious	serious	ADJ	JJ	Degree=Pos	12	amod	_	_
14	:	:	PUNCT	:	_	2	punct	_	_

1	a	a	DET	DT	Definite=Ind|PronType=Art	2	det	_	_
2	little	little	ADJ	JJ	Degree=Pos	3	obl:npmod	_	_
3	larger	larger	ADJ	JJR	Degree=Cmp	13	advmod	_	_
4	than	than	ADP	IN	_	6	case	_	_
5	other	other	ADJ	JJ	Degree=Pos	6	amod	_	_
6	mp3s	mp3	NOUN	NNS	Number=Plur	3	obl	_	_
7	but	but	CCONJ	CC	_	9	cc	_	_
8	still	still	ADV	RB	_	9	advmod	_	_
9	light	light



In [2]:
import stanfordnlp
# Build the StanfordNLP pipeline with its default settings; the load log is
# printed as part of this cell's output.
# NOTE(review): an identical pipeline is already created in an earlier cell of
# this notebook — confirm whether this second construction is still needed.
nlp = stanfordnlp.Pipeline()

# Short product-review sentences to parse.
samples = [
    'The phone has a good screen.',
    'I am not pleased with the picture quality.',
    'The photo quality is amazing.',
    'The software of the player is not easy-to-use.',
]

# Parse each sentence and print its CoNLL-formatted table.
for sample in samples:
    doc = nlp(sample)  # after the loop, `doc` holds the parse of the last sample
    conll_text = doc.conll_file.conll_as_string()
    print(conll_text)

Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt_tokenizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt_tagger.pt', 'pretrain_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt.pretrain.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt_lemmatizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit classifier]
---
Loading: depparse
With settings: 
{'model_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt_parser.pt', 'pretrain_path': '/home/dmlab/stanfordnlp_resources/en_ewt_models/en_ewt.pr



In [6]:
# Sentences containing hyphenated modifiers, the first one negated.
samples = [
    'The software of the player is not easy-to-use.',
    'This is all-around.',
    'This is brand-new.',
    'This is eye-catching.',
]

# Parse each sentence and print its CoNLL-formatted table.
for sample in samples:
    doc = nlp(sample)  # after the loop, `doc` holds the parse of the last sample
    conll_text = doc.conll_file.conll_as_string()
    print(conll_text)

1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	software	software	NOUN	NN	Number=Sing	8	nsubj	_	_
3	of	of	ADP	IN	_	5	case	_	_
4	the	the	DET	DT	Definite=Def|PronType=Art	5	det	_	_
5	player	player	NOUN	NN	Number=Sing	2	nmod	_	_
6	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	8	cop	_	_
7	not	not	PART	RB	_	8	advmod	_	_
8	easy	easy	ADJ	JJ	Degree=Pos	0	root	_	_
9	-	-	PUNCT	HYPH	_	8	punct	_	_
10	to	to	ADP	IN	_	12	case	_	_
11	-	-	PUNCT	HYPH	_	12	punct	_	_
12	use	use	VERB	VB	VerbForm=Inf	8	conj	_	_
13	.	.	PUNCT	.	_	8	punct	_	_


1	This	this	PRON	DT	Number=Sing|PronType=Dem	5	nsubj	_	_
2	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	5	cop	_	_
3	all	all	ADV	RB	_	5	advmod	_	_
4	-	-	PUNCT	HYPH	_	5	punct	_	_
5	around	around	ADV	RB	_	0	root	_	_
6	.	.	PUNCT	.	_	5	punct	_	_


1	This	this	PRON	DT	Number=Sing|PronType=Dem	5	nsubj	_	_
2	is	be	AUX	VBZ	Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin	5	cop	_	_
3	brand	brand	NOUN	NN	Number=Sing	5	obl:np

