In [9]:
import spacy
from termcolor import colored
from spacy import displacy
from spacy.lang.en import English

In [11]:
# Comparison between spacy vs Google Syntax net made by owner of Spacy(Honibal):
#https://www.quora.com/How-does-Googles-open-source-natural-language-parser-SyntaxNet-compare-with-spaCy-io-or-Stanfords-CoreNLP

# Spacy Dependency Tree

In [27]:
class DependencyTree:
    
    
    def __init__(self):
        
        self.spacy_parser = spacy.load('en')
        
        
    def getDependencies(self, sentence):
        
        dependency_tree = []
        for token in sentence:
            
            if token.head.text == token.text:
                dependency_tree.append(["root", "ROOT", token.text])
                
            else:
                dependency_tree.append([token.dep_, token.head.text, token.text])
            
        return dependency_tree

    
    def getTokenizedSentences(self, sentences):
        
        sentences = self.spacy_parser(unicode(sentences))
        tokenized_sentence = [sent for sent in sentences.sents]
        
        return tokenized_sentence
    
    def getTokenizedWords(self, sentence):
        
        sentence = self.spacy_parser(unicode(sentence))
        tokenized_words = [token.text for token in sentence]
        
        return tokenized_words
    
    def printFormat(self, tokenizedSentences):
        
        print ("")
        print (" ===================== dependencies ===========================")
        print ("In each element of list, 2nd word is parent and 3rd word is child and 1st is relation between parent and child")
        print ("")
        for sent in tokenizedSentences:   
            print ("Sentence ")
            print (colored(sent, 'blue'))
            dependencies = dependencyTree.getDependencies(tokenizedSentences[0])
            print ("Relations")
            print (dependencies)
            print ("")
            
    '''
    Function to show dependencies graphically
    '''
    
    def displayView(self, sentence):
        
        doc = self.spacy_parser(unicode(sentence))
        displacy.render(doc, style='dep', jupyter=True, options={'distance': 150})
        

In [28]:
dependencyTree = DependencyTree()

# Example 1 

In [29]:
text = "I am going to University. Jack read the book"
tokenizedSentences = dependencyTree.getTokenizedSentences(text)
print ("Tokenized Sentences ", tokenizedSentences)
tokenizedWords = dependencyTree.getTokenizedWords(text)
print ("Tokenized Words ", tokenizedWords)
se1 = str(tokenizedSentences[0])
print (se1)
for s in tokenizedSentences:
    se1 = str(s)
    dependencyTree.displayView(se1)
    
    
dependencyTree.printFormat(tokenizedSentences)


('Tokenized Sentences ', [I am going to University., Jack read the book])
('Tokenized Words ', [u'I', u'am', u'going', u'to', u'University', u'.', u'Jack', u'read', u'the', u'book'])
I am going to University.



In each element of list, 2nd word is parent and 3rd word is child and 1st is relation between parent and child

Sentence 
[34mI am going to University.[0m
Relations
[[u'nsubj', u'going', u'I'], [u'aux', u'going', u'am'], ['root', 'ROOT', u'going'], [u'prep', u'going', u'to'], [u'pobj', u'to', u'University'], [u'punct', u'going', u'.']]

Sentence 
[34mJack read the book[0m
Relations
[[u'nsubj', u'going', u'I'], [u'aux', u'going', u'am'], ['root', 'ROOT', u'going'], [u'prep', u'going', u'to'], [u'pobj', u'to', u'University'], [u'punct', u'going', u'.']]



# Example 2 : Long Sentence

In [30]:
text = "John is going to the University, where he attends his classes"
tokenizedSentences = dependencyTree.getTokenizedSentences(text)
print ("Tokenized Sentences ", tokenizedSentences)
tokenizedWords = dependencyTree.getTokenizedWords(text)
print ("Tokenized Words ", tokenizedWords)

dependencyTree.printFormat(tokenizedSentences)
dependencyTree.displayView(text)

('Tokenized Sentences ', [John is going to the University, where he attends his classes])
('Tokenized Words ', [u'John', u'is', u'going', u'to', u'the', u'University', u',', u'where', u'he', u'attends', u'his', u'classes'])

In each element of list, 2nd word is parent and 3rd word is child and 1st is relation between parent and child

Sentence 
[34mJohn is going to the University, where he attends his classes[0m
Relations
[[u'nsubj', u'going', u'John'], [u'aux', u'going', u'is'], ['root', 'ROOT', u'going'], [u'prep', u'going', u'to'], [u'det', u'University', u'the'], [u'pobj', u'to', u'University'], [u'punct', u'University', u','], [u'advmod', u'attends', u'where'], [u'nsubj', u'attends', u'he'], [u'relcl', u'University', u'attends'], [u'poss', u'classes', u'his'], [u'dobj', u'attends', u'classes']]

