## Named Entity Recognition (NER)

In [2]:
import spacy
from spacy import displacy

# Load the small English pipeline once; later cells call it as NER(...).
NER = spacy.load("en_core_web_sm")

In [4]:
# Sample passage used to demonstrate entity recognition.
raw_text = '''
From 1925 to 1945, Tolkien was the Rawlinson and Bosworth Professor of Anglo-Saxon and a Fellow of Pembroke College, both at the University of Oxford. 
He then moved within the same university to become the Merton Professor of English Language and Literature and Fellow of Merton College, and held these positions from 1945 until his retirement in 1959. 
Tolkien was a close friend of C. S. Lewis, a co-member of the informal literary discussion group The Inklings. 
He was appointed a Commander of the Order of the British Empire by Queen Elizabeth II on 28 March 1972.
'''

# Run the loaded pipeline over the passage; the result exposes the entities via .ents.
text1 = NER(raw_text)

# Print every detected entity span next to its predicted label.
for ent in text1.ents:
    print(ent.text, ent.label_)

From 1925 to 1945 DATE
Tolkien GPE
Rawlinson PERSON
Anglo-Saxon ORG
the University of Oxford ORG
English Language and Literature and Fellow of Merton College WORK_OF_ART
1945 DATE
1959 DATE
Tolkien PERSON
C. S. Lewis PERSON
Elizabeth II PERSON
28 March 1972 DATE


In [5]:
# Highlight the recognised entities inline in the notebook output.
displacy.render(
    text1,
    style='ent',
    jupyter=True,
)

## POS Tagging

In [8]:
import pandas as pd

# Three short quotes; each list element becomes one row of the frame.
text = [
    "You know the greatest lesson of history?",
    "It's that history is whatever the victors say it is.",
    "That's the lesson. Whoever wins, that's who decides the history.",
]

# Single-column frame holding the raw sentences under 'Sentence'.
df = pd.DataFrame({'Sentence': text})

print(df)

                                            Sentence
0           You know the greatest lesson of history?
1  It's that history is whatever the victors say ...
2  That's the lesson. Whoever wins, that's who de...


In [13]:
import spacy

# Load the small English model
nlp = spacy.load('en_core_web_sm')

# Parallel accumulators: token[i] and pos[i] describe the same processed row
token = []
pos = []

# nlp.pipe streams the 'Sentence' column through the pipeline, one Doc per row
for parsed in nlp.pipe(df['Sentence']):
    # Skip any Doc that carries no dependency annotation
    if not parsed.has_annotation('DEP'):
        continue
    # Surface text of every token in this row
    token.append([tok.text for tok in parsed])
    # POS tag of every token, in the same order as the texts above
    pos.append([tok.pos_ for tok in parsed])

In [14]:
# Print the sentence frame again; the tagging loop only reads df['Sentence'].
print(df)

                                            Sentence
0           You know the greatest lesson of history?
1  It's that history is whatever the victors say ...
2  That's the lesson. Whoever wins, that's who de...


In [15]:
# Print the collected token texts: one inner list per processed row.
print(token)

[['You', 'know', 'the', 'greatest', 'lesson', 'of', 'history', '?'], ['It', "'s", 'that', 'history', 'is', 'whatever', 'the', 'victors', 'say', 'it', 'is', '.'], ['That', "'s", 'the', 'lesson', '.', 'Whoever', 'wins', ',', 'that', "'s", 'who', 'decides', 'the', 'history', '.']]


In [17]:
# Print the collected pos_ tags; built in the same loop as the token lists, so they align element-for-element.
print(pos)

[['PRON', 'VERB', 'DET', 'ADJ', 'NOUN', 'ADP', 'NOUN', 'PUNCT'], ['PRON', 'AUX', 'SCONJ', 'NOUN', 'AUX', 'DET', 'DET', 'NOUN', 'VERB', 'PRON', 'AUX', 'PUNCT'], ['DET', 'AUX', 'DET', 'NOUN', 'PUNCT', 'PRON', 'VERB', 'PUNCT', 'DET', 'AUX', 'PRON', 'VERB', 'DET', 'NOUN', 'PUNCT']]


## Dependency Parsing

In [20]:
# Load the small English pipeline used for the dependency parse.
nlp = spacy.load('en_core_web_sm')

# Example sentence to parse.
sentence = 'I saw a kitten eating chicken in the kitchen.'

# Calling the pipeline returns an object holding each token together with
# its linguistic features and its relations to the other tokens.
doc = nlp(sentence)

In [23]:
# Print a table of each token's dependency relation, head and children.
#
# The header and the data rows share ONE template, so the column headings
# sit directly above their columns (the header previously used different
# widths and separators from the rows and was visibly misaligned).
row_template = "{:<15} | {:<18} | {:<15} | {:<20}"
header = row_template.format('Token', 'Relation', 'Head', 'Children')
print(header)
# Size the rule from the header so it always spans the full table width.
print('-' * len(header))

# The loop variable is deliberately not called `token`: that name holds the
# list of token texts built in the POS-tagging cell, and rebinding it here
# would make a later re-run of `print(token)` show a single spaCy Token.
for tok in doc:
    # Token text, dependency label, head token text, and all direct dependents.
    print(row_template.format(
        str(tok.text),
        str(tok.dep_),
        str(tok.head.text),
        str(list(tok.children)),
    ))

Token          |Relation|Head           |Children            
----------------------------------------------------------------------
I               | nsubj              | saw             | []                  
saw             | ROOT               | saw             | [I, chicken, .]     
a               | det                | chicken         | []                  
kitten          | amod               | chicken         | []                  
eating          | amod               | chicken         | []                  
chicken         | dobj               | saw             | [a, kitten, eating, in]
in              | prep               | chicken         | [kitchen]           
the             | det                | kitchen         | []                  
kitchen         | pobj               | in              | [the]               
.               | punct              | saw             | []                  


In [25]:
# Render the dependency parse of `doc` inline with displaCy.
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={'distance': 120},
)