<a href="https://colab.research.google.com/github/kullawattana/thesis_2020_spacy_colab/blob/master/39_sample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#NLP Object

# Bring spaCy's English language class into scope
from spacy.lang.en import English
# Instantiate it to obtain the `nlp` object that the following cells call on text
nlp = English()

In [None]:
#--------------------------Doc Object--------------------------
# Calling the nlp object on a string produces a Doc
doc = nlp("Hello world!")
# A Doc is iterable token by token; print every token's text on its own line
print(*[token.text for token in doc], sep="\n")

Hello
world
!


In [None]:
#--------------------------Token Object------------------------
# Integer indexing on a Doc yields a single Token.
# "Hello world!" tokenizes to three tokens; position 1 is the middle one:
#[token] >[token]< [token]
token_position = 1
token = doc[token_position]
# The .text attribute exposes the token's text
print(token.text)

world


In [None]:
#--------------------------Span--------------------------------
# Slicing a Doc returns a Span object.
# "Hello world!" has only three tokens (Hello / world / !), so the slice that
# starts at index 1 and runs to the end of the Doc is [1:3]. The previous
# end index of 4 pointed past the last token and only worked because the
# slice bounds were clamped to the Doc length.
#[token] {[token] [token]}
span = doc[1:3]
# Get the span text via the .text attribute
print(span.text)

world!


In [None]:
#------------------------Lexical attributes--------------------
doc = nlp("It costs $5.")

# Each entry pairs the label to print with the Token attribute to collect.
lexical_columns = (
    ('Index: ', 'i'),
    ('Text: ', 'text'),
    ('is_alpha:', 'is_alpha'),
    ('is_punct:', 'is_punct'),
    ('like_num:', 'like_num'),
)

# One output row per attribute: the label followed by that attribute's value
# for every token in the Doc.
for label, attribute in lexical_columns:
    print(label, [getattr(token, attribute) for token in doc])

Index:  [0, 1, 2, 3, 4]
Text:  ['It', 'costs', '$', '5', '.']
is_alpha: [True, True, False, False, False]
is_punct: [False, False, False, False, True]
like_num: [False, False, False, True, False]


In [None]:
#-------------------Predicting Part-of-speech Tags--------------
import spacy

# Replace the nlp object with the small English model, then run it on a sentence
nlp = spacy.load('en_core_web_sm')
doc = nlp("She ate the pizza")

# First pass: token text alongside its predicted part-of-speech tag
for token in doc:
    print(f"{token.text} {token.pos_}")

# Second pass: additionally show the dependency label and the text of the
# token's syntactic head, e.g.
#   She PRON nsubj ate / ate VERB ROOT ate / the DET det pizza / pizza NOUN dobj ate
for token in doc:
    print(' '.join((token.text, token.pos_, token.dep_, token.head.text)))

She PRON
ate VERB
the DET
pizza NOUN
She PRON nsubj ate
ate VERB ROOT ate
the DET det pizza
pizza NOUN dobj ate


In [None]:
#------------------Predicting Named Entities-----------------
# Relies on `spacy` and the model-backed `nlp` object set up in the
# part-of-speech cell.
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
# doc.ents yields the entity spans; print each one's text and label
for ent in doc.ents:
    print(ent.text, ent.label_)

# Look up descriptions for a few labels. A notebook cell only echoes its
# final expression, so bare spacy.explain(...) calls on earlier lines are
# silently discarded; print every result explicitly instead.
for label in ('GPE', 'NNP', 'dobj'):
    print(label, '->', spacy.explain(label))

Apple ORG
U.K. GPE
$1 billion MONEY


'direct object'

In [None]:
#----------------Using Matcher----------------------
import spacy
# Import the Matcher
from spacy.matcher import Matcher
# Load a model and create the nlp object
nlp = spacy.load('en_core_web_sm')
# Initialize the matcher with the shared vocab
matcher = Matcher(nlp.vocab)
# Pattern: the exact token "iPhone" followed by the exact token "X"
pattern = [{'ORTH': 'iPhone'}, {'ORTH': 'X'}]
# Register the pattern. Patterns are passed as a list in the second argument:
# this form is accepted from spaCy v2.2.2 onward, whereas the legacy
# add(key, None, pattern) call is rejected by spaCy v3.
matcher.add('IPHONE_PATTERN', [pattern])
# Process some text
doc = nlp("New iPhone X release date leaked")
# Call the matcher on the doc; the result is a list of (match_id, start, end)
matches = matcher(doc)

In [None]:
#-----------------Matcher 2---------------------
# Re-process the sentence and run the previously configured matcher over it
doc = nlp("New iPhone X release date leaked")
matches = matcher(doc)
# Every match is a (match_id, start, end) triple of token offsets into the doc
for match in matches:
    match_id, start, end = match
    # Slice the doc with the offsets to recover the matched span
    matched_span = doc[start:end]
    print(matched_span.text)

iPhone X


In [None]:
#-----------Matching lexical attributes---------
# NOTE: the patterns in this cell are only defined, not registered with a
# Matcher. The triple-quoted strings after each example record the matches
# that pattern is expected to produce on the accompanying text.

# A digit token, then "fifa", "world", "cup" (case-insensitive), then punctuation
pattern = [{'IS_DIGIT': True}, {'LOWER': 'fifa'}, {'LOWER': 'world'}, {'LOWER': 'cup'}, {'IS_PUNCT': True}]
doc = nlp("2018 FIFA World Cup: France won!")
'''
2018 FIFA World Cup:
'''

#Matching other token attributes
# A verb whose lemma is "love", followed by a noun
pattern = [{'LEMMA': 'love', 'POS': 'VERB'}, {'POS': 'NOUN'}]
doc = nlp("I loved dogs but now I love cats more.")
'''
loved dogs
love cats
'''

#Using operators and quantifiers (1)
# A form of "buy", an optional determiner, then a noun. The noun entry had
# been swallowed by the trailing comment on the determiner line, leaving a
# pattern that could not describe the expected matches below.
pattern = [
    {'LEMMA': 'buy'},
    {'POS': 'DET', 'OP': '?'},  # optional: match 0 or 1 times
    {'POS': 'NOUN'},
]
doc = nlp("I bought a smartphone. Now I'm buying apps.")
'''
bought a smartphone 
buying apps
'''

'\nbought a smartphone \nbuying apps\n'