In [None]:
import spacy
# Load the "en_core_web_sm" English pipeline once; the cells below reuse
# `text_engine` to parse every example text.
text_engine = spacy.load("en_core_web_sm")

In [None]:
# Parse one example sentence; `doc` is the parsed result that the following
# cells iterate over token by token.
doc = text_engine("Apple is looking to buying U.K. startup for $1 billion.")

In [None]:
# Tokenization: split the parsed text into individual tokens and show one per line.
token_texts = (token.text for token in doc)
for token_text in token_texts:
    print(token_text)

In [None]:
# Per-token details: part of speech, dependency label, tag, alphabetic flag,
# stop-word flag, shape and lemma -- printed in that order under each token.

TOKEN_DETAILS = ("pos_", "dep_", "tag_", "is_alpha", "is_stop", "shape_", "lemma_")

for token in doc:
    print(token.text)
    for detail in TOKEN_DETAILS:
        # The leading tab argument indents each detail beneath its token text.
        print("\t", getattr(token, detail))

In [None]:
# Lemmatization: the process of reducing a word to its root (base) form, the lemma.
# Example: going   --> go
#          reading --> read
#          working --> work

# Parse a short phrase; the next cell prints each token's lemma.
sample = text_engine("going to office")

In [None]:
# Show each token next to its lemma and its part-of-speech tag, space-separated.
for token in sample:
    print(f"{token.text} {token.lemma_} {token.pos_}")

In [None]:
# Named entities: spans of text that name real-world things such as a
# country, city, fruit, animal, product or book.

sample = text_engine("Apple is looking to buying U.K. startup for $1 billion")

# Label legend:
#   GPE -> Geopolitical Entity
#   ORG -> Organization
for ent in sample.ents:
    print(f"{ent.text} {ent.label_}")

In [None]:
# English stop words
# Stop words are common words used to form a sentence.
# `news` is the sample passage that the following cells parse and filter.

news = """

A huge crater next to a row of vehicles in the city of Zaporizhzhia testifies to the violence of the attack. Windows and windscreens were smashed in.

The BBC saw half a dozen bodies lying at the scene, apparently civilians. Baggage and coats strewed the tarmac.

One survivor told the BBC her boss had been killed in the attack.

"She had two kids. I left the cafe to use the restroom when it happened. I ran back and tried to find her. The cafe was demolished, there were many bodies around. It was all so very horrifying," Viktoriia Yosypenko said.



"""

In [None]:
# Parse the news passage. This rebinds `sample`, replacing the short example
# parsed in an earlier cell.
sample = text_engine(news)

In [None]:
# Print only the tokens flagged as stop words; everything else is skipped.
for token in sample:
    if not token.is_stop:
        continue
    print(token.text)

In [None]:
# Remove stop words and display the remaining, meaningful tokens.
# Use logical `not` here: `~` is bitwise inversion, and on a bool it yields
# -2 (for True) or -1 (for False). Both are truthy, so `~token.is_stop`
# never filtered anything and every token was printed.
for token in sample:
    if not token.is_stop:
        print(token.text)

In [None]:
# Display every named entity found in the parsed text together with its label.
for ent in sample.ents:
    entity_text, entity_label = ent.text, ent.label_
    print(entity_text, entity_label)

In [None]:
# Similarity between two sentences (or words) with respect to context.
# NOTE(review): the pipeline loaded at the top of this notebook is
# "en_core_web_sm"; small spaCy models reportedly ship without static word
# vectors, so this score may be unreliable and may trigger a warning.
# Consider a model with vectors (e.g. "en_core_web_md") -- TODO confirm.

# Despite their names, `word1` and `word2` each hold a parsed sentence.
word1 = text_engine("I'm going to College.")
word2 = text_engine("I'm going to School.")


# Last expression of the cell, so the notebook displays the similarity score.
word1.similarity(word2)