# Demo - simple NLP processing

In [42]:
# Imports
import spacy
from IPython.display import HTML, display
from spacy import displacy

In [43]:
# Download the (medium) English model if it is not installed yet.
# spacy.cli.download is used instead of `!python -m spacy download ...` because the
# shell form runs whichever `python` is first on PATH, which is not necessarily the
# interpreter this kernel runs on; the model could then land in another environment.
# NOTE: right after a fresh install, a kernel restart may be needed before the
# model can be loaded.
if not spacy.util.is_package("en_core_web_md"):
    spacy.cli.download("en_core_web_md")

In [44]:
# Load the medium English model that the previous cell makes sure is installed.
# `nlp` is the callable that a later cell applies to the input text.
nlp = spacy.load("en_core_web_md")

## Insert your own text
Use anything you like: a Wikipedia article, a news story, chat messages, whatever. Replace the text inside the triple quotes in the cell below with your own.

In [None]:
# Sample input text. The indentation and line breaks inside the literal do not
# matter: the next code cell collapses every run of whitespace into a single space.
text = """ 
    The number of permanent members should be increased at the Security Council. 
    At least, there should be two new seats for Asia, two for Africa and one 
    for Latin America. No single state should have veto power. And, if a 
    member of the Security Council violates the UN Charter, its voting rights 
    should be suspended.
"""

## Process / visualize text

In [46]:
# Collapse newlines, tabs and repeated spaces into single spaces.
# str.split() with no arguments splits on any whitespace run and drops the
# leading/trailing blanks, so re-joining with " " yields a clean one-line string.
words = text.split()
text = " ".join(words)
print(text)

The number of permanent members should be increased at the Security Council. At least, there should be two new seats for Asia, two for Africa and one for Latin America. No single state should have veto power. And, if a member of the Security Council violates the UN Charter, its voting rights should be suspended.


In [47]:
# Run the loaded spaCy model over the whitespace-normalized text.
# The resulting `doc` is what the cells below read entities, sentences and
# per-token attributes (part of speech, stop-word / punctuation flags) from.
doc = nlp(text)

In [48]:
# Highlight the named entities found in the document.
# jupyter=False makes displacy return the markup as a string, which is then
# wrapped in HTML and shown explicitly.
entity_markup = displacy.render(doc, style="ent", jupyter=False)
display(HTML(entity_markup))

In [49]:
# Show word POS tags and dependencies for the first sentence.
# next() is given a default so that input text which yields no sentences
# (e.g. an empty string) produces a readable message instead of StopIteration.
first_sentence = next(iter(doc.sents), None)

if first_sentence is None:
    print("No sentences found - put some text into the input cell and re-run.")
else:
    # jupyter=False returns the markup as a string; it is displayed explicitly below.
    dependency_markup = displacy.render(
        first_sentence, style="dep", jupyter=False, options={"compact": False}
    )
    display(HTML(dependency_markup))

In [50]:
# Collect the surface form of every token whose coarse part-of-speech tag is ADJ.
adjectives = []
for token in doc:
    if token.pos_ == "ADJ":
        adjectives.append(token.text)

# Heading and list go on separate lines, exactly as two print() calls would.
print("Adjectives in the text:", adjectives, sep="\n")

Adjectives in the text:
['permanent', 'least', 'new', 'single']


In [51]:
# Lower-cased text with stop words and punctuation removed.
# A token is kept only when it is neither a stop word nor punctuation.
kept_words = (
    tok.text.lower()
    for tok in doc
    if not tok.is_stop and not tok.is_punct
)
filtered_text = " ".join(kept_words)

print("Filtered text:")
print(filtered_text)

Filtered text:
number permanent members increased security council new seats asia africa latin america single state veto power member security council violates un charter voting rights suspended
