""" 

Before running the code below, install spaCy with the command: pip install spacy

Once spaCy is installed, download the English model with the command: python -m spacy download en_core_web_sm. If you are running the command in a Jupyter notebook, prefix it with !.

Follow the code below

"""

In [7]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [8]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
                                              0.0/12.8 MB ? eta -:--:--
                                              0.0/12.8 MB ? eta -:--:--
                                              0.1/12.8 MB 1.3 MB/s eta 0:00:10
                                              0.2/12.8 MB 1.8 MB/s eta 0:00:07
     -                                        0.4/12.8 MB 2.1 MB/s eta 0:00:06
     -                                        0.5/12.8 MB 2.2 MB/s eta 0:00:06
     -                                        0.6/12.8 MB 2.3 MB/s eta 0:00:06
     --                                       0.7/12.8 MB 2.4 MB/s eta 0:00:06
     --                                       0.9/12.8 MB 2.4 MB/s eta 0:00:06
     ---                                      1.0/12.8 MB 2.4 MB/s eta 0:00:05
     ---                                      1.

In [9]:
import spacy
# Build the English pipeline from the installed "en_core_web_sm" package.
# The resulting `nlp` object is reused by every analysis cell below.
nlp = spacy.load(
    name="en_core_web_sm",
)

In [10]:
# Sentences that the cells below run through the spaCy pipeline.
# Defined once here: an earlier two-sentence version of this list was fully
# overwritten by the five-sentence version before it was ever used.
gardenpathSentences = [
    "The old man the boats.",
    "The complex houses married and single soldiers and their families.",
    "Mary gave the child a Band-Aid.",
    "That Jill is never here hurts.",
    "The cotton clothing is made of grows in Mississippi.",
]


In [12]:
# For each sentence, print the sentence itself, every token with its
# part-of-speech tag, and every named entity with its label.
for sentence in gardenpathSentences:
    doc = nlp(sentence)

    # Collect the report line by line, then emit it with a single print.
    # Joining with "\n" reproduces exactly what one print() per line would give,
    # including no output at all under a heading when there are no entities.
    report = [f"Original Sentence: {sentence}", "\nCategorized Tokens:"]
    report.extend(f"{token.text}: {token.pos_}" for token in doc)
    report.append("\nNamed Entities:")
    report.extend(f"{ent.text}: {ent.label_}" for ent in doc.ents)
    report.append("\n----------------------\n")

    print("\n".join(report))

Original Sentence: The old man the boats.

Categorized Tokens:
The: DET
old: ADJ
man: NOUN
the: DET
boats: NOUN
.: PUNCT

Named Entities:

----------------------

Original Sentence: The complex houses married and single soldiers and their families.

Categorized Tokens:
The: DET
complex: ADJ
houses: NOUN
married: VERB
and: CCONJ
single: ADJ
soldiers: NOUN
and: CCONJ
their: PRON
families: NOUN
.: PUNCT

Named Entities:

----------------------

Original Sentence: Mary gave the child a Band-Aid.

Categorized Tokens:
Mary: PROPN
gave: VERB
the: DET
child: NOUN
a: DET
Band: PROPN
-: PUNCT
Aid: NOUN
.: PUNCT

Named Entities:
Mary: PERSON

----------------------

Original Sentence: That Jill is never here hurts.

Categorized Tokens:
That: SCONJ
Jill: PROPN
is: AUX
never: ADV
here: ADV
hurts: VERB
.: PUNCT

Named Entities:
Jill: PERSON

----------------------

Original Sentence: The cotton clothing is made of grows in Mississippi.

Categorized Tokens:
The: DET
cotton: NOUN
clothing: NOUN
is: AU

In [5]:
# For each sentence, show the list of token texts and the list of
# (entity text, entity label) pairs found by the pipeline.
for sentence in gardenpathSentences:
    doc = nlp(sentence)

    tokens = [tok.text for tok in doc]
    entities = [(span.text, span.label_) for span in doc.ents]

    # One print call with a newline separator yields the same three
    # newline-terminated chunks as three separate print calls.
    print(
        f"Tokens for '{sentence}': {tokens}",
        f"Named Entities for '{sentence}': {entities}",
        "\n----------------------\n",
        sep="\n",
    )

Tokens for 'The old man the boats.': ['The', 'old', 'man', 'the', 'boats', '.']
Named Entities for 'The old man the boats.': []

----------------------

Tokens for 'The complex houses married and single soldiers and their families.': ['The', 'complex', 'houses', 'married', 'and', 'single', 'soldiers', 'and', 'their', 'families', '.']
Named Entities for 'The complex houses married and single soldiers and their families.': []

----------------------

Tokens for 'Mary gave the child a Band-Aid.': ['Mary', 'gave', 'the', 'child', 'a', 'Band', '-', 'Aid', '.']
Named Entities for 'Mary gave the child a Band-Aid.': [('Mary', 'PERSON')]

----------------------

Tokens for 'That Jill is never here hurts.': ['That', 'Jill', 'is', 'never', 'here', 'hurts', '.']
Named Entities for 'That Jill is never here hurts.': [('Jill', 'PERSON')]

----------------------

Tokens for 'The cotton clothing is made of grows in Mississippi.': ['The', 'cotton', 'clothing', 'is', 'made', 'of', 'grows', 'in', 'Mississ

In [13]:
# Same token / named-entity listing as before, extended with a
# description of each entity label obtained from spacy.explain().
for sentence in gardenpathSentences:
    doc = nlp(sentence)

    tokens = [tok.text for tok in doc]
    entities = [(span.text, span.label_) for span in doc.ents]

    # Gather every output line first; the list is never empty, so unpacking it
    # into print() with a newline separator matches one print() per line.
    lines = [
        f"Tokens for '{sentence}': {tokens}",
        f"Named Entities for '{sentence}': {entities}",
    ]
    lines += [
        f"Explanation for '{ent_text}' (Entity Type: {ent_label}): {spacy.explain(ent_label)}"
        for ent_text, ent_label in entities
    ]
    lines.append("\n----------------------\n")

    print(*lines, sep="\n")

Tokens for 'The old man the boats.': ['The', 'old', 'man', 'the', 'boats', '.']
Named Entities for 'The old man the boats.': []

----------------------

Tokens for 'The complex houses married and single soldiers and their families.': ['The', 'complex', 'houses', 'married', 'and', 'single', 'soldiers', 'and', 'their', 'families', '.']
Named Entities for 'The complex houses married and single soldiers and their families.': []

----------------------

Tokens for 'Mary gave the child a Band-Aid.': ['Mary', 'gave', 'the', 'child', 'a', 'Band', '-', 'Aid', '.']
Named Entities for 'Mary gave the child a Band-Aid.': [('Mary', 'PERSON')]
Explanation for 'Mary' (Entity Type: PERSON): People, including fictional

----------------------

Tokens for 'That Jill is never here hurts.': ['That', 'Jill', 'is', 'never', 'here', 'hurts', '.']
Named Entities for 'That Jill is never here hurts.': [('Jill', 'PERSON')]
Explanation for 'Jill' (Entity Type: PERSON): People, including fictional

----------------

"""

"The old man the boats."

Named Entities: None
Explanation: There are no named entities in this sentence.
"The complex houses married and single soldiers and their families."

Named Entities: None
Explanation: There are no named entities in this sentence.
"Mary gave the child a Band-Aid."

Named Entities: [('Mary', 'PERSON')]
Explanation: "PERSON" refers to a real or fictional person. In this case, "Mary" is recognized as a person.
"That Jill is never here hurts."

Named Entities: [('Jill', 'PERSON')]
Explanation: "PERSON" refers to a real or fictional person. In this case, "Jill" is recognized as a person.
"The cotton clothing is made of grows in Mississippi."

Named Entities: [('Mississippi', 'GPE')]
Explanation: "GPE" stands for geopolitical entity. In this case, "Mississippi" is recognized as a geopolitical entity.

"""