# Entity Extraction
-----------

## Exercise 1: Pre-built NER (Named Entity Recognition)

In [1]:
import spacy
# Load the spaCy model named 'en_core_web_sm'
nlp = spacy.load('en_core_web_sm')
# Run the loaded pipeline on a sample sentence
doc = nlp("my friend Mary has worked at Google since 2009")
# Print the text of every entity found in the document next to its label
for ent in doc.ents:
    print(ent.text, ent.label_)

Mary PERSON
Google ORG
2009 DATE


## Exercise 2: Using spaCy's entity recognizer

In [2]:
# Load the 'en_core_web_sm' model (the same model name the first exercise loads)
nlp = spacy.load('en_core_web_sm')
# Define included entities: the only labels extract_entities() will keep
include_entities = ['DATE', 'ORG', 'PERSON']
# Echo the list as this cell's output
include_entities

['DATE', 'ORG', 'PERSON']

In [3]:
# Define extract_entities()
def extract_entities(message):
    """Return the entities of interest found in ``message``.

    The result has one key per label in ``include_entities``; a label that
    is not found keeps the value ``None``. If a label occurs more than once
    in the message, the text of the last occurrence is kept.

    The function also prints the initial (all-``None``) dict and the label
    of every entity it stores.
    """
    # Start with every label of interest mapped to None
    found = {label: None for label in include_entities}
    print(found)
    # Run the pipeline over the message and walk its entities
    for span in nlp(message).ents:
        label = span.label_
        if label not in include_entities:
            continue
        print(label)
        # Remember the text of this entity under its label
        found[label] = span.text
    return found

In [4]:
# Sample sentence with a person, an organisation and a date. extract_entities()
# prints the initial dict and each matched label before the result is printed.
print(extract_entities('friends called Mary who have worked at Google since 2010'))


{'DATE': None, 'ORG': None, 'PERSON': None}
PERSON
ORG
DATE
{'DATE': '2010', 'ORG': 'Google', 'PERSON': 'Mary'}


In [5]:
# No person is mentioned in this sentence, so 'PERSON' stays None in the result
print(extract_entities('people who graduated from MIT in 1999'))

{'DATE': None, 'ORG': None, 'PERSON': None}
ORG
DATE
{'DATE': '1999', 'ORG': 'MIT', 'PERSON': None}


## Exercise 3: Assigning roles using spaCy's parser

In [6]:
# Vocabulary consulted by entity_type(): a token whose text appears in
# `colors` is a "color" entity, one whose text appears in `items` is an "item".
colors = ['black', 'red', 'blue']
# 'handbag' was previously misspelled 'handback', so the real word never matched.
items = ['shoes', 'handbag', 'jacket', 'jeans']

In [7]:
def entity_type(word):
    """Classify a token by its exact text.

    Returns "color" if ``word.text`` is listed in ``colors``, "item" if it
    is listed in ``items``, and ``None`` otherwise. ``colors`` is checked
    first.
    """
    token_text = word.text
    if token_text in colors:
        return "color"
    if token_text in items:
        return "item"
    return None

In [8]:
# Create the document: run the pipeline on a sentence that mentions
# two words from `items` (jacket, jeans) and two from `colors` (red, blue)
doc = nlp("let's see that jacket in red and some blue jeans")

In [9]:
# Display the document; the cell output echoes the input sentence
doc

let's see that jacket in red and some blue jeans

In [10]:
# Iterate over parents in parse tree until an item entity is found
def find_parent_item(word):
    """Return the text of the first ancestor of ``word`` that is an "item".

    Ancestors are visited in the order ``word.ancestors`` yields them.
    Returns ``None`` when no ancestor is classified as an "item" by
    ``entity_type``.
    """
    item_texts = (
        ancestor.text
        for ancestor in word.ancestors
        if entity_type(ancestor) == "item"
    )
    # Take the first match, or None if there is none
    return next(item_texts, None)

In [11]:
# For all color entities, find their parent item
def assign_colors(doc):
    """Print the item each color word in ``doc`` belongs to.

    For every token classified as a "color" by ``entity_type``, looks up its
    parent item with ``find_parent_item`` and prints one line pairing the
    two. The item is printed as ``None`` when no parent item is found.
    Nothing is returned.
    """
    # Lazily pick out the color tokens, in document order
    color_tokens = (token for token in doc if entity_type(token) == "color")
    for token in color_tokens:
        print("item: {0} has color : {1}".format(find_parent_item(token), token))

In [12]:
# Display the document again before assigning colors to its items
doc

let's see that jacket in red and some blue jeans

In [13]:
# Assign the colors: prints one "item: ... has color : ..." line per color word in doc
assign_colors(doc)

item: jacket has color : red
item: jeans has color : blue
