## spaCy
https://spacy.io/

In [None]:
import spacy
# Load the "en_core_web_sm" pipeline once; later cells call `nlp(...)` on raw text.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Sample sentence used to inspect the per-token annotations below.
sentence = "Mark Pedersen is working at Google since 1994."
# Run the loaded pipeline over the text; `doc` is what the next cell iterates over.
doc = nlp(sentence)

In [None]:
# One tuple per token in `doc`: text, part-of-speech, tag, lemma,
# stop-word flag, entity IOB marker and entity type.
[
    (
        tok.text,
        tok.pos_,
        tok.tag_,
        tok.lemma_,
        tok.is_stop,
        tok.ent_iob_,
        tok.ent_type_,
    )
    for tok in doc
]

[('Mark', 'PROPN', 'NNP', 'Mark', False, 'B', 'PERSON'),
 ('Pedersen', 'PROPN', 'NNP', 'Pedersen', False, 'I', 'PERSON'),
 ('is', 'AUX', 'VBZ', 'be', True, 'O', ''),
 ('working', 'VERB', 'VBG', 'work', False, 'O', ''),
 ('at', 'ADP', 'IN', 'at', True, 'O', ''),
 ('Google', 'PROPN', 'NNP', 'Google', False, 'B', 'ORG'),
 ('since', 'SCONJ', 'IN', 'since', True, 'O', ''),
 ('1994', 'NUM', 'CD', '1994', False, 'B', 'DATE'),
 ('.', 'PUNCT', '.', '.', False, 'O', '')]

In [None]:
# Longer sample with two people, an organisation, a year and a money amount.
sentence = "Mark Pedersen and John Smith are working at Google since 1994 for $1000 per week."
# Re-run the pipeline; this replaces the `doc` built from the previous sentence.
doc = nlp(sentence)

In [None]:
# (text, label) pair for every entity span found in `doc`.
[(span.text, span.label_) for span in doc.ents]

[('Mark Pedersen', 'PERSON'),
 ('John Smith', 'PERSON'),
 ('Google', 'ORG'),
 ('1994', 'DATE'),
 ('1000', 'MONEY')]

In [None]:
# Visualise the entities of `doc` with displaCy's "ent" style, in notebook mode.
spacy.displacy.render(doc, style="ent", jupyter=True)

In [None]:
# Collapse every entity span of `doc` into a single token so that multi-word
# entities such as "Mark Pedersen" appear as one token afterwards.
# `tokens` is a snapshot of the tokens as they are before any merge is applied.
tokens = list(doc)
with doc.retokenize() as retokenizer:
    for ent in doc.ents:
        # Lemma of the merged token: the original token texts of the span,
        # joined by single spaces.
        merged_lemma = " ".join(tok.text for tok in tokens[ent.start:ent.end])
        retokenizer.merge(
            doc[ent.start:ent.end],
            attrs={"LEMMA": merged_lemma},
        )

In [None]:
# Same per-token table as before, now that entity spans have been merged:
# text, part-of-speech, tag, lemma, stop-word flag, entity IOB marker, entity type.
[
    (
        tok.text,
        tok.pos_,
        tok.tag_,
        tok.lemma_,
        tok.is_stop,
        tok.ent_iob_,
        tok.ent_type_,
    )
    for tok in doc
]

[('Mark Pedersen', 'PROPN', 'NNP', 'Mark Pedersen', False, 'B', 'PERSON'),
 ('and', 'CCONJ', 'CC', 'and', True, 'O', ''),
 ('John Smith', 'PROPN', 'NNP', 'John Smith', False, 'B', 'PERSON'),
 ('are', 'AUX', 'VBP', 'be', True, 'O', ''),
 ('working', 'VERB', 'VBG', 'work', False, 'O', ''),
 ('at', 'ADP', 'IN', 'at', True, 'O', ''),
 ('Google', 'PROPN', 'NNP', 'Google', False, 'B', 'ORG'),
 ('since', 'SCONJ', 'IN', 'since', True, 'O', ''),
 ('1994', 'NUM', 'CD', '1994', False, 'B', 'DATE'),
 ('for', 'ADP', 'IN', 'for', True, 'O', ''),
 ('$', 'SYM', '$', '$', False, 'O', ''),
 ('1000', 'NUM', 'CD', '1000', False, 'B', 'MONEY'),
 ('per', 'ADP', 'IN', 'per', True, 'O', ''),
 ('week', 'NOUN', 'NN', 'week', False, 'O', ''),
 ('.', 'PUNCT', '.', '.', False, 'O', '')]