## spaCy Implementation

In [1]:
import spacy

# Name of the pretrained spaCy pipeline package used throughout the notebook.
SPACY_MODEL_NAME = "en_core_web_sm"

# Load the pipeline once; later cells reuse this `model` object.
model = spacy.load(SPACY_MODEL_NAME)

In [2]:
# Sample sentence, run through the loaded spaCy pipeline.
text = "I bought a pair of watch for Tom and Elizabeth which costs $50 each."
processed = model(text)

# One row per token: surface form next to its coarse part-of-speech label.
print("text -- POS\n ---------")
for tok in processed:
    print(tok.text, tok.pos_, sep=" -- ")

text -- POS
 ---------
I -- PRON
bought -- VERB
a -- DET
pair -- NOUN
of -- ADP
watch -- NOUN
for -- ADP
Tom -- PROPN
and -- CCONJ
Elizabeth -- PROPN
which -- PRON
costs -- VERB
$ -- SYM
50 -- NUM
each -- PRON
. -- PUNCT


In [3]:
# One row per token: surface form, string POS label (token.pos_) and the
# integer value spaCy stores for that label (token.pos).
print("text -- POS -- POS hash\n ---------")
for tok in processed:
    print(" -- ".join((tok.text, tok.pos_, str(tok.pos))))

text -- POS -- POS hash
 ---------
I -- PRON -- 95
bought -- VERB -- 100
a -- DET -- 90
pair -- NOUN -- 92
of -- ADP -- 85
watch -- NOUN -- 92
for -- ADP -- 85
Tom -- PROPN -- 96
and -- CCONJ -- 89
Elizabeth -- PROPN -- 96
which -- PRON -- 95
costs -- VERB -- 100
$ -- SYM -- 99
50 -- NUM -- 93
each -- PRON -- 95
. -- PUNCT -- 97


In [4]:
# NLP pipelines: display the names of the components currently active on
# `model`, in the order they are applied to a document.
model.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [5]:
# Switch off the dependency parser and the named-entity recognizer.
# `Language.disable_pipes` is deprecated in spaCy v3; `select_pipes` is the
# supported replacement. Outside a `with` block it disables the components
# immediately and returns a handle whose `.restore()` re-enables them.
disabled_components = model.select_pipes(disable=["parser", "ner"])

# Only the components that are still active are listed.
model.pipe_names

['tok2vec', 'tagger', 'attribute_ruler', 'lemmatizer']

## Gensim Implementation

In [None]:
import gensim
from gensim.corpora import Dictionary

# Tiny two-sentence corpus used to demonstrate Gensim's Dictionary.
document = [
    "Tomorrow Tom and Elizabeth are getting married, I need to buy a gift for them.",
    "I bought a pair of watch for Tom and Elizabeth which costs $50 each.",
]

# Whitespace tokenization: one list of tokens per sentence. Punctuation stays
# attached to the neighbouring word because only whitespace is split on.
tokens = [sentence.split() for sentence in document]

# Build the token -> integer id mapping over the whole corpus and show a
# summary of it.
dictionary = Dictionary(tokens)
print(dictionary)