## Stemming

In [1]:
import nltk #natural language tool-kit

In [2]:
from nltk.stem.porter import PorterStemmer

In [3]:
stemmer = PorterStemmer()

In [8]:
words = ['who','ask','asked','asking','work','working','worked']

In [12]:
# Print every sample word next to the stem the Porter stemmer yields for it.
for word in words:
    stem = stemmer.stem(word)
    print(f"{word} --- {stem}")

who --- who
ask --- ask
asked --- ask
asking --- ask
work --- work
working --- work
worked --- work


In [13]:
from nltk.stem.snowball import SnowballStemmer

In [14]:
stemmer1 = SnowballStemmer(language='english')

In [15]:
# Same sample list as in the Porter cell earlier in this notebook; it is
# re-assigned here with identical contents before the Snowball comparison.
words = ['who','ask','asked','asking','work','working','worked']

In [16]:
# Print every sample word next to the stem the Snowball stemmer yields for it.
for word in words:
    stem = stemmer1.stem(word)
    print(f"{word} --- {stem}")

who --- who
ask --- ask
asked --- ask
asking --- ask
work --- work
working --- work
worked --- work


## Lemmatization

In [17]:
import spacy

In [18]:
nlp = spacy.load('en_core_web_sm')

In [20]:
doc = nlp("I was working and thinking my boss asked me about the progress.")

In [21]:
# One row per token: surface text, POS tag, the integer `lemma` value and the
# readable `lemma_` string, separated by tabs.
for tok in doc:
    print(
        tok.text, "\t",
        tok.pos_, "\t",
        tok.lemma, "\t",
        tok.lemma_,
    )

I 	 PRON 	 4690420944186131903 	 I
was 	 AUX 	 10382539506755952630 	 be
working 	 VERB 	 10038440415813069799 	 work
and 	 CCONJ 	 2283656566040971221 	 and
thinking 	 VERB 	 16875814820671380748 	 think
my 	 PRON 	 227504873216781231 	 my
boss 	 NOUN 	 12250246145872007735 	 boss
asked 	 VERB 	 203487227105936704 	 ask
me 	 PRON 	 4690420944186131903 	 I
about 	 ADP 	 942632335873952620 	 about
the 	 DET 	 7425985699627899538 	 the
progress 	 NOUN 	 3100199706382727292 	 progress
. 	 PUNCT 	 12646065887601541794 	 .


In [22]:
def show_lemmas(text):
    """Print one aligned row per token: text, POS tag, lemma ID, lemma string.

    Args:
        text: An iterable of token-like objects exposing ``text``, ``pos_``,
            ``lemma`` and ``lemma_`` (in this notebook, the result of
            calling ``nlp(...)``).

    Returns:
        None. Each row is written to stdout.
    """
    for tok in text:
        # Fixed column widths: 12 for the text, 6 for the POS tag, and 22
        # (left-aligned) for the integer lemma ID; the lemma string is unpadded.
        row = f'{tok.text:{12}} {tok.pos_:{6}} {tok.lemma:<{22}} {tok.lemma_}'
        print(row)

In [23]:
docs = nlp('It was raining outside.')

In [24]:
show_lemmas(docs)

It           PRON   10239237003504588839   it
was          AUX    10382539506755952630   be
raining      VERB   6253719383086150949    rain
outside      ADV    12341974070768608367   outside
.            PUNCT  12646065887601541794   .


## Stop Words

In [25]:
import spacy

In [26]:
# Loads the same 'en_core_web_sm' model that the Lemmatization section already
# bound to `nlp`; this rebinds `nlp` to a freshly loaded pipeline.
nlp = spacy.load('en_core_web_sm')

In [27]:
print(nlp.Defaults.stop_words)

{'but', 'formerly', 'under', 'sometimes', 'full', 'least', 'him', 'due', 'whole', 'something', 'again', 'out', 'beforehand', '’ve', 'through', 'though', 'himself', 'therefore', 'until', 'whose', "'m", 'enough', 'anything', 'any', 'how', 'thereafter', 'whoever', 'wherein', 'within', 'whereupon', 'mostly', 'itself', 'really', 'here', 'anywhere', 'doing', 'were', 'being', 'many', 'she', 'n’t', 'which', 'with', 'whenever', 'name', 'give', 'somewhere', 'either', 'seem', 'now', 'perhaps', 'there', 'had', 'both', 'keep', 'same', 'can', 'via', 'another', 'i', 'amongst', 'if', 'six', "'ve", 'at', 'forty', 'yourselves', 'top', 'the', 'becoming', 'by', 'never', 'various', 'it', 'thru', 'those', 'her', 'must', 'other', 'over', 'fifty', 'nothing', 'up', 'much', 'more', 'your', 'its', 'make', 'become', 'noone', 'ever', 'latterly', 'front', 'seeming', 'done', 'or', 'yours', 'whereby', 'almost', 'whereafter', 'twenty', 'has', 'besides', 'although', 'who', 'each', 'everything', 'per', 'please', 'show',

In [28]:
nlp.vocab['therefore'].is_stop

True

In [29]:
nlp.vocab['perhaps'].is_stop

True

In [30]:
# Register 'could' as a stop word.
# Adding to the Defaults set alone does not update a vocab entry that already
# exists (its is_stop flag was fixed when the entry was created), so the flag
# is also set on the lexeme explicitly.
# NOTE(review): 'could' appears to ship in spaCy's default English stop list
# already, so the is_stop check that follows would be True even without this
# cell -- consider demonstrating with a word that is not in the list.
nlp.Defaults.stop_words.add('could')
nlp.vocab['could'].is_stop = True

In [31]:
nlp.vocab['could'].is_stop

True

In [32]:
print(nlp.Defaults.stop_words)

{'but', 'formerly', 'under', 'sometimes', 'full', 'least', 'him', 'due', 'whole', 'something', 'again', 'out', 'beforehand', '’ve', 'through', 'though', 'himself', 'therefore', 'until', 'whose', "'m", 'enough', 'anything', 'any', 'how', 'thereafter', 'whoever', 'wherein', 'within', 'whereupon', 'mostly', 'itself', 'really', 'here', 'anywhere', 'doing', 'were', 'being', 'many', 'she', 'n’t', 'which', 'with', 'whenever', 'name', 'give', 'somewhere', 'either', 'seem', 'now', 'perhaps', 'there', 'had', 'both', 'keep', 'same', 'can', 'via', 'another', 'i', 'amongst', 'if', 'six', "'ve", 'at', 'forty', 'yourselves', 'top', 'the', 'becoming', 'by', 'never', 'various', 'it', 'thru', 'those', 'her', 'must', 'other', 'over', 'fifty', 'nothing', 'up', 'much', 'more', 'your', 'its', 'make', 'become', 'noone', 'ever', 'latterly', 'front', 'seeming', 'done', 'or', 'yours', 'whereby', 'almost', 'whereafter', 'twenty', 'has', 'besides', 'although', 'who', 'each', 'everything', 'per', 'please', 'show',

In [33]:
# Drop 'n’t' from the stop-word set.
# discard() rather than remove(): re-running this cell (or running it against a
# model whose defaults lack the entry) must not raise KeyError.
nlp.Defaults.stop_words.discard('n’t')
# The set change does not touch an existing vocab entry, so clear its flag too.
nlp.vocab['n’t'].is_stop = False

In [34]:
print(nlp.Defaults.stop_words)

{'but', 'formerly', 'under', 'sometimes', 'full', 'least', 'him', 'due', 'whole', 'something', 'again', 'out', 'beforehand', '’ve', 'through', 'though', 'himself', 'therefore', 'until', 'whose', "'m", 'enough', 'anything', 'any', 'how', 'thereafter', 'whoever', 'wherein', 'within', 'whereupon', 'mostly', 'itself', 'really', 'here', 'anywhere', 'doing', 'were', 'being', 'many', 'she', 'which', 'with', 'whenever', 'name', 'give', 'somewhere', 'either', 'seem', 'now', 'perhaps', 'there', 'had', 'both', 'keep', 'same', 'can', 'via', 'another', 'i', 'amongst', 'if', 'six', "'ve", 'at', 'forty', 'yourselves', 'top', 'the', 'becoming', 'by', 'never', 'various', 'it', 'thru', 'those', 'her', 'must', 'other', 'over', 'fifty', 'nothing', 'up', 'much', 'more', 'your', 'its', 'make', 'become', 'noone', 'ever', 'latterly', 'front', 'seeming', 'done', 'or', 'yours', 'whereby', 'almost', 'whereafter', 'twenty', 'has', 'besides', 'although', 'who', 'each', 'everything', 'per', 'please', 'show', 'sever

In [35]:
# Drop '’ve' from the stop-word set.
# discard() rather than remove(): re-running this cell (or running it against a
# model whose defaults lack the entry) must not raise KeyError.
nlp.Defaults.stop_words.discard('’ve')
# The set change does not touch an existing vocab entry, so clear its flag too.
nlp.vocab['’ve'].is_stop = False

In [36]:
print(nlp.Defaults.stop_words)

{'but', 'formerly', 'under', 'sometimes', 'full', 'least', 'him', 'due', 'whole', 'something', 'again', 'out', 'beforehand', 'through', 'though', 'himself', 'therefore', 'until', 'whose', "'m", 'enough', 'anything', 'any', 'how', 'thereafter', 'whoever', 'wherein', 'within', 'whereupon', 'mostly', 'itself', 'really', 'here', 'anywhere', 'doing', 'were', 'being', 'many', 'she', 'which', 'with', 'whenever', 'name', 'give', 'somewhere', 'either', 'seem', 'now', 'perhaps', 'there', 'had', 'both', 'keep', 'same', 'can', 'via', 'another', 'i', 'amongst', 'if', 'six', "'ve", 'at', 'forty', 'yourselves', 'top', 'the', 'becoming', 'by', 'never', 'various', 'it', 'thru', 'those', 'her', 'must', 'other', 'over', 'fifty', 'nothing', 'up', 'much', 'more', 'your', 'its', 'make', 'become', 'noone', 'ever', 'latterly', 'front', 'seeming', 'done', 'or', 'yours', 'whereby', 'almost', 'whereafter', 'twenty', 'has', 'besides', 'although', 'who', 'each', 'everything', 'per', 'please', 'show', 'several', 's