In [1]:
import spacy
import nltk
from nltk.stem import PorterStemmer

In [2]:
stemmer = PorterStemmer()  # NLTK Porter stemmer instance; the cells below call stemmer.stem(word) to get each word's stem

In [3]:
# Sample vocabulary used to compare stemming (here) with lemmatization (later cells).
list_ele = [
    "batting",
    "fielding",
    "reliability",
    "consumed",
    "customer",
    "walked",
    "belt",
]

# Print every word next to the stem the Porter stemmer returns for it.
for term in list_ele:
    print(f"{term} | {stemmer.stem(term)}")

batting | bat
fielding | field
reliability | reliabl
consumed | consum
customer | custom
walked | walk
belt | belt


In [4]:
# Load spaCy's small English pipeline, then run it over the sample words
# joined into a single space-separated string.
nlp = spacy.load("en_core_web_sm")
word = nlp(str.join(" ", list_ele))

In [5]:
# Show each token's text beside the lemma spaCy assigned to it.
for token in word:
    print(f"{token.text} | {token.lemma_}")

batting | batting
fielding | fielding
reliability | reliability
consumed | consume
customer | customer
walked | walk
belt | belt


In [6]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [7]:
# Informal sentence containing slang ("bro", "bruhh", "bruh") to run through the pipeline.
text = (
    "hi bro this is your friend, yes! "
    "bruhh i know you are my bruh!"
)
word = nlp(text)

In [9]:
# Print each token beside its lemma; the slang words keep their own spelling here.
for token in word:
    print(f"{token} | {token.lemma_}")

hi | hi
bro | bro
this | this
is | be
your | your
friend | friend
, | ,
yes | yes
! | !
bruhh | bruhh
i | I
know | know
you | you
are | be
my | my
bruh | bruh
! | !


In [10]:
# Grab the pipeline's attribute_ruler component and register a single rule:
# a token whose text is one of the slang spellings gets the lemma "brother".
custom = nlp.get_pipe("attribute_ruler")
custom.add(
    [[{"TEXT": slang}] for slang in ("bro", "bruh", "bruhh")],
    {"LEMMA": "brother"},
)

In [12]:
# Run the same slang sentence through the pipeline again and print each
# token beside its lemma.
text = (
    "hi bro this is your friend, yes! "
    "bruhh i know you are my bruh!"
)
word = nlp(text)
for token in word:
    print(f"{token} | {token.lemma_}")

hi | hi
bro | brother
this | this
is | be
your | your
friend | friend
, | ,
yes | yes
! | !
bruhh | brother
i | I
know | know
you | you
are | be
my | my
bruh | brother
! | !


# Exercise

In [13]:
from nltk.stem import PorterStemmer
# Exercise, part 1: stem a fixed list of words with the Porter stemmer.
stemmer = PorterStemmer()
lst_words = [
    "running", "painting", "walking", "dressing", "likely",
    "children", "whom", "good", "ate", "fishing",
]

# Print every word next to its stem.
for term in lst_words:
    print(f"{term} | {stemmer.stem(term)}")

running | run
painting | paint
walking | walk
dressing | dress
likely | like
children | children
whom | whom
good | good
ate | ate
fishing | fish


In [15]:
import spacy
# Exercise, part 2: load the small English pipeline again and lemmatize the
# exercise words, given as one space-separated sentence.
# NOTE(review): the sentence contains "who" where lst_words has "whom" - confirm which is intended.
nlp = spacy.load("en_core_web_sm")
doc = nlp("running painting walking dressing likely children who good ate fishing")
for token in doc:
    print(f"{token} | {token.lemma_}")

running | run
painting | painting
walking | walking
dressing | dress
likely | likely
children | child
who | who
good | good
ate | eat
fishing | fish


In [26]:
from nltk.tokenize import sent_tokenize, word_tokenize

# Tokenize the same sentence with NLTK and stem every resulting token.
# Note: `doc` now holds the word_tokenize result, not the spaCy result from the previous cell.
doc = word_tokenize("running painting walking dressing likely children who good ate fishing")
for term in doc:
    print(f"{term} | {stemmer.stem(term)}")

running | run
painting | paint
walking | walk
dressing | dress
likely | like
children | children
who | who
good | good
ate | ate
fishing | fish


In [27]:
# Exercise paragraph, reproduced verbatim. Several sentences have no space
# after the full stop ("girl.She", "playing.She", "too.Besides"); the first
# line ends with a trailing space before its newline, and the text ends with
# a newline.
text = (
    "Latha is very multi talented girl.She is good at many skills like dancing, "
    "running, singing, playing.She also likes eating Pav Bhagi. she has a \n"
    "habit of fishing and swimming too.Besides all this, she is a wonderful at cooking too.\n"
)

In [28]:
# Process the exercise paragraph two ways: through the spaCy pipeline and
# through NLTK's word tokenizer.
word1 = nlp(text)
word2 = word_tokenize(text)

In [29]:
# Map each spaCy token to its lemma, and each NLTK token string to its stem.
# The second dict is keyed by plain strings, so a word that occurs more than
# once collapses into one entry; the first is keyed by token objects, so
# repeated words appear once per occurrence.
word1_1 = {token: token.lemma_ for token in word1}
word2_2 = {piece: stemmer.stem(piece) for piece in word2}
print(word1_1, word2_2, sep="\n")

{Latha: 'Latha', is: 'be', very: 'very', multi: 'multi', talented: 'talented', girl: 'girl', .: '.', She: 'she', is: 'be', good: 'good', at: 'at', many: 'many', skills: 'skill', like: 'like', dancing: 'dancing', ,: ',', running: 'running', ,: ',', singing: 'singing', ,: ',', playing: 'play', .: '.', She: 'she', also: 'also', likes: 'like', eating: 'eat', Pav: 'Pav', Bhagi: 'Bhagi', .: '.', she: 'she', has: 'have', a: 'a', 
: '\n', habit: 'habit', of: 'of', fishing: 'fishing', and: 'and', swimming: 'swim', too: 'too', .: '.', Besides: 'besides', all: 'all', this: 'this', ,: ',', she: 'she', is: 'be', a: 'a', wonderful: 'wonderful', at: 'at', cooking: 'cook', too: 'too', .: '.', 
: '\n'}
{'Latha': 'latha', 'is': 'is', 'very': 'veri', 'multi': 'multi', 'talented': 'talent', 'girl.She': 'girl.sh', 'good': 'good', 'at': 'at', 'many': 'mani', 'skills': 'skill', 'like': 'like', 'dancing': 'danc', ',': ',', 'running': 'run', 'singing': 'sing', 'playing.She': 'playing.sh', 'also': 'also', 'like