In [1]:
import nltk

In [2]:
from nltk.stem.porter import *

In [3]:
p_stemmer = PorterStemmer()
words = ['run','runner','running','ran','runs','easily','fairly']

In [4]:
for word in words:
    print(word+' --> '+p_stemmer.stem(word))

run --> run
runner --> runner
running --> run
ran --> ran
runs --> run
easily --> easili
fairly --> fairli


In [5]:
from nltk.stem.snowball import SnowballStemmer

In [6]:
# The Snowball Stemmer requires that you pass a language parameter
s_stemmer = SnowballStemmer(language='english')

In [7]:
words = ['run','runner','running','ran','runs','easily','fairly']
for word in words:
    print(word+' --> '+s_stemmer.stem(word))

run --> run
runner --> runner
running --> run
ran --> ran
runs --> run
easily --> easili
fairly --> fair


# 01.10.2024

In [8]:
p_stemmer1 = PorterStemmer()
words = ['Consolingly']

In [9]:
for word in words:
    print(word+' --> '+p_stemmer1.stem(word))

Consolingly --> consolingli


In [10]:
s_stemmer1 = SnowballStemmer(language='english')

In [11]:
words = ['Consolingly']
for word in words:
    print(word+' --> '+s_stemmer.stem(word))

Consolingly --> consol


In [12]:
import spacy

In [13]:
nlp = spacy.load('en_core_web_sm')

In [14]:
phrase = nlp(u'I am meeting him tomorrow at the meeting')
for p in phrase:
    print(p)

I
am
meeting
him
tomorrow
at
the
meeting


In [15]:
p_stem = PorterStemmer()
words = ['I am meeting him tomorrow at the meeting']

In [16]:
for word in words:
    print(word+' --> '+p_stem.stem(word))

I am meeting him tomorrow at the meeting --> i am meeting him tomorrow at the meet


In [17]:
import spacy 

In [18]:
nlp = spacy.load('en_core_web_sm')

In [19]:
doc1 = nlp(u'I am runner running in a race because I love to run since I ran today')

In [20]:
for token in doc1:
    print(token.text,  '\t' , token.pos_,  '\t',  token.lemma_)

I 	 PRON 	 I
am 	 AUX 	 be
runner 	 NOUN 	 runner
running 	 VERB 	 run
in 	 ADP 	 in
a 	 DET 	 a
race 	 NOUN 	 race
because 	 SCONJ 	 because
I 	 PRON 	 I
love 	 VERB 	 love
to 	 PART 	 to
run 	 VERB 	 run
since 	 SCONJ 	 since
I 	 PRON 	 I
ran 	 VERB 	 run
today 	 NOUN 	 today


In [21]:
def show_lemmas(text):
    for token in text:
        print(f'{token.text:{12}} {token.pos_:{6}} {token.lemma:<{22}} {token.lemma_}')

In [22]:
doc2 = nlp(u'I saw eighteen mice today!')

In [23]:
show_lemmas(doc2)

I            PRON   4690420944186131903    I
saw          VERB   11925638236994514241   see
eighteen     NUM    9609336664675087640    eighteen
mice         NOUN   1384165645700560590    mouse
today        NOUN   11042482332948150395   today
!            PUNCT  17494803046312582752   !


In [24]:
doc3 = nlp(u'I am meeting him tomorrow at the meeting.')

In [25]:
show_lemmas(doc3)

I            PRON   4690420944186131903    I
am           AUX    10382539506755952630   be
meeting      VERB   6880656908171229526    meet
him          PRON   1655312771067108281    he
tomorrow     NOUN   3573583789758258062    tomorrow
at           ADP    11667289587015813222   at
the          DET    7425985699627899538    the
meeting      NOUN   14798207169164081740   meeting
.            PUNCT  12646065887601541794   .


In [26]:
doc4 = nlp(u"That's an enormous automobile")

In [27]:
show_lemmas(doc4)

That         PRON   4380130941430378203    that
's           AUX    10382539506755952630   be
an           DET    15099054000809333061   an
enormous     ADJ    17917224542039855524   enormous
automobile   NOUN   7211811266693931283    automobile


# 03.10.2024

In [28]:
#Stops Words

In [29]:
#Words like "a" and "the" appear so frequently that they don't require tagging as throughly as nouns, verbs and modifiers.
#We call these stop words, and they can be filtered from the texf to be processed. Spacy holds a built-in list of some 305 
#English stop words.

In [30]:
#Perform standard imports

In [31]:
import spacy

In [32]:
nlp = spacy.load("en_core_web_sm")

In [33]:
#Print the set of Spacy's default stop words (remember that sets are unordered):

In [34]:
print(nlp.Defaults.stop_words)

{'alone', 'within', 'it', 'least', 'over', 'much', 'when', 'how', '’d', 'us', 'empty', 'not', 'therein', 'ten', 'full', 'had', 'please', 'meanwhile', 'each', 'be', 'do', 'herself', 'take', 'again', 'back', 'whither', 'fifteen', 'often', 'others', 'keep', 'then', 'through', 'everything', 'quite', 'too', 'among', 'twelve', 'both', 'of', 'same', 'beyond', 'an', 'throughout', 'hereby', 'else', 'at', 'get', 'bottom', 'one', 'serious', 'until', 'former', 'someone', 'into', 'thence', 'sixty', 'seems', 'beforehand', 'six', 'more', 'who', 'together', 'per', 'whereupon', 'me', 'several', 'side', '’ve', 'n‘t', 'most', 'after', 'anywhere', 'must', 'such', 'are', 'anyhow', 'my', 'few', 'hereafter', 'am', 'will', '‘d', 'have', 'some', 'here', 'why', 'those', 'own', 'himself', 'around', 'we', 'thereafter', 'forty', 'whence', 'their', 'above', 'enough', 'whereby', 'while', 'been', 'her', 'n’t', 'via', 'to', 'herein', 'eleven', 'may', 'that', 'therefore', 'yourself', 'can', 'he', '‘ll', 'top', 're', 'a

In [35]:
len(nlp.Defaults.stop_words)

326

In [36]:
#To see if a word is a stop word

In [37]:
nlp.vocab['myself'].is_stop

True

In [38]:
nlp.vocab['latter'].is_stop

True

In [41]:
nlp.vocab['mystery'].is_stop

False

In [40]:
nlp.vocab['btw'].is_stop

False

In [44]:
nlp.Defaults.stop_words.add('btw')

In [45]:
nlp.vocab['btw'].is_stop=True

In [46]:
len(nlp.Defaults.stop_words)

327

In [47]:
nlp.vocab['btw'].is_stop

True

In [48]:
#To remove a stop word

In [49]:
#Remove the word from the set of words

In [51]:
nlp.Defaults.stop_words.remove('btw')

In [54]:
nlp.vocab['btw'].is_stop=False

In [55]:
len(nlp.Defaults.stop_words)

326

In [56]:
nlp.vocab['btw'].is_stop

False