## Stopword Removal using NLTK

In [33]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [34]:
# Fetch the stop-word corpus before it is read: on a machine where it has
# never been downloaded, stopwords.words() raises LookupError.
# quiet=True keeps this cell's printed log limited to the punkt download below.
nltk.download('stopwords', quiet=True)

# set of stop words
stop_words = set(stopwords.words('english'))

# Tokenizer data used by word_tokenize in a later cell. Kept as the last
# expression so the cell still displays the download's return value.
# NOTE(review): recent NLTK releases may ask for 'punkt_tab' instead — confirm
# against the installed version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\NAROT\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [35]:
# Sample paragraph used as input for stop-word removal. The line breaks (and the
# trailing spaces before two of them) are part of the string.
text = (
    "Google News is a news aggregator service developed by Google. It presents a continuous\n"
    "flow of articles organized from thousands of publishers and magazines. Google News is available as \n"
    "an app on Android, iOS, and the Web. Google released a beta version in September 2002 and the \n"
    "official app in January 2006. Wikipedia"
)

In [36]:
# Split the paragraph into word and punctuation tokens.
word_tokens = word_tokenize(text)

# Keep only the tokens that are absent from the stop-word set. Membership is
# tested on each token exactly as written, so a capitalized token such as "It"
# is kept (it appears in the recorded filtered output).
filtered_sentence = [tok for tok in word_tokens if tok not in stop_words]

In [37]:
# Show the token stream before and after stop-word removal, each under its own
# heading; sep="\n" puts the heading and the joined tokens on separate lines.
print("\n\nOriginal Sentence \n\n", " ".join(word_tokens), sep="\n")

print("\n\nFiltered Sentence \n\n", " ".join(filtered_sentence), sep="\n")



Original Sentence 


Google News is a news aggregator service developed by Google . It presents a continuous flow of articles organized from thousands of publishers and magazines . Google News is available as an app on Android , iOS , and the Web . Google released a beta version in September 2002 and the official app in January 2006 . Wikipedia


Filtered Sentence 


Google News news aggregator service developed Google . It presents continuous flow articles organized thousands publishers magazines . Google News available app Android , iOS , Web . Google released beta version September 2002 official app January 2006 . Wikipedia


## Stopword Removal using spaCy

In [4]:
from spacy.lang.en import English

In [5]:
# Instantiate spaCy's English language class directly (no spacy.load call).
# `nlp` is used below both to tokenize the text and to look up vocabulary entries.
nlp = English()

In [9]:
# Same sample paragraph as used in the NLTK section above; the embedded line
# breaks are part of the string.
text = '''Google News is a news aggregator service developed by Google. It presents a continuous
flow of articles organized from thousands of publishers and magazines. Google News is available as 
an app on Android, iOS, and the Web. Google released a beta version in September 2002 and the 
official app in January 2006. Wikipedia'''

In [10]:
# Run the paragraph through the pipeline, then keep just the text of each token.
my_doc = nlp(text)
token_list = [tok.text for tok in my_doc]

In [11]:
from spacy.lang.en.stop_words import STOP_WORDS

In [12]:
# Keep every token whose vocabulary entry is not flagged as a stop word.
# Each token string is looked up in nlp.vocab to obtain its lexeme, and the
# lexeme's is_stop flag decides whether the token is dropped.
# Whitespace tokens such as '\n' are not flagged, so they stay in the result.
filtered_sentence = [
    word for word in token_list
    if not nlp.vocab[word].is_stop
]

In [14]:
# Show the tokens before filtering; the line breaks in the text appear as their own '\n' tokens.
print(token_list)

['Google', 'News', 'is', 'a', 'news', 'aggregator', 'service', 'developed', 'by', 'Google', '.', 'It', 'presents', 'a', 'continuous', '\n', 'flow', 'of', 'articles', 'organized', 'from', 'thousands', 'of', 'publishers', 'and', 'magazines', '.', 'Google', 'News', 'is', 'available', 'as', '\n', 'an', 'app', 'on', 'Android', ',', 'iOS', ',', 'and', 'the', 'Web', '.', 'Google', 'released', 'a', 'beta', 'version', 'in', 'September', '2002', 'and', 'the', '\n', 'official', 'app', 'in', 'January', '2006', '.', 'Wikipedia']


In [15]:
# Show the tokens left after stop-word filtering; punctuation and '\n' tokens are still present.
print(filtered_sentence)

['Google', 'News', 'news', 'aggregator', 'service', 'developed', 'Google', '.', 'presents', 'continuous', '\n', 'flow', 'articles', 'organized', 'thousands', 'publishers', 'magazines', '.', 'Google', 'News', 'available', '\n', 'app', 'Android', ',', 'iOS', ',', 'Web', '.', 'Google', 'released', 'beta', 'version', 'September', '2002', '\n', 'official', 'app', 'January', '2006', '.', 'Wikipedia']


## Stopword Removal using Gensim

In [40]:
from gensim.parsing.preprocessing import remove_stopwords

In [44]:
# Display the raw string, embedded newlines included, that is passed to gensim below.
text

'Google News is a news aggregator service developed by Google. It presents a continuous\nflow of articles organized from thousands of publishers and magazines. Google News is available as \nan app on Android, iOS, and the Web. Google released a beta version in September 2002 and the \nofficial app in January 2006. Wikipedia'

In [42]:
# Remove stop words from the raw string in a single call; this section has no
# separate tokenization step. In the recorded output the line breaks are gone,
# punctuation stays attached to words, and the capitalized "It" is kept.
result = remove_stopwords(text)

In [43]:
# Print the heading and the gensim-filtered text on separate lines.
print('\n\n Filtered Sentence \n\n', result, sep='\n')



 Filtered Sentence 


Google News news aggregator service developed Google. It presents continuous flow articles organized thousands publishers magazines. Google News available app Android, iOS, Web. Google released beta version September 2002 official app January 2006. Wikipedia
