In [1]:
import spacy
# Load the `en_core_web_sm` pipeline by package name. The model package has to
# be installed in the kernel's environment beforehand
# (e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

In [2]:
# Sample sentence used throughout the first half of the notebook; it mixes
# stop words, ordinary words, and punctuation so each filter has something to remove.
example_text = "This is a sample sentence, showing off the stop words filtration."

In [3]:
# Echo the raw string that will be passed to the pipeline.
example_text

'This is a sample sentence, showing off the stop words filtration.'

In [4]:
# Run the loaded pipeline over the sample text; later cells iterate over the
# result token by token and read each token's `is_stop` / `is_punct` flags.
example_doc = nlp(example_text)
# Displaying the processed document renders it as its original text.
example_doc

This is a sample sentence, showing off the stop words filtration.

### List of stopwords

In [5]:
# Read the stop-word set from the loaded pipeline's language defaults instead
# of walking the `spacy.lang.en.stop_words` attribute chain. A bare
# `import spacy` does not promise that submodule is bound on the package, so
# the chained lookup only worked as a side effect of the earlier model load.
# For the English pipeline loaded above this is the same set of words.
spacy_stopwords = nlp.Defaults.stop_words
print(spacy_stopwords)

{'below', 'over', 'however', 'per', 'say', 'get', 'herself', 'not', 'wherever', 'after', 'itself', 'due', 'thereby', 'four', 'have', 'well', 'nine', 'more', 'afterwards', 'one', 'every', 'to', 'i', 'became', 'other', 'nobody', 'if', 'between', 'elsewhere', 'keep', '’m', 'down', 'who', 'another', 'on', 'does', 'moreover', 'we', 'me', 'just', 'these', 'even', 'whither', 'through', 'less', 'five', 'give', '’ll', 'former', 'until', 'because', 'a', 'yourselves', 'what', 'several', 'serious', 'nor', 'via', 'last', 'ten', 'its', 'cannot', 'an', 'eight', 'may', 'do', 'only', 'hence', 'back', 'go', 'either', 'first', 'anywhere', 'toward', 'whom', 'ours', 'above', 'whatever', 'somehow', 'while', 'or', 'should', 'therein', 'within', 'myself', "'re", 'quite', 'whereafter', 'hereby', 'with', 'indeed', 'for', 'else', 'their', 'beside', 'twenty', 'almost', 'whole', 'whenever', 'each', 'much', 'upon', 'towards', 'himself', 'without', "n't", 'once', 'out', 'whoever', 'name', 'rather', 'next', 'few', 'm

### Number of stop words

In [6]:
# Number of distinct entries in the stop-word set.
len(spacy_stopwords)

326

### Sentence without stop words

In [7]:
# One row per token: its repr, then the `is_punct` and `is_stop` flags.
# Because a width is given in the format spec, the booleans are rendered as
# 0/1, and any token longer than 10 characters pushes the flag columns right.
for token in example_doc:
    flags_row = f'{token!r:10} {token.is_punct:10} {token.is_stop:10}'
    print(flags_row)

This                0          1
is                  0          1
a                   0          1
sample              0          0
sentence            0          0
,                   1          0
showing             0          0
off                 0          1
the                 0          1
stop                0          0
words               0          0
filtration          0          0
.                   1          0


In [8]:
# Drop every token flagged as a stop word. Nothing here filters punctuation,
# so commas and full stops remain in the result.
sentence_without_stop_words = [tok for tok in example_doc if not tok.is_stop]
sentence_without_stop_words

[sample, sentence, ,, showing, stop, words, filtration, .]

## Removing both stop words and punctuation

In [9]:
# Keep a token only when it is neither a stop word nor a punctuation mark.
sentence_without_stop_words_and_punctuation = [
    tok for tok in example_doc if not (tok.is_stop or tok.is_punct)
]
sentence_without_stop_words_and_punctuation

[sample, sentence, showing, stop, words, filtration]

In [10]:
# Second example with heavier punctuation. NOTE: this rebinds `example_doc`,
# so from here on the first sentence's document is no longer reachable under
# that name and re-running earlier cells afterwards will use this text instead.
example_doc = nlp('Sample message! Notice: it has punctuation.')
# Displaying the processed document renders it as its original text.
example_doc

Sample message! Notice: it has punctuation.

In [11]:
# Print the repr / is_punct / is_stop table for the current document.
# Columns are 10 wide, so a longer token such as `punctuation` shifts its
# flags out of alignment; the flags print as 0/1 because of the width spec.
for token in example_doc:
    print(
        f'{token!r:10} {token.is_punct:10} {token.is_stop:10}'
    )

Sample              0          0
message             0          0
!                   1          0
Notice              0          0
:                   1          0
it                  0          1
has                 0          1
punctuation          0          0
.                   1          0


In [12]:
# Apply the same stop-word + punctuation filter to the current document,
# retaining only tokens for which both flags are false.
sentence_without_stop_words_and_punctuation = list(
    filter(lambda tok: not (tok.is_stop or tok.is_punct), example_doc)
)
sentence_without_stop_words_and_punctuation

[Sample, message, Notice, punctuation]