In [1]:
# Import required libraries
import spacy
# Load the `en_core_web_sm` pipeline once; `nlp` is the callable used below to parse text.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Sample text analysed by the rest of the notebook. Adjacent string literals
# inside the parentheses are joined into one string at compile time.
example_text = (
    "Hello Mr. Smith, how are you doing today? "
    "The weather is great, and Python is awesome. "
    "The sky is pinkish-blue. "
    "You shouldn't eat cardboard."
)

In [3]:
# Run the loaded pipeline over the sample text; later cells read the
# sentences and tokens from the resulting object.
introduction_doc = nlp(example_text)
introduction_doc

Hello Mr. Smith, how are you doing today? The weather is great, and Python is awesome. The sky is pinkish-blue. You shouldn't eat cardboard.

### Sentence Detection

In [4]:
# Materialise the sentence spans exposed by the parsed document into a list
# so they can be displayed, iterated and counted.
sentences = [sent for sent in introduction_doc.sents]
sentences

[Hello Mr. Smith, how are you doing today?,
 The weather is great, and Python is awesome.,
 The sky is pinkish-blue.,
 You shouldn't eat cardboard.]

In [5]:
# Show each detected sentence on its own line.
for sentence in sentences:
    print(sentence.text)

Hello Mr. Smith, how are you doing today?
The weather is great, and Python is awesome.
The sky is pinkish-blue.
You shouldn't eat cardboard.


### How many sentences are there?

In [6]:
# Number of sentence spans collected in `sentences`.
len(sentences)

4

### Tokenize the text into words

In [7]:
# Print one row per token: its repr, its character offset in the text, its
# text including trailing whitespace, then the is_alpha / is_punct /
# is_space / is_stop flags. The flags are booleans; under a width-only format
# spec they are rendered as 1 / 0.
#
# The format string is split into two adjacent f-string literals. Continuing
# it with a backslash *inside* the quotes would make the next source line's
# indentation part of the string and print it in every row.
for token in introduction_doc:
    print(
        f'{token!r:10} {token.idx:5} {token.text_with_ws:10} {token.is_alpha:10} '
        f'{token.is_punct:10} {token.is_space:10} {token.is_stop:10}'
    )

Hello          0 Hello               1                    0          0          0
Mr.            6 Mr.                 0                    0          0          0
Smith         10 Smith               1                    0          0          0
,             15 ,                   0                    1          0          0
how           17 how                 1                    0          0          1
are           21 are                 1                    0          0          1
you           25 you                 1                    0          0          1
doing         29 doing               1                    0          0          1
today         35 today               1                    0          0          0
?             40 ?                   0                    1          0          0
The           42 The                 1                    0          0          1
weather       46 weather             1                    0          0          0
is            54

### Import required libraries

In [8]:
from collections import Counter

### Find word frequencies

In [9]:
# Count how often each word occurs in the parsed text.
# Stop words and punctuation are skipped, and so are whitespace-only tokens:
# those are neither stop words nor punctuation, so without the is_space check
# they would be counted as words. Counting is case-sensitive because
# token.text is used as-is.
words = [
    token.text
    for token in introduction_doc
    if not (token.is_stop or token.is_punct or token.is_space)
]
word_freq = Counter(words)
word_freq

Counter({'Hello': 1,
         'Mr.': 1,
         'Smith': 1,
         'today': 1,
         'weather': 1,
         'great': 1,
         'Python': 1,
         'awesome': 1,
         'sky': 1,
         'pinkish': 1,
         'blue': 1,
         'eat': 1,
         'cardboard': 1})

### Print the 10 most common words

In [10]:
# Take the ten highest counts as (word, count) pairs. Counter.most_common
# lists words with equal counts in the order they were first encountered.
common_words = word_freq.most_common(10)
common_words

[('Hello', 1),
 ('Mr.', 1),
 ('Smith', 1),
 ('today', 1),
 ('weather', 1),
 ('great', 1),
 ('Python', 1),
 ('awesome', 1),
 ('sky', 1),
 ('pinkish', 1)]

### Get the frequency of the word "weather"

In [11]:
# Look up a single word's count (a Counter returns 0 for a word it has not seen).
word_freq['weather']

1