## Natural Language Processing with spaCy

In [1]:
import spacy

In [2]:
# Load the spaCy pipeline named 'en_core_web_sm'; the `nlp` callable it
# returns is applied to text in the cells that follow.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Tokenize an in-memory string and show the text of every token.
text = 'Deep learning is a subset of Machine Learning.'
docs = nlp(text)
print([tok.text for tok in docs])

['Deep', 'learning', 'is', 'a', 'subset', 'of', 'Machine', 'Learning', '.']


In [5]:
# Working with Files
file_name = 'intro.txt'
# Read inside a `with` block so the file handle is closed as soon as the
# contents are in memory, instead of being left open by a bare open().read().
with open(file_name) as intro_file:
    file_text = intro_file.read()
file_doc = nlp(file_text)
print([token.text for token in file_doc])

['A', 'man', 'is', 'born', 'free', ',', 'and', 'everywhere', 'he', 'is', 'in', 'chains', '.']


In [7]:
# Sentence segmentation: split the text into sentences and print each one.
# NOTE: the two adjacent literals are joined with no separator, so the text
# contains "learning.Deep" (no space). The text, including the "leanrning"
# spelling, is kept unchanged so the recorded outputs below remain valid.
sent_text = ('NLP is a part of machine learning.'
             'Deep leanrning is a part of machine learning.')
sent_doc = nlp(sent_text)
sentences = list(sent_doc.sents)
for sentence in sentences:
    print(sentence)

NLP is a part of machine learning.
Deep leanrning is a part of machine learning.


### Tokenization

In [8]:
# Print every token followed by its `idx` attribute, one token per line.
for token in sent_doc:
    print(f"{token} {token.idx}")

NLP 0
is 4
a 7
part 9
of 14
machine 17
learning 25
. 33
Deep 34
leanrning 39
is 49
a 52
part 54
of 59
machine 62
learning 70
. 78


In [9]:
# Stop Words
# Import the stop-word collection explicitly rather than reaching through
# `spacy.lang.en.stop_words` as attributes, which only resolves when that
# submodule has already been imported as a side effect of earlier code.
from spacy.lang.en.stop_words import STOP_WORDS

stop_words = STOP_WORDS
len(stop_words)

326

In [10]:
# Peek at ten stop words. NOTE(review): if `stop_words` is a set, the order of
# this slice is arbitrary and may differ between kernel sessions.
list(stop_words)[:10]

['towards',
 'such',
 'of',
 'did',
 'afterwards',
 'an',
 'less',
 'fifty',
 'bottom',
 'three']

In [11]:
# Print only the tokens that are not flagged as stop words.
for token in sent_doc:
    if token.is_stop:
        continue
    print(token)

NLP
machine
learning
.
Deep
leanrning
machine
learning
.


### Lemmatization

In [18]:
# Lemmatize a single word: print the `lemma_` of every token in the doc.
help_text = "Organizing"
help_doc = nlp(help_text)
for token in help_doc:
    print(token.lemma_)

organize


### Word Frequency

In [21]:
from collections import Counter

# Text to analyse. NOTE: adjacent string literals are concatenated with no
# separator, so "systems,we" and "systems,as" carry no space after the comma.
complete_text = ('To make deliberate progress towards more intelligent and more human-like artificial systems,'
                 'we need to be following an appropriate feedback signal: we need to be able to define '
                 'and evaluate intelligence in a way that enables comparisons between two systems,'
                 'as well as comparisons with humans.')

comp_doc = nlp(complete_text)

# Keep the text of every token that is neither a stop word nor punctuation.
words = [tok.text for tok in comp_doc if not (tok.is_stop or tok.is_punct)]

word_freq = Counter(words)

# The five most frequent words (computed here but not displayed).
common_words = word_freq.most_common(5)

# Words that occur exactly once in the filtered token list.
unique_words = [word for word, count in word_freq.items() if count == 1]
print(unique_words)

['deliberate', 'progress', 'intelligent', 'human', 'like', 'artificial', 'following', 'appropriate', 'feedback', 'signal', 'able', 'define', 'evaluate', 'intelligence', 'way', 'enables', 'humans']


### Part of Speech Tagging

In [27]:
# For each token print its text, fine-grained tag, coarse part of speech,
# and spaCy's explanation of the fine-grained tag.
for token in comp_doc:
    tag = token.tag_
    print(token, tag, token.pos_, spacy.explain(tag))

To TO PART infinitival "to"
make VB VERB verb, base form
deliberate JJ ADJ adjective
progress NN NOUN noun, singular or mass
towards IN ADP conjunction, subordinating or preposition
more RBR ADV adverb, comparative
intelligent JJ ADJ adjective
and CC CCONJ conjunction, coordinating
more RBR ADV adverb, comparative
human JJ ADJ adjective
- HYPH PUNCT punctuation mark, hyphen
like JJ ADJ adjective
artificial JJ ADJ adjective
systems NNS NOUN noun, plural
, , PUNCT punctuation mark, comma
we PRP PRON pronoun, personal
need VBP VERB verb, non-3rd person singular present
to TO PART infinitival "to"
be VB AUX verb, base form
following VBG VERB verb, gerund or present participle
an DT DET determiner
appropriate JJ ADJ adjective
feedback NN NOUN noun, singular or mass
signal NN NOUN noun, singular or mass
: : PUNCT punctuation mark, colon or ellipsis
we PRP PRON pronoun, personal
need VBP VERB verb, non-3rd person singular present
to TO PART infinitival "to"
be VB AUX verb, base form
able JJ A

In [28]:
# Collect the tokens of `comp_doc` whose coarse part of speech is NOUN / ADJ,
# each list preserving document order.
nouns = [tok for tok in comp_doc if tok.pos_ == 'NOUN']
adjective = [tok for tok in comp_doc if tok.pos_ == 'ADJ']

In [29]:
# Display the tokens collected with pos_ == 'NOUN'.
nouns

[progress,
 systems,
 feedback,
 signal,
 intelligence,
 way,
 comparisons,
 systems,
 comparisons,
 humans]

In [30]:
# Display the tokens collected with pos_ == 'ADJ'.
adjective

[deliberate, intelligent, human, like, artificial, appropriate, able]

In [None]:
from spacy import displacy

# Visualise the dependency parse of a sample sentence.
text = 'I am interested in leaning Natural Language Processing'
text_doc = nlp(text)

# Render inline in the notebook. displacy.serve() would start a web server
# and block the kernel until interrupted; displacy.render() is the call meant
# for notebook use, and jupyter=True makes it display the markup directly.
displacy.render(text_doc, style='dep', jupyter=True)