In [3]:
# Split by whiteSpace 
import re

text = 'I\'m with you for the entire life in U.K.!'
words = re.split(r'\W+',text)
print(words[:100])

['I', 'm', 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'U', 'K', '']


In [4]:
# Selct Words
words = re.split(r'\W+',text)
print(words[:100])

['I', 'm', 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'U', 'K', '']


In [6]:
# Punctuation remove mean comma and exclamation 
import string
import re

# Split into words by white space
words = text.split()

# Prepare regex for char filtering
re_punc = re.compile('[%s]' % re.escape(string.punctuation))

# remove punctuation from each words
stripped = [re_punc.sub('',w) for w in words]
print(stripped[:100])


['Im', 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'UK']


In [8]:
# string.printable inverse of string.punctuation
re_punc = re.compile('[^%s]' % re.escape(string.printable))
result = [re_punc.sub('',w) for w in words]
print(result[:100])

["I'm", 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'U.K.!']


In [10]:
# Normalizing Case

#Split into words by white space
words = text.split()
# convert to lower case
words = [word.lower() for word in words]
print(words[:100])

["i'm", 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'u.k.!']


In [11]:
# Install by https://spacy.io/usage/facts-figures#benchmarks
# conda install -c conda-forge spacy
# or 
# pip install -U spacy

# Alternatively you can create a virtual environment:
# conda create -n spacyenv python=3 spacy=2

In [18]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [25]:
string = '"I\'m with you for the entire life in P.K.!""'
print(string)

"I'm with you for the entire life in P.K.!""


In [28]:
doc = nlp(string)
for token in doc:
    print(token.text, end=' | ')

" | I | 'm | with | you | for | the | entire | life | in | P.K. | ! | " | " | 

In [29]:
doc2 = nlp(u"We're here to help! Send snail-mail, email shahhashmatali667@gmail.com or visit us ar https://myupdatedportfolio.netlify.app/!")
for t in doc2:
    print(t)

We
're
here
to
help
!
Send
snail
-
mail
,
email
shahhashmatali667@gmail.com
or
visit
us
ar
https://myupdatedportfolio.netlify.app/
!


In [30]:
doc3 = nlp(u"A 5km NYC ride costs $10.50")
for t in doc3:
    print(t)

A
5
km
NYC
ride
costs
$
10.50


In [31]:
doc4 = nlp(u"Let's visit St. Louis in the U.S. next year.")
for t in doc4:
    print(t)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [37]:
print(f'doc Length {len(doc)}')
print(f'doc Vocabulary {len(doc.vocab)}')

doc Length 14
doc Vocabulary 797


In [38]:
doc5 = nlp(u"It si better to give than to receive.")
doc5[2]

better

In [39]:
# Retrieve the three token from the middle:
doc5[2:5]

better to give

In [44]:
# Retrieve the last three token:
doc5[-4:]

than to receive.

In [45]:
doc6 = nlp(u'My dinner was horrible.')
doc7 = nlp(u'Your dinner was horrible')

In [48]:
# Try to change My dinner was horrible. to Your dinner was horrible.

doc6[3] = doc7[3]

# we coould not save value variable to variable in spacy 

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [52]:
doc8 = nlp(u'Apple to build Honng kong factory for $6 million')

for token in doc8:
    print(token.text, end=' | ')
    
print('\n------')

for ent in doc8.ents:
    print(ent.text+' - '+ent.label_+' - '+str(spacy.explain(ent.label_)))

Apple | to | build | Honng | kong | factory | for | $ | 6 | million | 
------
Apple - ORG - Companies, agencies, institutions, etc.
Honng kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [53]:
doc9 = nlp(u"Autonomous cars shift insurance liability toward manufactures.")
for chunk in doc9.noun_chunks:
    print(chunk.text)

Autonomous cars
insurance liability
manufactures


In [59]:
doc10 = nlp(u"Ali is eating apple")
for chunk in doc10.noun_chunks:
    print(chunk.text)

Ali
apple


In [60]:
doc11 = nlp(u"He was palying football.")
for chunk in doc11.noun_chunks:
    print(chunk.text)

He
football


In [61]:
doc12 = nlp(u"His foot was destroying while playing football.")
for chunk in doc12.noun_chunks:
    print(chunk.text)

His foot
football


In [62]:
 doc9 = nlp(u"He was a one-eyed, one-horned, flying, purple people-eater.")
for chunk in doc9.noun_chunks:
    print(chunk.text)

He
a one-eyed, one-horned, flying, purple people-eater


In [64]:
# spacy displacy

from spacy import displacy

doc = nlp(u'Apple is going to build a U.K factory for $6 million.')
displacy.render(doc, style='dep', jupyter=True, options={'distance': 220})

In [66]:
doc = nlp(u'Over the last quarter Apple sold nearly 20 thousan ipods for a profilt of $6 million.')
displacy.render(doc, style='ent', jupyter=True)

In [None]:
doc = nlp(u'This is a sentence.')
displacy.serve(doc, style='dep')