# Extract Phone Numbers, Emails and Emojis Using spaCy

### Phone Number Extraction

In [3]:
import spacy
from spacy.matcher import Matcher
# Load the 'en_core_web_sm' pipeline once; every matcher below is built on nlp.vocab.
nlp = spacy.load('en_core_web_sm')


In [9]:
# Token-level rule for numbers laid out as "(dddd) dddd-dddd".
# One dict per token; the hyphen token is optional ("OP": "?").
pattern = [
    {"ORTH": "("},
    {"SHAPE": "dddd"},
    {"ORTH": ")"},
    {"SHAPE": "dddd"},
    {"ORTH": "-", "OP": "?"},
    {"SHAPE": "dddd"},
]

In [10]:
# Build a token matcher on the pipeline's shared vocabulary.
matcher = Matcher(nlp.vocab)
# Patterns are passed as a list (signature accepted by spaCy >= 2.2.2).
# The older add(key, None, pattern) call form is rejected by spaCy v3.
matcher.add("PhoneNumber", [pattern])

In [22]:
# Sample sentence containing one phone number in the "(dddd) dddd-dddd" layout.
doc = nlp("Call me at (0307) 5021-8225")

In [23]:
# Run the matcher over the doc; each hit is a (match_id, start, end) tuple,
# where start/end are token indices used to slice the doc.
matches = matcher(doc)
matches

[(7978097794922043545, 3, 9)]

In [24]:
# Print the text of every matched token span; the match ID is not needed here.
for _, start, end in matches:
    print(doc[start:end].text)

(0307) 5021-8225


### Email Extraction

In [28]:
# Single-token rule: the token's text is tested against the regex
# (local part, "@", domain part).
# The "-" that follows "0-9" in each character class is parsed by Python's
# re module as a literal hyphen, not as the start of a range.
Pattern = [
    {
        "TEXT": {
            "REGEX": "[a-zA-Z0-9-_.]+@[a-zA-Z0-9-_.]+",
        },
    },
]


In [29]:
# Start from a fresh matcher so only the email rule is registered.
matcher = Matcher(nlp.vocab)
# Patterns are passed as a list (signature accepted by spaCy >= 2.2.2).
# The older add(key, None, pattern) call form is rejected by spaCy v3.
matcher.add("Email", [Pattern])

In [30]:
# Sample sentence ending in a single email address.
doc = nlp("Name is Muhammad Rehan and my Email address is rehanraza0106@gmail.com")

In [31]:
# Apply the email matcher; each hit is a (match_id, start, end) tuple of token indices.
matches = matcher(doc)
matches

[(11010771136823990775, 9, 10)]

In [33]:
# Print the text of every matched token span; the match ID is not needed here.
for _, start, end in matches:
    print(doc[start:end].text)

rehanraza0106@gmail.com


### Emoji Extraction


In [34]:
# Emoji vocabularies, one list per sentiment polarity (space-separated, then split).
pos_emoji = "😀 😃 😂 🤣 😊 😍".split()  # Positive emoji
neg_emoji = "😞 😠 😩 😢 😭 😒".split()  # Negative emoji
pos_emoji

['😀', '😃', '😂', '🤣', '😊', '😍']

In [35]:
def _orth_patterns(symbols):
    """Return one single-token pattern per symbol, matching its exact text."""
    return [[{"ORTH": symbol}] for symbol in symbols]


# One single-token pattern per emoji, grouped by polarity.
pos_patterns = _orth_patterns(pos_emoji)
neg_patterns = _orth_patterns(neg_emoji)

In [36]:
def label_sentiment(matcher, doc, i, matches):
    """Matcher callback that adjusts ``doc.sentiment`` for one emoji hit.

    Args:
        matcher: The matcher that fired; not used here.
        doc: The document being matched; its ``sentiment`` is updated in place.
        i: Index of the current hit within ``matches``.
        matches: Sequence of ``(match_id, start, end)`` tuples.

    A 'HAPPY' hit adds 0.1, a 'SAD' hit subtracts 0.1, and a hit from any
    other rule leaves the score untouched.
    """
    match_id, _start, _end = matches[i]
    # Resolve the hashed match ID back to the rule name it was registered under.
    rule_name = doc.vocab.strings[match_id]
    if rule_name == 'HAPPY':
        doc.sentiment += 0.1
        return
    if rule_name == 'SAD':
        doc.sentiment -= 0.1

In [37]:
# Fresh matcher for the emoji and hashtag rules.
matcher = Matcher(nlp.vocab)
# Patterns are passed as a list and the callback as the on_match keyword
# (signature accepted by spaCy >= 2.2.2). The older
# add(key, callback, *patterns) call form is rejected by spaCy v3.
matcher.add("HAPPY", pos_patterns, on_match=label_sentiment)
matcher.add('SAD', neg_patterns, on_match=label_sentiment)
# A hashtag is a "#" token followed by one ASCII token; no callback is attached.
matcher.add('HASHTAG', [[{'TEXT': '#'}, {'IS_ASCII': True}]])

doc = nlp("Hello world 😀 #RehanRaza")
# Running the matcher also fires label_sentiment for each HAPPY/SAD hit.
matches = matcher(doc)

for match_id, start, end in matches:
    string_id = doc.vocab.strings[match_id]  # Resolve the hashed ID back to the rule name
    span = doc[start:end]
    print(string_id, span.text)

HAPPY 😀
HASHTAG #RehanRaza


### Efficient phrase matching


In [38]:
from spacy.matcher import PhraseMatcher

# Phrase matcher on the shared vocabulary; its patterns are Doc objects,
# so each term string is converted with nlp.make_doc.
matcher = PhraseMatcher(nlp.vocab)
terms = ['BARAC OBAMA', 'ANGELA MERKEL', 'WASHINGTON D.C.']
pattern = list(map(nlp.make_doc, terms))
pattern

[BARAC OBAMA, ANGELA MERKEL, WASHINGTON D.C.]

In [39]:
# The Doc patterns are passed as a list (signature accepted by spaCy >= 2.2.2).
# The older add(key, None, *docs) call form is rejected by spaCy v3.
matcher.add('term', pattern)
# Sample sentence containing all three terms, in a different order than `terms`.
doc = nlp("German Chancellor ANGELA MERKEL and US President BARAC OBAMA "
          "converse in the Oval Office inside the White House in WASHINGTON D.C.")
doc

German Chancellor ANGELA MERKEL and US President BARAC OBAMA converse in the Oval Office inside the White House in WASHINGTON D.C.

In [40]:
# Find every registered term in the doc and print the matched text.
matches = matcher(doc)
for _, start, end in matches:
    print(doc[start:end].text)

ANGELA MERKEL
BARAC OBAMA
WASHINGTON D.C.


In [41]:
# Show the raw (match_id, start, end) tuples behind the spans printed above.
matches


[(4519742297340331040, 2, 4),
 (4519742297340331040, 7, 9),
 (4519742297340331040, 19, 21)]