# NICF - Natural Language Processing (NLP) with Python for Beginners
# Answers to the Practical Performance Assessment 

## Part 1: Common NLP tasks

In [None]:
# Import spaCy and print the installed version string.
import spacy
print(spacy.__version__)

In [None]:
# Input passage for Part 1. Adjacent literals are joined by the parser, so the
# value is one string; the leading space is part of the data and is kept.
Text = (
    " The COVID-19 pandemic in Singapore is part of the worldwide pandemic of "
    "coronavirus disease 2019 (COVID-19) caused by severe acute respiratory "
    "syndrome coronavirus 2 (SARS-CoV-2). On 22 January 2020, a "
    "multi-ministerial committee was formed with Minister for Education "
    "Lawrence Wong and Minister for Health Gan Kim Yong as the co-chairs and "
    "Prime Minister Lee Hsien Loong and Deputy Prime Minister and Minister "
    "for Finance Heng Swee Keat as advisors"
)

In [None]:
# Load the "en_core_web_sm" pipeline and run it over the passage; later cells
# read the resulting `doc`.
nlp = spacy.load("en_core_web_sm")
doc = nlp(Text)

In [None]:
# Tabulate every token with its part-of-speech tag, dependency label and
# lemma. Each column is left-aligned and padded to 20 characters.
header = ("Token", "POS", "DEP", "LEMMA")
print("".join(f"{title:20}" for title in header))
for tok in doc:
    row = (tok.text, tok.pos_, tok.dep_, tok.lemma_)
    print("".join(f"{cell:20}" for cell in row))

In [None]:
# List each named entity found in the passage, followed by its label.
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

## Part 2: Word Similarity

In [None]:
# Input passage for Part 2, written one sentence per literal. Adjacent
# literals are joined by the parser into a single string.
Text = (
    "Exercise strengthens your heart and improves your circulation. "
    "The increased blood flow raises the oxygen levels in your body. "
    "This helps lower your risk of heart diseases such as high cholesterol, "
    "coronary artery disease, and heart attack. "
    "Regular exercise can also lower your blood pressure and triglyceride levels"
)

In [None]:
# Switch to the "en_core_web_md" pipeline for the matching and similarity
# cells below (this package ships word vectors, unlike the "sm" one).
nlp = spacy.load("en_core_web_md")

In [None]:
from spacy.matcher import Matcher

# Token-level rule: the exact word "high" immediately followed by "cholesterol".
matcher = Matcher(nlp.vocab)
pattern = [{"TEXT": word} for word in ("high", "cholesterol")]
matcher.add("high cholesterol", [pattern])

doc = nlp(Text)
matches = matcher(doc)

# Print every hit with its token offsets. matched_span1 ends up holding the
# last hit and is read again by the similarity cell further down; it stays
# undefined if the rule matches nothing.
for rule_id, first, stop in matches:
    matched_span1 = doc[first:stop]
    print(matched_span1.text, first, stop)

In [None]:
from spacy.matcher import Matcher

# A fresh matcher holding one two-token rule: "heart" followed by "attack".
matcher = Matcher(nlp.vocab)
pattern = [{"TEXT": "heart"}, {"TEXT": "attack"}]
matcher.add("heart attack", [pattern])

doc = nlp(Text)
matches = matcher(doc)

# Each match is a (match_id, start, end) triple; only the offsets are needed.
# matched_span2 keeps the last hit for the similarity cell that follows.
for hit in matches:
    lo, hi = hit[1], hit[2]
    matched_span2 = doc[lo:hi]
    print(matched_span2.text, lo, hi)

In [None]:
# Score how similar the two matched phrases are, using spaCy's vector-based
# Span.similarity, and print the result. Both spans must have been set by the
# matcher cells above.
similarity = matched_span1.similarity(matched_span2)
print(similarity)

## Part 3: Language Model

In [None]:
# Input passage for Part 3 (kept verbatim, including its leading space),
# wrapped across adjacent literals that the parser joins into one string.
Text = (
    " The COVID-19 pandemic in Singapore is part of the worldwide pandemic of "
    "coronavirus disease 2019 (COVID-19) caused by severe acute respiratory "
    "syndrome coronavirus 2 (SARS-CoV-2). On 22 January 2020, a "
    "multi-ministerial committee was formed with Minister for Education "
    "Lawrence Wong and Minister for Health Gan Kim Yong as the co-chairs and "
    "Prime Minister Lee Hsien Loong and Deputy Prime Minister and Minister "
    "for Finance Heng Swee Keat as advisors"
)

In [None]:
from spacy.language import Language

@Language.component("my_component")
def my_component(doc):
    """Print a token / POS / dependency / lemma table for ``doc``.

    Registered under the name "my_component" so it can be added to a
    pipeline by that string. Each column is left-aligned and padded to 20
    characters. The document itself is not modified.

    Args:
        doc: The document handed over by the pipeline.

    Returns:
        The same ``doc`` object, so the pipeline can continue.
    """
    layout = "{:20}" * 4
    print(layout.format("Token", "POS", "DEP", "LEMMA"))
    for tok in doc:
        print(layout.format(tok.text, tok.pos_, tok.dep_, tok.lemma_))
    return doc

# Load the "en_core_web_sm" pipeline and append the printing component.
# add_pipe() names the component after its factory and places it last by
# default. Processing the passage then triggers the table print.
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("my_component")
doc = nlp(Text)

## Part 4: Text Classification

In [None]:
import spacy
import random
import json
from spacy.training import Example

# Load the annotated samples. Each entry is unpacked below as a
# (text, annotations) pair -- TODO confirm against gadgets.json.
with open("gadgets.json", encoding="utf8") as f:
    TRAINING_DATA = json.load(f)

# Start from an empty English pipeline and add an entity recogniser.
# add_pipe() returns the component that actually lives in the pipeline, so
# the label must be registered on that object. A component built separately
# with create_pipe() is never part of the pipeline.
nlp = spacy.blank("en")
ner = nlp.add_pipe("ner")
ner.add_label("GADGET")

# Pair each raw text (tokenised only) with its gold annotations.
examples = [
    Example.from_dict(nlp.make_doc(text), annots)
    for text, annots in TRAINING_DATA
]

In [None]:
# Initialise the model weights. Passing the examples lets each component
# infer its label set from the training data. initialize() is the current
# name for this step; begin_training() is its deprecated alias.
optimizer = nlp.initialize(lambda: examples)

# Ten passes over the data, reshuffled each time, in mini-batches of three.
for epoch in range(10):
    random.shuffle(examples)
    for batch in spacy.util.minibatch(examples, size=3):
        nlp.update(batch, sgd=optimizer)

In [None]:
# Unseen test sentence pair for the trained entity recogniser.
Text = (
    "I dropped my iPhone 12, the screen is cracked. "
    "I booked an appointment with Apple store on Monday to repair my iPhone."
)

In [None]:
# Run the trained pipeline on the test text and list the entities it predicts.
doc = nlp(Text)
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

In [None]:
# Re-run the pipeline on the same text and print each entity with its label.
# NOTE(review): this cell repeats the code of the cell directly above it with
# no change to `nlp` or `Text` in between -- confirm whether it is intentional.
doc = nlp(Text)
for ent in doc.ents:
    print(ent.text, ent.label_)

## Part 5: Memory Network

In [None]:
# First passage for Part 5; the trailing space is part of the original value.
Text1 = (
    "Exercise strengthens your heart and improves your circulation. "
    "The increased blood flow raises the oxygen levels in your body. "
)

In [None]:
# Second passage for Part 5; the leading space is part of the original value.
Text2 = (
    " The COVID-19 pandemic in Singapore is part of the worldwide pandemic of "
    "coronavirus disease 2019 (COVID-19) caused by severe acute respiratory "
    "syndrome coronavirus 2 (SARS-CoV-2)"
)

In [None]:
import spacy

# Load the "en_core_web_trf" pipeline and show the names of its components.
nlp = spacy.load("en_core_web_trf")
print(nlp.pipe_names) 

In [None]:
# Parse both passages with the loaded pipeline. Surrounding whitespace is
# stripped first: a leading space would otherwise become a whitespace token
# at index 0 of the parsed text, and the first token is what gets compared
# later on.
text1 = nlp(Text1.strip())
text2 = nlp(Text2.strip())

In [None]:
# Print the similarity score between the first token of each parsed text.
# NOTE(review): index 0 is whatever the tokenizer emitted first; if the source
# string starts with whitespace, that is a whitespace token -- confirm the
# inputs are clean.
# NOTE(review): en_core_web_trf appears to ship without static word vectors,
# so this score may come back as 0.0 together with a warning -- confirm.
print(text1[0].similarity(text2[0]))