# CS5720 Neural Networks and Deep Learning
### Home Assignment 4 – Kaggle Notebook Format

In [1]:
# Q1: NLP Preprocessing Pipeline

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Download the 'punkt' tokenizer data and the stop-word lists used by preprocess() below.
# NOTE(review): newer NLTK releases may also need the 'punkt_tab' resource for
# word_tokenize — confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('stopwords')

def preprocess(sentence):
    """Tokenize a sentence, drop English stop words, and stem the remainder.

    The result of each of the three stages is printed; nothing is returned.

    Args:
        sentence: The raw text to run through the pipeline.
    """
    stemmer = PorterStemmer()
    english_stops = set(stopwords.words("english"))

    # Stage 1: split the raw text into tokens.
    all_tokens = word_tokenize(sentence)
    print("Original Tokens:", all_tokens)

    # Stage 2: keep only tokens whose lowercase form is not a stop word.
    content_tokens = list(
        filter(lambda tok: tok.lower() not in english_stops, all_tokens)
    )
    print("Tokens Without Stopwords:", content_tokens)

    # Stage 3: reduce every remaining token to its Porter stem.
    stems = list(map(stemmer.stem, content_tokens))
    print("Stemmed Words:", stems)

# Sample input for the pipeline; the call prints the output of every stage.
sentence = "NLP techniques are used in virtual assistants like Alexa and Siri."
preprocess(sentence)


Original Tokens: ['NLP', 'techniques', 'are', 'used', 'in', 'virtual', 'assistants', 'like', 'Alexa', 'and', 'Siri', '.']
Tokens Without Stopwords: ['NLP', 'techniques', 'used', 'virtual', 'assistants', 'like', 'Alexa', 'Siri', '.']
Stemmed Words: ['nlp', 'techniqu', 'use', 'virtual', 'assist', 'like', 'alexa', 'siri', '.']


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Short Answer:
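# Q: What is the difference between stemming and lemmatization?
# A: Stemming chops off suffixes using fixed rules, so the result may not be a real word (e.g. 'techniques' becomes 'techniqu'). Lemmatization uses vocabulary and grammar (part of speech) to return the dictionary form, e.g. 'running' becomes 'run'.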

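# Q: Why remove stopwords?
# A: Stopwords such as 'the' or 'in' carry little topical information, so removing them helps in tasks like classification and search. It can harm tasks like translation or question answering, where those words matter for meaning.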


In [3]:
# Q2: Named Entity Recognition with SpaCy

import spacy

# Load the en_core_web_sm spaCy pipeline and run it over a sample sentence.
nlp = spacy.load("en_core_web_sm")
text = "Barack Obama served as the 44th President of the United States and won the Nobel Peace Prize in 2009."
doc_spacy = nlp(text)

# Print every detected entity: its text, its label, and its start/end character offsets.
for ent in doc_spacy.ents:
    print(ent.text, ent.label_, ent.start_char, ent.end_char)


Barack Obama PERSON 0 12
44th ORDINAL 27 31
the United States GPE 45 62
the Nobel Peace Prize WORK_OF_ART 71 92
2009 DATE 96 100


In [4]:
# Short Answer:
# Q: How is NER different from POS tagging?
# A: NER finds named entities such as people and locations and labels their type. POS tagging labels each word with its part of speech, such as noun or verb.

# Q: What are some applications of NER?
# A: It is used on financial news to find company names. It is also used in search engines to better understand what a query refers to.


In [5]:
# Q3: Scaled Dot-Product Attention

import numpy as np
import math

def scaled_dot_product_attention(Q, K, V):
    """Compute softmax(Q K^T / sqrt(d)) V and print the weights and the output.

    Args:
        Q: Query array of shape (..., n_queries, d).
        K: Key array of shape (..., n_keys, d).
        V: Value array of shape (..., n_keys, d_v).

    Prints the attention weights (each row sums to 1) followed by the
    attended output. Nothing is returned.
    """
    d = Q.shape[-1]
    # Transpose only the last two axes so stacked (batched) inputs also work;
    # for 2-D inputs this is exactly np.dot(Q, K.T).
    scores = np.matmul(Q, np.swapaxes(K, -1, -2)) / math.sqrt(d)
    # Subtract each row's maximum before exponentiating. Softmax is invariant
    # to a constant shift, and this keeps np.exp from overflowing to inf,
    # which would turn the weights into nan.
    shifted = scores - np.max(scores, axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    weights = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)
    output = np.matmul(weights, V)
    print("Attention Weights:\n", weights)
    print("Output:\n", output)

# Toy example: two 4-dimensional queries, two matching keys, and one 4-dimensional value row per key.
Q = np.array([[1, 0, 1, 0], [0, 1, 0, 1]])
K = np.array([[1, 0, 1, 0], [0, 1, 0, 1]])
V = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
scaled_dot_product_attention(Q, K, V)


Attention Weights:
 [[0.73105858 0.26894142]
 [0.26894142 0.73105858]]
Output:
 [[2.07576569 3.07576569 4.07576569 5.07576569]
 [3.92423431 4.92423431 5.92423431 6.92423431]]


In [6]:
# Short Answer:
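# Q: Why divide by sqrt(d)?
# A: Dot products grow in magnitude as the dimension d grows. Dividing by sqrt(d) keeps the scores small enough that softmax does not saturate, which would otherwise cause tiny gradients and unstable training.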

# Q: How does self-attention help?
# A: It allows the model to relate each word to every other word in the sentence.


In [7]:
# Q4: Sentiment Analysis using HuggingFace Transformers

from transformers import pipeline

# Pin the checkpoint and revision explicitly instead of relying on the task default.
# These are the same model and revision the un-pinned call resolved to, so the result
# is unchanged, but the run is reproducible and the "No model was supplied" warning goes away.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)
# The pipeline returns one result dict per input text; take the only one.
result = classifier("Despite the high price, the performance of the new MacBook is outstanding.")[0]
print("Sentiment:", result['label'])
print("Confidence Score:", result['score'])


2025-05-06 04:58:37.332974: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746507517.617422      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746507517.703295      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


Sentiment: POSITIVE
Confidence Score: 0.9998302459716797


In [8]:
# Short Answer:
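# Q: What is the difference between BERT and GPT?
# A: BERT uses the Transformer encoder and reads context in both directions, which makes it good for understanding tasks. GPT uses the Transformer decoder and predicts text left to right, which makes it good for generation.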

# Q: Why use pre-trained models?
# A: They save training time, need less task-specific data, and usually give better results than training from scratch.
