## DATA 622 Natural Language Processing
### Homework 9

Questions

Use the article https://apnews.com/article/boeing-aviation-aircraft-air-india-crashf12b20e65dc57ae655a1e0759b58938f.
1. Classify the sentiment, the intent, and the emotions.
2. Determine how much the article is about technology, aviation, and policies.
3. Use LLMs and one Deep Learning method of your choice to answer the questions.
Compare the results.

In [12]:
%pip install -qU "requests==2.32.4" beautifulsoup4 scikit-learn
%pip install -q --index-url https://download.pytorch.org/whl/cpu torch==2.8.0+cpu
%pip install -q "transformers==4.45.2" "sentence-transformers==3.0.1"

# quick sanity print
import torch, transformers, sentence_transformers
print("torch:", torch.__version__, "| transformers:", transformers.__version__, "| sbert:", sentence_transformers.__version__)

torch: 2.8.0+cpu | transformers: 4.57.1 | sbert: 5.1.2


In [11]:
# === Setup & fetch ============================================================
import re, textwrap, numpy as np, requests
from bs4 import BeautifulSoup

URL = "https://apnews.com/article/boeing-aviation-aircraft-air-india-crashf12b20e65dc57ae655a1e0759b58938f"

def fetch_clean(url: str) -> str:
    r = requests.get(url, timeout=30); r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    for bad in soup(["script","style","noscript"]): bad.decompose()
    text = " ".join(soup.get_text(" ").split())
    return re.sub(r"\s+"," ", text).strip()

doc = fetch_clean(URL)

print("="*100)
print("Source")
print("-"*100)
print(f"URL: {URL}\nCharacters: {len(doc):,}\n")

# === Helpers =================================================================
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

def summarize(text, max_sent=3):
    sents = [s for s in re.split(r'(?<=[.!?])\s+', text) if 40<=len(s)<=500]
    if not sents: return ""
    if len(sents) <= max_sent: return " ".join(sents)
    V = TfidfVectorizer(stop_words="english", max_features=8000); S = V.fit_transform(sents)
    score = (S.power(2).sum(axis=1)).A.ravel(); ix=np.argsort(score)[::-1][:max_sent]; ix.sort()
    return " ".join([sents[i] for i in ix])

# Keep model inputs safe (avoid 512/1024 token issues)
brief_for_dl  = summarize(doc, 3)           # ≤ ~3 sentences
snippet_for_zs = doc[:2000]                 # safe chunk for zero-shot

# Build pipelines on CPU
zs      = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=-1)
dl_sent = pipeline("sentiment-analysis",     model="distilbert-base-uncased-finetuned-sst-2-english", device=-1)
st      = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# -----------------------------------------------------------------------------
# 1) SENTIMENT, INTENT, EMOTIONS (LLM + DL)
# -----------------------------------------------------------------------------
print("="*100)
print("1) Sentiment, Intent, Emotions")
print("-"*100)

sentiment_labels = ["positive", "neutral", "negative"]
intent_labels    = ["inform", "analyze", "warn", "advocate", "criticize", "celebrate"]
emotion_labels   = ["anger", "fear", "sadness", "joy", "disgust", "surprise", "trust", "anticipation"]

# LLM (BART MNLI, zero-shot on a safe slice)
llm_sent    = zs(snippet_for_zs, sentiment_labels, multi_label=False)
llm_intent  = zs(snippet_for_zs, intent_labels,  multi_label=True)
llm_emotion = zs(snippet_for_zs, emotion_labels, multi_label=True)

# Deep Learning (supervised DistilBERT on short summary)
dl_out   = dl_sent(brief_for_dl)[0]
dl_label = {"POSITIVE": "positive", "NEGATIVE": "negative"}[dl_out["label"]]

def top_k(z, k=3): return list(zip(z["labels"][:k], z["scores"][:k]))

print("LLM — Sentiment (zero-shot):")
print(f"  {llm_sent['labels'][0]}  (p≈{llm_sent['scores'][0]:.3f})")
print("DL  — Sentiment (DistilBERT on 3-sentence summary):")
print(f"  {dl_label}  (score≈{dl_out['score']:.3f})\n")

print("LLM — Intent (top 3):")
for lab, sc in top_k(llm_intent, 3): print(f"  {lab:>10s} : {sc:.3f}")

print("\nLLM — Emotions (top 5):")
for lab, sc in top_k(llm_emotion, 5): print(f"  {lab:>10s} : {sc:.3f}")

# -----------------------------------------------------------------------------
# 2) Technology, Aviation, Policies (LLM + DL)
# -----------------------------------------------------------------------------
print("\n" + "="*100)
print("2) Topic strengths (Technology, Aviation, Policies)")
print("-"*100)

topics = {
    "technology": "aircraft systems, engineering, manufacturing, software, sensors, design, innovation",
    "aviation":   "airlines, airports, flights, aircraft, pilots, safety, air traffic, crash investigations",
    "policies":   "regulations, government rules, safety policies, compliance, oversight, investigations, penalties"
}

# LLM zero-shot topic weights
llm_topic = zs(snippet_for_zs, list(topics.keys()), multi_label=True)
llm_topic_scores = {k: float(v) for k,v in zip(llm_topic["labels"], llm_topic["scores"])}
sum_llm = sum(llm_topic_scores.values()) or 1e-9
llm_pct = {k: 100*(llm_topic_scores[k]/sum_llm) for k in topics}

print("LLM — topic distribution (%):")
for k in topics: print(f"  {k:>10s}: {llm_pct[k]:6.2f}%")

# DL topic weights via SBERT similarity (on brief summary)
doc_emb    = st.encode(brief_for_dl, normalize_embeddings=True)
topic_embs = {k: st.encode(v, normalize_embeddings=True) for k,v in topics.items()}
sims = {k: float(util.cos_sim(doc_emb, topic_embs[k])) for k in topics}

vals = np.array(list(sims.values()), dtype=float)
if (vals.max()-vals.min()) < 1e-9:
    dl_pct = {k: 100.0/len(sims) for k in sims}
else:
    mm = (vals - vals.min())/(vals.max()-vals.min()); mm = mm / mm.sum()
    dl_pct = {k: float(100*m) for k,m in zip(sims.keys(), mm)}

print("\nDL — topic distribution (%):")
for k in topics: print(f"  {k:>10s}: {dl_pct[k]:6.2f}%")

# -----------------------------------------------------------------------------
# 3) COMPARISON SUMMARY
# -----------------------------------------------------------------------------
print("\n" + "="*100)
print("3) LLM vs Deep Learning — Comparison")
print("-"*100)
print("Sentiment")
print(f"  LLM: {llm_sent['labels'][0]} (p≈{llm_sent['scores'][0]:.3f})")
print(f"  DL : {dl_label} (score≈{dl_out['score']:.3f})")

print("\nTopics (% emphasis)")
for k in topics:
    print(f"  {k:>10s} | LLM: {llm_pct[k]:6.2f}%   DL: {dl_pct[k]:6.2f}%")

print("\nIntent (LLM top 3):", ", ".join([f"{lab} ({sc:.2f})" for lab,sc in top_k(llm_intent,3)]))
print("Emotions (LLM top 5):", ", ".join([f"{lab} ({sc:.2f})" for lab,sc in top_k(llm_emotion,5)]))

major_llm = max(llm_pct, key=llm_pct.get)
major_dl  = max(dl_pct,  key=dl_pct.get)
narr = (
    f"LLM emphasizes **{major_llm}**, while DL similarity emphasizes **{major_dl}**. "
    f"LLM uses zero-shot reasoning for sentiment/intent/emotions; DL sentiment is supervised "
    f"and SBERT gives topic similarity. Minor differences come from truncation strategy and model objectives."
)
print("\nSummary:")
print(textwrap.fill(narr, 100))


Source
----------------------------------------------------------------------------------------------------
URL: https://apnews.com/article/boeing-aviation-aircraft-air-india-crashf12b20e65dc57ae655a1e0759b58938f
Characters: 43,196



Device set to use cpu
Device set to use cpu


1) Sentiment, Intent, Emotions
----------------------------------------------------------------------------------------------------
LLM — Sentiment (zero-shot):
  negative  (p≈0.758)
DL  — Sentiment (DistilBERT on 3-sentence summary):
  positive  (score≈0.996)

LLM — Intent (top 3):
        warn : 0.621
      inform : 0.547
     analyze : 0.467

LLM — Emotions (top 5):
       anger : 0.530
        fear : 0.487
     sadness : 0.414
       trust : 0.386
  anticipation : 0.294

2) Topic strengths (Technology, Aviation, Policies)
----------------------------------------------------------------------------------------------------
LLM — topic distribution (%):
  technology:  21.27%
    aviation:  59.52%
    policies:  19.21%

DL — topic distribution (%):
  technology:   1.03%
    aviation:  98.97%
    policies:   0.00%

3) LLM vs Deep Learning — Comparison
----------------------------------------------------------------------------------------------------
Sentiment
  LLM: negative (p≈0.758)
