<a href="https://colab.research.google.com/github/jspumn/jspumn.github.io/blob/main/Word_embedding_prototype.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
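# 📌 Step 1: Install the required packages (including lxml_html_clean)
# lxml_html_clean is installed explicitly because recent lxml releases ship the
# lxml.html.clean module, which newspaper3k imports, as a separate package.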
!pip install newspaper3k
!pip install sentence-transformers
!pip install lxml_html_clean


# 📌 Step 2: Import libraries
from newspaper import Article
from sentence_transformers import SentenceTransformer, util

# 📌 Step 3: Initialize the embedding model (another model can be swapped in)
# NOTE(review): per its model card, all-MiniLM-L6-v2 truncates long inputs
# (256 word pieces by default), so only the beginning of each article is likely
# embedded — confirm whether full-article comparison needs chunking.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# 📌 Step 4: Function that fetches a news article's text
def get_article_text(url):
    """Return the body text of the news article at *url*.

    The page is downloaded and parsed through ``Article``. This function
    never raises: if any step fails, it returns a string of the form
    ``"Error fetching article: <exception message>"`` instead, so callers
    that need to tell success from failure must check for that prefix.

    Args:
        url: Address of the article to fetch.

    Returns:
        The parsed article text, or the error string described above.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        # Read the text inside the try so a failure here is reported the
        # same way as a download/parse failure.
        body = page.text
    except Exception as err:
        return f"Error fetching article: {err}"
    return body

# 📌 Step 5: Read the two news URLs and fetch their article text
# Strip surrounding whitespace so a pasted URL with a stray space or newline
# is passed on clean.
url1 = input("Enter the first news URL: ").strip()
url2 = input("Enter the second news URL: ").strip()

text1 = get_article_text(url1)
text2 = get_article_text(url2)

# get_article_text() reports a failure by returning a string that starts with
# "Error fetching article:" rather than raising. Stop here in that case (and
# when no text came back at all); otherwise the following steps would embed
# the error message or an empty string and print a meaningless similarity.
for label, url, text in (("first", url1, text1), ("second", url2, text2)):
    if text.startswith("Error fetching article:"):
        raise RuntimeError(f"Could not fetch the {label} article ({url}): {text}")
    if not text.strip():
        raise RuntimeError(
            f"No article text could be extracted from the {label} URL: {url}"
        )

# Show the first 500 characters of each article as a sanity check.
print("\n--- News 1 Preview ---\n", text1[:500])
print("\n--- News 2 Preview ---\n", text2[:500])



##test1 : url1: https://avinews.com/ph/new-vaccine-development-center-inaugurated-in-the-philippines/
##test2 : url2: https://tribune.net.ph/2025/04/15/vaccine-center-combats-animal-diseases
##test3 : url3: https://www.bworldonline.com/economy/2025/04/13/665630/meat-imports-top-16-in-feb-as-asf-continues-to-pressure-supply/#google_vignette

# 📌 Step 6: Embed both articles and compute their semantic similarity
embedding1 = model.encode(text1, convert_to_tensor=True)
embedding2 = model.encode(text2, convert_to_tensor=True)
# .item() unwraps the single-element result into a plain Python number.
similarity = util.cos_sim(embedding1, embedding2).item()

# 📌 Step 7: Print the score and the verdict band it falls into
print(f"\n✅ Cosine similarity: {similarity:.3f}")

# Bands are checked from strictest to loosest; every lower bound is exclusive.
# Anything that clears none of them is reported as a different event.
verdict = "❌ DIFFERENT Event"
for lower_bound, message in (
    (0.90, "✅ SAME EVENT – High Confidence"),
    (0.75, "⚠️ POSSIBLY Related – Needs Review"),
    (0.50, "❔ Possibly Similar Topic – Likely Different Event"),
):
    if similarity > lower_bound:
        verdict = message
        break
print(verdict)



Enter the first news URL: https://avinews.com/ph/new-vaccine-development-center-inaugurated-in-the-philippines/
Enter the second news URL: https://www.bworldonline.com/economy/2025/04/13/665630/meat-imports-top-16-in-feb-as-asf-continues-to-pressure-supply/#google_vignette

--- News 1 Preview ---
 The Department of Agriculture (DA) officially inaugurated a new vaccine development unit at the Center for Transboundary Animal Diseases (CenTrAD) at the Central Luzon State University (CLSU) in Muñoz, Nueva Ecija.

To advance vaccine development, the DA has allocated PHP 151 million for a three-year research program, which includes acquiring the country’s first Biosafety Level 3 Laboratory (BSL-3). This high-security lab enables research on microbes—both indigenous and exotic—that can cause sev

--- News 2 Preview ---
 By Kyle Aristophere T. Atienza, Reporter

MEAT IMPORTS rose 16.18% year on year in February, the Bureau of Animal Industry (BAI) said, with domestic supply remaining constrain