In [None]:
!pip install requests beautifulsoup4 googlesearch-python newspaper3k transformers ipywidgets tqdm --quiet

In [None]:
# Ignore every dateutil UnknownTimezoneWarning raised from this point on.
# NOTE(review): presumably these are emitted while a dependency parses article
# publish dates — confirm which call triggers them.
import warnings
from dateutil.parser import UnknownTimezoneWarning
warnings.filterwarnings("ignore", category=UnknownTimezoneWarning)

In [None]:
import time
from urllib.parse import urlparse

import ipywidgets as widgets
import requests
from bs4 import BeautifulSoup
from googlesearch import search
from IPython.display import display, Markdown, HTML
from newspaper import Article
from tqdm import tqdm
from transformers import pipeline

In [None]:
# Build the summarization pipeline once, from the facebook/bart-large-cnn
# checkpoint; extract_and_summarize reuses this single instance for every article.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

In [None]:
# Disclaimer block
# HTML snippet (a red-bordered notice box) that run_osint_tool renders with
# display(HTML(DISCLAIMER)) before it shows any search results.
DISCLAIMER = '''
<div style="border-left: 3px solid #ff0000; padding: 5px 10px; background: #fff3f3; margin-bottom: 10px;">
⚠️ <strong>Disclaimer:</strong> This tool provides <strong>automated summaries</strong> of publicly available articles.
Accuracy depends on source quality. Always verify original content.
Use for <strong>research only</strong>—not legal/defamatory purposes.
</div>
'''

In [None]:
# Philippine news outlets, expressed as Google `site:` filters.
# get_news_links joins these with " OR " to restrict every search to them.
PH_NEWS_SOURCES = [
    f"site:{domain}"
    for domain in (
        "news.abs-cbn.com",
        "philstar.com",
        "rappler.com",
        "gmanetwork.com",
        "inquirer.net",
        "manilatimes.net",
    )
]

In [None]:
# Google search
def get_news_links(query, num=5):
    """Search Google for ``query``, restricted to the sites in PH_NEWS_SOURCES.

    Args:
        query: Free-text search term (e.g. a candidate's name).
        num: Number of results requested from the search backend.

    Returns:
        A list of result URLs in rank order with duplicates and empty entries
        removed (so it may hold fewer than ``num`` items). Returns an empty
        list when ``query`` is blank or when the search raises; in the latter
        case the error is printed rather than propagated.
    """
    if not query or not str(query).strip():
        # A blank query would search on the site filters alone and return
        # arbitrary articles, so treat it as "nothing found".
        return []
    try:
        site_filter = ' OR '.join(PH_NEWS_SOURCES)
        full_query = f"{query} {site_filter}"
        # NOTE(review): fixed pause before querying, presumably to space out
        # requests and avoid rate limiting — confirm.
        time.sleep(2)
        results = search(full_query, num_results=num, sleep_interval=5)
        # dict.fromkeys consumes the results inside the try block (so errors
        # raised while iterating are still caught) and drops repeated URLs
        # while keeping their original order.
        return [link for link in dict.fromkeys(results) if link]
    except Exception as e:
        # Best-effort by design: report the problem and let the caller show
        # its "no articles" message.
        print(f"⚠️ Search error: {e}")
        return []

In [None]:
# Article extraction and summary
def extract_and_summarize(url):
    """Download the article at ``url`` and return a summary of its text.

    Only the first 2000 characters of the article body (newlines replaced by
    spaces, then stripped) are passed to the module-level ``summarizer``.

    Args:
        url: Address of the article; must start with ``http://`` or ``https://``.

    Returns:
        The summary text on success. Otherwise a human-readable status string:
        "❌ Invalid URL", "⚠️ No extractable text", or
        "⚠️ Processing failed: <error>" for any exception raised on the way.
        This function never raises an ``Exception`` subclass itself.
    """
    try:
        is_web_url = url.startswith(('http://', 'https://'))
        if not is_web_url:
            return "❌ Invalid URL"

        page = Article(url)
        page.download()
        page.parse()

        flattened = page.text.replace('\n', ' ').strip()
        text = flattened[:2000]
        if not text:
            return "⚠️ No extractable text"

        # Target about half the word count, clamped to the range 30..130.
        half_words = len(text.split()) // 2
        if half_words > 130:
            max_len = 130
        elif half_words < 30:
            max_len = 30
        else:
            max_len = half_words

        # Once max_len exceeds 35, max_len - 5 is above 30, so the lower bound
        # is always 30 there; shorter targets use a lower bound of 10.
        min_len = 30 if max_len > 35 else 10

        outputs = summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)
        return outputs[0]['summary_text']
    except Exception as e:
        return f"⚠️ Processing failed: {str(e)}"

In [None]:
# Run OSINT tool
def _source_label(link):
    """Return the host part of ``link`` without a leading ``www.``.

    Falls back to ``'unknown source'`` when the link has no host (for example a
    relative or scheme-less result), instead of raising.
    """
    host = urlparse(link).netloc
    if host.startswith('www.'):
        # Strip only the prefix; a plain replace() would also remove "www."
        # occurring elsewhere in the host name.
        host = host[len('www.'):]
    return host or 'unknown source'


def run_osint_tool(candidate):
    """Search the configured news sites for ``candidate`` and display summaries.

    Renders, in order: the disclaimer box, a heading with the search term, and
    for each link returned by ``get_news_links`` a source heading followed by
    the result of ``extract_and_summarize``. If no links are returned, a
    "no articles found" message is shown instead.

    Args:
        candidate: Name (or any search text) to look up.

    Returns:
        None. All results are emitted through ``display``.
    """
    display(HTML(DISCLAIMER))
    display(Markdown(f"## 🔍 Searching for: **{candidate}**"))
    links = get_news_links(candidate)
    if not links:
        display(Markdown("❌ No articles found. Try a different name or check spelling."))
        return
    for link in tqdm(links, desc="Analyzing articles"):
        # A malformed link must not abort the whole run: label it with a
        # placeholder and let extract_and_summarize report it as invalid.
        source = _source_label(link)
        display(Markdown(f"### 🔗 Source ({source})"))
        summary = extract_and_summarize(link)
        display(Markdown(f"**📝 Summary from {source}:** {summary}\n---"))

In [None]:
# Simplified UI with dropdown
# Names offered in the dropdown; the selected entry is passed verbatim to
# run_osint_tool as the search text.
candidate_list = [
    'Abalos, Benjamin Jr. “Benhur” De Castro',
    'Villar, Camille Aguilar',
    'Tulfo, Erwin Teshiba',
    'Go, Christopher Lawrence Tesoro',
    'Pacquiao, Emmanuel “Manny” Dapidran'
]
dropdown = widgets.Dropdown(options=candidate_list, description='Candidate:')
search_button = widgets.Button(description='🔎 Search News', button_style='primary')
# Some front ends (e.g. JupyterLab) do not show output produced inside widget
# callbacks, so everything the search displays is captured by this Output widget.
results_output = widgets.Output()


def on_click(b):
    """Run a search for the selected candidate when the button is clicked.

    Args:
        b: The Button instance that fired the event (unused).
    """
    if not dropdown.value:
        return
    # Disable the button while a search is running so repeated clicks do not
    # queue up additional searches.
    search_button.disabled = True
    try:
        with results_output:
            # Replace the previous search's results instead of appending to them.
            results_output.clear_output()
            run_osint_tool(dropdown.value)
    finally:
        search_button.disabled = False


search_button.on_click(on_click)
display(widgets.VBox([dropdown, search_button, results_output]))