# In [2]:
import logging
import os
import tkinter as tk
from collections import Counter
from datetime import datetime
from tkinter import ttk, messagebox
from unittest import TestCase, mock

import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

# Logging configuration: INFO-level output on the root logger, plus the
# module-level logger used throughout this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NewsAPI key (https://newsapi.org/).
# A non-empty NEWSAPI_KEY environment variable takes precedence so the
# credential does not have to be committed with the source; the literal is
# kept only as a backward-compatible fallback.
API_KEY = os.environ.get("NEWSAPI_KEY") or "dd596c9f-49c8-4e82-ba2e-75f1af6eb0b1"

class NewsAggregator:
    """Fetch articles from NewsAPI and enrich them with scraped page text.

    Attributes:
        news_data: Article dicts from the most recent successful
            ``aggregate_news`` call; empty until then.
        session: ``requests.Session`` shared by the API calls and the
            scraper, configured with a ``Mozilla/5.0`` User-Agent header.
    """

    # CSS selectors tried in order when looking for the article body; the
    # generic 'p' selector is the last-resort fallback.
    CONTENT_SELECTORS = (
        'article',
        '.article-body',
        '.post-content',
        'div[itemprop="articleBody"]',
        'p',
    )
    # Maximum number of characters of scraped text kept per article.
    MAX_CONTENT_CHARS = 1000
    # Placeholder stored when no article text could be obtained.
    NO_CONTENT = "Content not available"

    def __init__(self):
        self.news_data = []
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Mozilla/5.0'})

    def fetch_news_from_api(self, category="general", query=None):
        """Fetch news from NewsAPI.

        Args:
            category: Category sent to the ``top-headlines`` endpoint. Only
                used when ``query`` is falsy.
            query: Optional search text. When truthy, the ``everything``
                endpoint is queried instead, restricted to articles from
                today's (local) date onward and sorted by ``publishedAt``.

        Returns:
            The list of article dicts from the response, or ``[]`` when the
            request fails, the body is not valid JSON, or the API reports a
            status other than ``"ok"``.
        """
        if query:
            endpoint = "https://newsapi.org/v2/everything"
            params = {
                "q": query,
                "from": datetime.now().strftime('%Y-%m-%d'),
                "sortBy": "publishedAt",
            }
        else:
            endpoint = "https://newsapi.org/v2/top-headlines"
            params = {"category": category}
        params["apiKey"] = API_KEY

        try:
            # Passing ``params`` lets requests URL-encode the values, so a
            # query containing spaces, '&' or '#' cannot corrupt the URL.
            response = self.session.get(endpoint, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON error is not a RequestException subclass.
            logger.error("API request failed: %s", e)
            return []

        if not isinstance(data, dict) or data.get('status') != 'ok':
            message = data.get('message', 'Unknown error') if isinstance(data, dict) else 'Unknown error'
            logger.error("API error: %s", message)
            return []

        # ``articles`` may be missing or null; normalise both to a list.
        articles = data.get("articles") or []
        logger.info("Fetched %d news articles from API", len(articles))
        return articles

    def scrape_article_content(self, url):
        """Scrape article content using BeautifulSoup.

        The selectors in ``CONTENT_SELECTORS`` are tried in order and the
        first one that yields non-empty text wins.

        Args:
            url: Address of the article page.

        Returns:
            The extracted text, cut to ``MAX_CONTENT_CHARS`` characters with
            a trailing ``"..."`` only when it was actually truncated;
            ``"Content not available"`` when no text was found; or
            ``"Error: <details>"`` when fetching or parsing failed.
        """
        try:
            response = self.session.get(url, timeout=10)
            # Do not scrape the body of an HTTP error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            content = ""
            for selector in self.CONTENT_SELECTORS:
                pieces = (e.get_text().strip() for e in soup.select(selector))
                text = " ".join(piece for piece in pieces if piece)
                # A selector that matches only empty elements must not stop
                # the search; fall through to the next selector.
                if text:
                    content = text
                    break

            if not content:
                return self.NO_CONTENT
            if len(content) > self.MAX_CONTENT_CHARS:
                return content[:self.MAX_CONTENT_CHARS] + "..."
            return content
        except Exception as e:
            # Deliberately broad: scraping is best-effort and one bad page
            # must not abort the whole aggregation.
            logger.error("Scraping error: %s", e)
            return f"Error: {e}"

    def aggregate_news(self, category, query=None):
        """Combine API data and scraped content.

        Each article dict returned by ``fetch_news_from_api`` is updated in
        place: ``full_content`` receives the scraped text (or the
        "Content not available" placeholder when the article has no URL) and
        a parseable ``publishedAt`` is reformatted for display.

        Args:
            category: Forwarded to ``fetch_news_from_api``.
            query: Forwarded to ``fetch_news_from_api``.

        Returns:
            The enriched article list, also stored in ``self.news_data``;
            ``[]`` when the API returned nothing (``self.news_data`` is then
            left untouched).
        """
        api_news = self.fetch_news_from_api(category, query)
        if not api_news:
            return []

        for article in api_news:
            url = article.get("url")
            # Skip the network round-trip when there is nothing to fetch.
            article["full_content"] = (
                self.scrape_article_content(url) if url else self.NO_CONTENT
            )
            published = article.get("publishedAt")
            if published:
                article["publishedAt"] = self._format_published_at(published)

        self.news_data = api_news
        return api_news

    @staticmethod
    def _format_published_at(raw):
        """Return ``raw`` reformatted for display, or unchanged if unparseable."""
        try:
            parsed = datetime.strptime(raw, "%Y-%m-%dT%H:%M:%SZ")
        except (TypeError, ValueError):
            # Unexpected timestamp shape: keep the original value.
            return raw
        return parsed.strftime("%d %b, %Y %I:%M %p")

    def visualize_news_sources(self):
        """Visualize news sources using matplotlib.

        Draws a bar chart of the number of articles per source name in
        ``self.news_data``. Articles whose source or source name is missing
        are counted under "Unknown".

        Raises:
            ValueError: If ``self.news_data`` is empty.
        """
        if not self.news_data:
            raise ValueError("No data available for visualization")

        sources = Counter(
            (article.get("source") or {}).get("name") or "Unknown"
            for article in self.news_data
        )
        # Hand matplotlib plain lists rather than dict views.
        names = list(sources)
        counts = [sources[name] for name in names]

        plt.figure(figsize=(10, 5))
        plt.bar(names, counts, color='skyblue')
        plt.title("Number of Articles by News Source")
        plt.xlabel("Source")
        plt.ylabel("Count")
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

class NewsApp:
    """Tkinter front end for ``NewsAggregator``.

    Provides a search box, a category dropdown, buttons to fetch news and to
    chart the sources, a table of the fetched articles and a status bar.
    """

    def __init__(self, root):
        """Configure the window, create the aggregator and build the widgets.

        Args:
            root: The Tk root (or toplevel) window that hosts the application.
        """
        self.root = root
        self.root.title("News Aggregator")
        self.root.geometry("800x600")
        self.aggregator = NewsAggregator()

        # Style configuration
        self.style = ttk.Style()
        self.style.configure('TLabel', font=('Helvetica', 12))
        self.style.configure('TButton', font=('Helvetica', 12))
        self.style.configure('Treeview', font=('Helvetica', 11), rowheight=25)

        # Create GUI widgets
        self.create_widgets()

    def create_widgets(self):
        """Create GUI widgets and lay them out top to bottom in the window."""
        # Main container
        main_frame = ttk.Frame(self.root)
        main_frame.pack(fill='both', expand=True, padx=10, pady=10)

        # Search frame
        search_frame = ttk.Frame(main_frame)
        search_frame.pack(fill='x', pady=5)

        ttk.Label(search_frame, text="Search:").pack(side='left')

        self.search_var = tk.StringVar()
        self.search_entry = ttk.Entry(search_frame, textvariable=self.search_var, width=30)
        self.search_entry.pack(side='left', padx=5)

        # Category selection (read-only, so only the listed values can be chosen)
        category_frame = ttk.Frame(main_frame)
        category_frame.pack(fill='x', pady=5)

        ttk.Label(category_frame, text="Category:").pack(side='left')

        self.category_var = tk.StringVar()
        self.category_dropdown = ttk.Combobox(
            category_frame,
            textvariable=self.category_var,
            values=["general", "business", "technology", "sports", "entertainment", "health", "science"],
            state="readonly",
            width=20
        )
        self.category_dropdown.pack(side='left', padx=5)
        self.category_dropdown.set("general")

        # Button frame
        button_frame = ttk.Frame(main_frame)
        button_frame.pack(fill='x', pady=10)

        ttk.Button(
            button_frame,
            text="Fetch News",
            command=self.fetch_news
        ).pack(side='left', padx=5)

        ttk.Button(
            button_frame,
            text="Visualize Sources",
            command=self.visualize
        ).pack(side='left', padx=5)

        # Treeview (news table)
        tree_frame = ttk.Frame(main_frame)
        tree_frame.pack(fill='both', expand=True)

        self.tree = ttk.Treeview(
            tree_frame,
            columns=("title", "source", "date"),
            show="headings",
            selectmode="browse"
        )

        # Column configuration
        self.tree.heading("title", text="Headline")
        self.tree.heading("source", text="Source")
        self.tree.heading("date", text="Date")

        self.tree.column("title", width=400, anchor='w')
        self.tree.column("source", width=150, anchor='center')
        self.tree.column("date", width=150, anchor='center')

        # Scrollbar (packed before the tree so it keeps the right-hand edge)
        scrollbar = ttk.Scrollbar(tree_frame, orient="vertical", command=self.tree.yview)
        scrollbar.pack(side='right', fill='y')
        self.tree.configure(yscrollcommand=scrollbar.set)
        self.tree.pack(fill='both', expand=True)

        # Status bar
        self.status_var = tk.StringVar()
        self.status_var.set("Ready")
        ttk.Label(main_frame, textvariable=self.status_var, relief='sunken').pack(fill='x')

    def fetch_news(self):
        """Fetch news and display in Treeview.

        Reads the selected category and the search text, clears the table,
        asks the aggregator for articles and inserts one row per article.
        Shows a warning dialog and a failure status when nothing is returned.
        """
        category = self.category_var.get()
        # A blank or whitespace-only search box means "no query".
        query = self.search_var.get().strip() or None
        self.status_var.set(f"Loading {category} news..." if not query else f"Searching for '{query}'...")
        # Process pending events so the status text is shown before the
        # blocking fetch below starts.
        self.root.update()

        # Clear Treeview
        for item in self.tree.get_children():
            self.tree.delete(item)

        news = self.aggregator.aggregate_news(category, query)

        if not news:
            messagebox.showwarning("Empty", "No news found! Please check your API key or try a different search.")
            self.status_var.set("Failed to load news")
            return

        for article in news:
            # Articles may lack fields or carry nulls; fall back to
            # placeholders instead of aborting the whole listing.
            source = article.get("source") or {}
            self.tree.insert(
                "",
                "end",
                values=(
                    article.get("title") or "(untitled)",
                    source.get("name") or "Unknown",
                    article.get("publishedAt") or "",
                ),
                tags=("article",)
            )

        self.status_var.set(f"Loaded {len(news)} news articles")

    def visualize(self):
        """Visualize news sources, reporting any failure in an error dialog."""
        try:
            self.aggregator.visualize_news_sources()
        except Exception as e:
            messagebox.showerror("Error", str(e))

class TestNewsAggregator(TestCase):
    """Unit tests for NewsAggregator with ``requests.Session.get`` patched out."""

    def setUp(self):
        # Every test starts from a fresh aggregator instance.
        self.aggregator = NewsAggregator()

    def test_fetch_news_from_api(self):
        """A successful API payload is returned as the article list."""
        fake_response = mock.Mock()
        fake_response.status_code = 200
        fake_response.json.return_value = {
            "status": "ok",
            "articles": [{"title": "Test News", "source": {"name": "Test Source"}}]
        }

        with mock.patch('requests.Session.get') as patched_get:
            patched_get.return_value = fake_response
            articles = self.aggregator.fetch_news_from_api()

        self.assertEqual(articles[0]["title"], "Test News")

    def test_scrape_article_content(self):
        """Paragraph text from the fetched HTML appears in the scraped result."""
        fake_response = mock.Mock()
        fake_response.text = "<html><body><p>Test Content</p></body></html>"

        with mock.patch('requests.Session.get') as patched_get:
            patched_get.return_value = fake_response
            scraped = self.aggregator.scrape_article_content("http://example.com")

        self.assertIn("Test Content", scraped)

if __name__ == "__main__":
    # Build the Tk root window and attach the application to it.
    root = tk.Tk()
    app = NewsApp(root)

    # Center the window: offset the top-left corner by half the window size
    # from the middle of the screen.
    window_width, window_height = 800, 600
    x = (root.winfo_screenwidth() // 2) - (window_width // 2)
    y = (root.winfo_screenheight() // 2) - (window_height // 2)
    root.geometry(f"{window_width}x{window_height}+{x}+{y}")

    # Hand control to the Tk event loop until the window is closed.
    root.mainloop()