In [1]:
!pip install Wikipedia-API



In [1]:
import wikipediaapi
from typing import List, Dict, Optional
import logging
from dataclasses import dataclass
from datetime import datetime

@dataclass
class WikiSearchResult:
    """Plain record describing one Wikipedia article.

    Instances are built by ``WikipediaSearcher._process_page`` and rendered
    by ``WikipediaSearcher.format_result``.
    """

    # Article title.
    title: str
    # Article summary text.
    summary: str
    # Complete article text.
    full_text: str
    # Full URL of the article.
    url: str
    # Parsed from the page's ``touched`` timestamp.
    last_modified: datetime
    # Titles of the categories the article belongs to.
    categories: List[str]

class WikipediaSearcher:
    """Fetch Wikipedia articles by title and expose them as ``WikiSearchResult``.

    The lookup is an exact-title lookup through ``wikipediaapi`` rather than a
    full-text search; "related" results are taken from the links of the
    matched article.
    """

    def __init__(self, language: str = 'en', user_agent: str = 'WikipediaSearcher/1.0'):
        """
        Initialize Wikipedia API client

        Args:
            language: Language code (e.g., 'en' for English)
            user_agent: User agent string for API requests
        """
        self.wiki = wikipediaapi.Wikipedia(
            language=language,
            extract_format=wikipediaapi.ExtractFormat.WIKI,
            user_agent=user_agent
        )
        self.logger = logging.getLogger(__name__)

    def search_wikipedia(self, query: str, results_limit: int = 3) -> List["WikiSearchResult"]:
        """
        Look up the article titled ``query`` and, optionally, articles it links to.

        Args:
            query: Title of the article to look up
            results_limit: Maximum number of results to return; values below 1
                yield an empty list

        Returns:
            List of WikiSearchResult objects: the matched article first,
            followed by existing linked articles until ``results_limit`` is
            reached. Empty if the query is blank, the article does not exist,
            or the lookup of the main article fails. If an error occurs while
            fetching linked articles, the results collected so far are
            returned.
        """
        # Honour the limit even when it leaves no room for the main article.
        if results_limit < 1:
            return []

        if not query or not query.strip():
            self.logger.warning("Empty query; nothing to look up")
            return []

        results = []
        try:
            page = self.wiki.page(query)

            if not page.exists():
                self.logger.warning(f"No exact match found for: {query}")
                return []

            results.append(self._process_page(page))

            # Only touch ``page.links`` when more results are wanted:
            # accessing it triggers an additional API request.
            if results_limit > 1:
                for link_title in page.links:
                    if len(results) >= results_limit:
                        break
                    link_page = self.wiki.page(link_title)
                    # Links to missing pages are skipped without using up a
                    # result slot.
                    if link_page.exists():
                        results.append(self._process_page(link_page))

        except Exception as e:
            # Best effort: keep whatever was collected before the failure and
            # record the traceback for diagnosis.
            self.logger.exception("Error searching Wikipedia: %s", e)

        return results

    def _process_page(self, page: "wikipediaapi.WikipediaPage") -> "WikiSearchResult":
        """
        Convert a ``wikipediaapi`` page object into a WikiSearchResult

        Args:
            page: Page whose title, summary, text, URL, ``touched`` timestamp
                and categories are read

        Returns:
            WikiSearchResult populated from the page

        Raises:
            ValueError: If ``page.touched`` does not match the expected
                ``YYYY-MM-DDTHH:MM:SSZ`` layout
        """
        categories = [cat.title for cat in page.categories.values()]

        # The trailing 'Z' is matched literally, so the parsed datetime is
        # naive (presumably UTC -- confirm against the MediaWiki API docs).
        last_modified = datetime.strptime(page.touched, '%Y-%m-%dT%H:%M:%SZ')

        return WikiSearchResult(
            title=page.title,
            summary=page.summary,
            full_text=page.text,
            url=page.fullurl,
            last_modified=last_modified,
            categories=categories
        )

    def format_result(self, result: "WikiSearchResult", include_full_text: bool = False) -> str:
        """
        Format a search result for display

        Args:
            result: WikiSearchResult object to format
            include_full_text: Whether to include the full article text

        Returns:
            Formatted string containing article information; at most five
            categories are listed, followed by '...' when there are more
        """
        shown_categories = ', '.join(result.categories[:5])
        more_marker = '...' if len(result.categories) > 5 else ''

        formatted = f"""
Title: {result.title}
URL: {result.url}
Last Modified: {result.last_modified}
Categories: {shown_categories}{more_marker}

Summary:
{result.summary}
"""
        if include_full_text:
            formatted += f"\nFull Text:\n{result.full_text}"

        return formatted

def main():
    """Run a sample lookup and print every formatted result to stdout."""
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # Sample article title used for the demonstration run.
    query = "Clash of Clans"

    searcher = WikipediaSearcher()
    articles = searcher.search_wikipedia(query, results_limit=3)

    if articles:
        divider = "-" * 60
        for position, article in enumerate(articles, start=1):
            print(f"\nResult {position}:")
            print(divider)
            print(searcher.format_result(article))
    else:
        print(f"No results found for query: {query}")


if __name__ == "__main__":
    main()

2024-11-15 11:36:41,504 - INFO - Wikipedia: language=en, user_agent: WikipediaSearcher/1.0 (Wikipedia-API/0.7.1; https://github.com/martin-majlis/Wikipedia-API/), extract_format=1
2024-11-15 11:36:41,505 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Clash of Clans&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
2024-11-15 11:36:42,006 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=categories&titles=Clash of Clans&cllimit=500
2024-11-15 11:36:42,287 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Clash of Clans&explaintext=1&exsectionformat=wiki
2024-11-15 11:36:42,774 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Clash of Clans&pllimit=500
2024-11-15 11:36:43,103 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Android (o


Result 1:
------------------------------------------------------------

Title: Clash of Clans
URL: https://en.wikipedia.org/wiki/Clash_of_Clans
Last Modified: 2024-11-12 18:17:26
Categories: Category:2010s fads and trends, Category:2012 video games, Category:All Wikipedia articles in need of updating, Category:Android (operating system) games, Category:Articles using Infobox video game using locally defined parameters...

Summary:
Clash of Clans is a 2012 free-to-play mobile strategy video game developed and published by the Finnish game-development company Supercell. The game was released for iOS platforms on 2 August 2012, and on Google Play for Android on 7 October 2013.
The game is set in a fantasy-themed persistent world where the player is the chief of a village. Clash of Clans tasks players to build their own village using the resources gained from attacking other players' villages with troops, earning rewards, buying them with medals or by producing them at their own village. 