In [2]:
!pip install Wikipedia-API

Collecting Wikipedia-API
  Downloading wikipedia_api-0.7.1.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: Wikipedia-API
  Building wheel for Wikipedia-API (setup.py) ... [?25ldone
[?25h  Created wheel for Wikipedia-API: filename=Wikipedia_API-0.7.1-py3-none-any.whl size=14346 sha256=b6601a093946413eeb7360d3ea03b7031e88aaed158963e88503eac234c27e4d
  Stored in directory: /home/yash/.cache/pip/wheels/48/93/2f/978da1e445cf17606445f4b47fd8454250f5440d5a10c677e9
Successfully built Wikipedia-API
Installing collected packages: Wikipedia-API
Successfully installed Wikipedia-API-0.7.1


In [4]:
import wikipediaapi
from typing import List, Dict, Optional
import logging
from dataclasses import dataclass
from datetime import datetime

@dataclass
class WikiSearchResult:
    """Data class to store Wikipedia article information"""
    title: str
    summary: str
    full_text: str
    url: str
    last_modified: datetime
    categories: List[str]

class WikipediaSearcher:
    def __init__(self, language: str = 'en', user_agent: str = 'WikipediaSearcher/1.0'):
        """
        Initialize Wikipedia API client
        
        Args:
            language: Language code (e.g., 'en' for English)
            user_agent: User agent string for API requests
        """
        self.wiki = wikipediaapi.Wikipedia(
            language=language,
            extract_format=wikipediaapi.ExtractFormat.WIKI,
            user_agent=user_agent
        )
        self.logger = logging.getLogger(__name__)

    def search_wikipedia(self, query: str, results_limit: int = 3) -> List[WikiSearchResult]:
        """
        Search Wikipedia and get detailed information for matching articles
        
        Args:
            query: Search query string
            results_limit: Maximum number of results to return
            
        Returns:
            List of WikiSearchResult objects containing article information
        """
        try:
            # Get the page for the search query
            page = self.wiki.page(query)
            
            if not page.exists():
                self.logger.warning(f"No exact match found for: {query}")
                return []

            # Get main page result
            result = self._process_page(page)
            results = [result]

            # Get related pages through links (if we want more results)
            if results_limit > 1:
                for link_title in list(page.links.keys())[:results_limit - 1]:
                    link_page = self.wiki.page(link_title)
                    if link_page.exists():
                        results.append(self._process_page(link_page))

            return results

        except Exception as e:
            self.logger.error(f"Error searching Wikipedia: {e}")
            return []

    def _process_page(self, page: wikipediaapi.WikipediaPage) -> WikiSearchResult:
        """Process a Wikipedia page and extract relevant information"""
        categories = [cat.title for cat in page.categories.values()]
        
        return WikiSearchResult(
            title=page.title,
            summary=page.summary,
            full_text=page.text,
            url=page.fullurl,
            last_modified=datetime.strptime(page.touched, '%Y-%m-%dT%H:%M:%SZ'),
            categories=categories
        )

    def format_result(self, result: WikiSearchResult, include_full_text: bool = False) -> str:
        """
        Format a search result for display
        
        Args:
            result: WikiSearchResult object to format
            include_full_text: Whether to include the full article text
            
        Returns:
            Formatted string containing article information
        """
        formatted = f"""
Title: {result.title}
URL: {result.url}
Last Modified: {result.touched}
Categories: {', '.join(result.categories[:5])}{'...' if len(result.categories) > 5 else ''}

Summary:
{result.summary}
"""
        if include_full_text:
            formatted += f"\nFull Text:\n{result.full_text}"
            
        return formatted

def main():
    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    
    # Example usage
    query = (
        "US state"
    )
    
    searcher = WikipediaSearcher()
    results = searcher.search_wikipedia(query, results_limit=3)
    
    if not results:
        print(f"No results found for query: {query}")
        return
        
    for idx, result in enumerate(results, 1):
        print(f"\nResult {idx}:")
        print("-" * 60)
        print(searcher.format_result(result))

main()

2024-10-22 05:34:54,015 - INFO - Wikipedia: language=en, user_agent: WikipediaSearcher/1.0 (Wikipedia-API/0.7.1; https://github.com/martin-majlis/Wikipedia-API/), extract_format=1
2024-10-22 05:34:54,016 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=US state&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
2024-10-22 05:34:54,660 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=categories&titles=U.S. state&cllimit=500
2024-10-22 05:34:54,940 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=U.S. state&explaintext=1&exsectionformat=wiki
2024-10-22 05:34:55,438 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=U.S. state&pllimit=500
2024-10-22 05:34:55,935 - INFO - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=U.S. state&pllimit=500&plco


Result 1:
------------------------------------------------------------


AttributeError: 'WikiSearchResult' object has no attribute 'touched'