In [None]:
import json
import os
from collections import Counter
from datetime import datetime, timedelta

import requests

class IndianNewsFetcher:
    """Fetch and rank India-related news articles from NewsAPI.

    Queries the NewsAPI ``/v2/everything`` endpoint with a per-category search
    expression, then keeps only the articles whose text mentions at least one
    India-related keyword.

    Attributes:
        api_key: NewsAPI key sent with every request.
        base_url: URL of the endpoint that is queried.
        timeout: Seconds to wait for the HTTP request before giving up.
        categories: Mapping of category name to NewsAPI query expression.
    """

    # Lower-case keywords used to decide whether an article is India-related.
    # They are matched as plain substrings of the lower-cased article text, so
    # 'india' also matches inside longer words, and overlapping entries such
    # as 'delhi' / 'new delhi' are each counted in the relevance score.
    INDIA_KEYWORDS = (
        'india', 'indian', 'delhi', 'mumbai', 'bangalore', 'kolkata', 'chennai',
        'modi', 'parliament', 'lok sabha', 'rajya sabha', 'bjp', 'congress',
        'hindustan', 'bharat', 'new delhi', 'maharashtra', 'gujarat', 'karnataka',
        'tamil nadu', 'west bengal', 'uttar pradesh', 'punjab', 'kerala',
    )

    def __init__(self, api_key, timeout=10):
        """Store the API key and set up the predefined search categories.

        Args:
            api_key: NewsAPI key.
            timeout: Seconds to wait for each HTTP request (default 10).
        """
        self.api_key = api_key
        self.base_url = "https://newsapi.org/v2/everything"
        self.timeout = timeout

        # Predefined search categories
        self.categories = {
            'budget': 'India AND (budget OR finance OR economy OR fiscal OR "union budget")',
            'government': 'India AND (government OR politics OR parliament OR "lok sabha" OR Modi)',
            'business': 'India AND (business OR economy OR market OR corporate OR startup)',
            'technology': 'India AND (technology OR tech OR IT OR "artificial intelligence" OR digital)',
            'general': 'India',
            'health': 'India AND (health OR medical OR hospital OR covid OR healthcare)',
            'education': 'India AND (education OR school OR university OR student OR exam)',
            'sports': 'India AND (cricket OR sports OR olympics OR football OR hockey)'
        }

    def fetch_news(self, category='general', days_back=7, page_size=20):
        """Fetch Indian news by category.

        Args:
            category: Key of ``self.categories``; any other value is used as a
                free-text term in the query ``India AND <category>``.
            days_back: Size of the date window, in days, ending today.
            page_size: Value sent as the ``pageSize`` request parameter.

        Returns:
            The India-relevant articles as returned by
            ``_filter_indian_articles``, or an empty list on any HTTP, API or
            unexpected error (the error is printed, never raised).
        """
        # Best-effort boundary: callers rely on getting a list back no matter
        # what goes wrong, so every failure is reported and mapped to [].
        try:
            query = self.categories.get(category, f'India AND {category}')

            # Date range
            end_date = datetime.today()
            start_date = end_date - timedelta(days=days_back)
            date_from = start_date.strftime('%Y-%m-%d')
            date_to = end_date.strftime('%Y-%m-%d')

            params = {
                'q': query,
                'from': date_from,
                'to': date_to,
                'sortBy': 'publishedAt',
                'pageSize': page_size,
                'language': 'en',
                'apiKey': self.api_key
            }

            print(f"üîç Fetching {category.upper()} news from India...")
            print(f"üìÖ Date range: {date_from} to {date_to}")

            # Without a timeout an unresponsive server blocks the call forever.
            response = requests.get(self.base_url, params=params, timeout=self.timeout)

            if response.status_code != 200:
                print(f"‚ùå HTTP Error: {response.status_code}")
                return []

            data = response.json()

            if data.get("status") != "ok":
                print(f"‚ùå API Error: {data.get('message')}")
                return []

            # Guard against an explicit null "articles" value.
            articles = data.get("articles") or []
            print(f"üìä Found {len(articles)} total articles")

            # Filter for India-related content
            filtered_articles = self._filter_indian_articles(articles)
            print(f"‚úÖ {len(filtered_articles)} India-relevant articles")

            return filtered_articles

        except Exception as e:
            print(f"‚ùå Error: {e}")
            return []

    def _filter_indian_articles(self, articles):
        """Keep the articles that mention an India-related keyword.

        Title, description, content and source name are lower-cased and
        searched for each entry of ``INDIA_KEYWORDS``. Fields that are missing
        or ``None`` are treated as empty text.

        Args:
            articles: Iterable of article dicts in the NewsAPI response shape.

        Returns:
            A list of simplified dicts (``title``, ``description``,
            ``content``, ``url``, ``source``, ``publishedAt``,
            ``relevance_score``) sorted by descending ``relevance_score``;
            ties keep their input order.
        """
        filtered = []
        for article in articles:
            # "source" itself may be null, not just its "name".
            source_name = (article.get("source") or {}).get("name")
            fields = (
                article.get("title"),
                article.get("description"),
                article.get("content"),
                source_name,
            )
            # `or ""` covers keys that are present but None, which a .get()
            # default does not.
            text_to_check = " ".join((field or "").lower() for field in fields)

            relevance_score = sum(
                keyword in text_to_check for keyword in self.INDIA_KEYWORDS
            )
            if relevance_score:
                filtered.append({
                    "title": article.get("title"),
                    "description": article.get("description"),
                    "content": article.get("content"),
                    "url": article.get("url"),
                    "source": source_name,
                    "publishedAt": article.get("publishedAt"),
                    "relevance_score": relevance_score
                })

        # Sort by relevance score (most India-related first)
        filtered.sort(key=lambda item: item['relevance_score'], reverse=True)
        return filtered

    def get_trending_topics(self, days_back=3):
        """Get the most frequent title words in recent general India news.

        Args:
            days_back: Size of the date window passed to ``fetch_news``.

        Returns:
            Up to ten ``(word, count)`` tuples, most frequent first. Only
            purely alphabetic, lower-cased words longer than four characters
            are counted.
        """
        articles = self.fetch_news('general', days_back=days_back, page_size=50)

        # Titles can be None in the filtered articles; treat them as empty.
        word_counts = Counter(
            word
            for article in articles
            for word in (article.get('title') or '').lower().split()
            if len(word) > 4 and word.isalpha()  # Filter meaningful words
        )

        # Get top trending words
        return word_counts.most_common(10)

    def print_articles(self, articles, max_articles=10):
        """Pretty print articles.

        Args:
            articles: List of article dicts as produced by ``fetch_news``.
            max_articles: Maximum number of articles to print.
        """
        if not articles:
            print("‚ùå No articles found")
            return

        print(f"\nüì∞ Showing {min(len(articles), max_articles)} articles:")
        print("=" * 80)

        for number, article in enumerate(articles[:max_articles], start=1):
            # `or` makes the placeholder apply to None values as well as to
            # missing keys.
            print(f"\nüî∏ Article {number}: {article.get('title') or 'No Title'}")
            print(f"üìÖ Published: {article.get('publishedAt') or 'Unknown'}")
            print(f"üì∞ Source: {article.get('source') or 'Unknown'}")

            desc = article.get('description') or 'No description'
            if len(desc) > 200:
                desc = desc[:200] + "..."
            print(f"üìù Description: {desc}")

            content = article.get('content') or ''
            if len(content) > 300:
                content = content[:300] + "..."
            if content:
                print(f"üìÑ Content: {content}")

            print(f"üîó URL: {article.get('url') or 'No URL'}")
            print(f"üéØ Relevance: {article.get('relevance_score', 0)} India-related keywords")
            print("-" * 60)

def main():
    """Run the demo: list categories, print top news per category, then trends.

    The NewsAPI key is read from the ``NEWSAPI_KEY`` environment variable when
    it is set; otherwise the key embedded below is used.
    """
    # NOTE(security): an API key committed to source is exposed to anyone who
    # can read the file. Prefer NEWSAPI_KEY; the literal is kept only as a
    # fallback so existing runs keep working and should be rotated/removed.
    api_key = os.environ.get("NEWSAPI_KEY", "58857cd8c1f341628b19836dcb69fc26")

    # Single source of truth for the look-back window used in both the
    # request and the "nothing found" message.
    days_back = 7

    print("=" * 80)
    print("üáÆüá≥ INDIAN NEWS FETCHER")
    print("=" * 80)

    fetcher = IndianNewsFetcher(api_key)

    # Show available categories
    print("\nüìÇ Available categories:")
    for category in fetcher.categories:
        print(f"   ‚Ä¢ {category}")

    print("\n" + "=" * 50)

    # Fetch different types of news
    categories_to_fetch = ['budget', 'government', 'business', 'technology']

    for category in categories_to_fetch:
        print(f"\nüîç FETCHING {category.upper()} NEWS:")
        print("-" * 40)

        articles = fetcher.fetch_news(category, days_back=days_back, page_size=15)
        fetcher.print_articles(articles, max_articles=3)  # Show top 3 for each category

        if not articles:
            print(f"   No {category} news found in the last {days_back} days")

    # Show trending topics
    print("\nüî• TRENDING TOPICS in Indian News:")
    print("-" * 40)
    trending = fetcher.get_trending_topics(days_back=5)
    for word, count in trending:
        print(f"   ‚Ä¢ {word.title()}: {count} mentions")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

üáÆüá≥ INDIAN NEWS FETCHER

📂 Available categories:
   • budget
   • government
   • business
   • technology
   • general
   • health
   • education
   • sports


🔍 FETCHING BUDGET NEWS:
----------------------------------------
🔍 Fetching BUDGET news from India...
📅 Date range: 2025-07-28 to 2025-08-04
📊 Found 15 total articles
✅ 8 India-relevant articles

📰 Showing 3 articles:

🔸 Article 1: American: Dallas – New Delhi, India. $629 (Basic Economy) / $790 (Regular Economy). Roundtrip, including all Taxes
📅 Published: 2025-08-03T15:25:36Z
📰 Source: Theflightdeal.com
📝 Description: A good sale to New Delhi.Matrix Airfare Search by ITA Software will price this at $790 (Regular Economy). Use those dates on American should reprice to $629 (Basic Economy) and $790 (Regular Econo
📄 Content: A good sale to New Delhi.
Matrix Airfare Search by ITA Software will price this at $790 (Regular Economy). Use those dates on American should reprice