In [12]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import json
import requests
from typing import Dict, List, Optional, Tuple
import sqlite3
import hashlib
import time
import re
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
import logging
import random

from mycongif import ConfigManager

# Set up logging for the notebook session: the root logger emits INFO and
# above, and `logger` is the named logger used by everything defined below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class RobustStockNewsEventsFetcher:
    def __init__(self, db_path: str = "db/stock_news.db"):
        """Prepare storage, HTTP state, credentials and the news-source registry.

        Args:
            db_path: Path of the SQLite file that holds articles, events and
                price movements. The schema is created during construction.
        """
        self.db_path = db_path
        self.session = requests.Session()
        self.apikeys = ConfigManager("private/apikeys.ini")

        # Browser identities; one is picked at random whenever headers are built.
        self.user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0'
        ]

        self.init_database()

        # (registry key, fetch callable, human-readable label) in query order.
        source_table = (
            ('yahoo_finance_api', self._fetch_yahoo_news_api, 'Yahoo Finance News API'),
            ('alpha_vantage_news', self._fetch_alpha_vantage_news, 'Alpha Vantage News API'),
            ('newsapi', self._fetch_newsapi, 'NewsAPI.org'),
            ('financial_modeling_prep', self._fetch_fmp_news, 'Financial Modeling Prep'),
            ('polygon_news', self._fetch_polygon_news, 'Polygon.io News'),
        )
        self.news_sources = {
            source_key: {'function': fetcher, 'description': label}
            for source_key, fetcher, label in source_table
        }
    
    def init_database(self):
        """Initialize SQLite database with required tables"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        # Create news table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS news_articles (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ticker TEXT NOT NULL,
                title TEXT NOT NULL,
                url TEXT NOT NULL,
                content TEXT,
                summary TEXT,
                publisher TEXT,
                author TEXT,
                publish_date DATETIME,
                scraped_date DATETIME DEFAULT CURRENT_TIMESTAMP,
                source_site TEXT,
                content_hash TEXT UNIQUE,
                sentiment_score REAL,
                UNIQUE(url, ticker)
            )
        ''')
        
        # Create events table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS stock_events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ticker TEXT NOT NULL,
                event_date DATE,
                event_type TEXT,
                event_description TEXT,
                event_value REAL,
                scraped_date DATETIME DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(ticker, event_date, event_type, event_description)
            )
        ''')
        
        # Create price movements table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS price_movements (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ticker TEXT NOT NULL,
                movement_date DATE,
                price_change_percent REAL,
                close_price REAL,
                volume INTEGER,
                movement_type TEXT,
                scraped_date DATETIME DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(ticker, movement_date)
            )
        ''')
        
        # Create indexes for better performance
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_news_ticker_date ON news_articles(ticker, publish_date)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_events_ticker_date ON stock_events(ticker, event_date)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_movements_ticker_date ON price_movements(ticker, movement_date)')
        
        conn.commit()
        conn.close()
    
    def get_content_hash(self, content: str) -> str:
        """Generate MD5 hash of content to detect duplicates"""
        return hashlib.md5(content.encode('utf-8')).hexdigest()
    
    def is_news_exists(self, url: str, ticker: str, content_hash: str) -> bool:
        """Check if news article already exists in database"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        cursor.execute('''
            SELECT id FROM news_articles 
            WHERE (url = ? AND ticker = ?) OR content_hash = ?
        ''', (url, ticker, content_hash))
        
        exists = cursor.fetchone() is not None
        conn.close()
        return exists
    
    def save_news_to_db(self, news_data: Dict, ticker: str):
        """Save news article to database if it doesn't exist"""
        content_hash = self.get_content_hash(news_data.get('content', '') + news_data.get('title', ''))
        
        if self.is_news_exists(news_data.get('url', ''), ticker, content_hash):
            logger.info(f"News already exists: {news_data.get('title', 'Unknown')[:50]}...")
            return False
        
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        try:
            cursor.execute('''
                INSERT INTO news_articles 
                (ticker, title, url, content, summary, publisher, author, publish_date, source_site, content_hash)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                ticker,
                news_data.get('title', ''),
                news_data.get('url', ''),
                news_data.get('content', ''),
                news_data.get('summary', ''),
                news_data.get('publisher', ''),
                news_data.get('author', ''),
                news_data.get('publish_date'),
                news_data.get('source_site', ''),
                content_hash
            ))
            conn.commit()
            logger.info(f"Saved new news: {news_data.get('title', 'Unknown')[:50]}...")
            return True
        except sqlite3.IntegrityError:
            logger.info(f"Duplicate news detected: {news_data.get('title', 'Unknown')[:50]}...")
            return False
        finally:
            conn.close()
    
    def _get_random_headers(self):
        """Get random headers to avoid detection"""
        return {
            'User-Agent': random.choice(self.user_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }
    
    def _fetch_yahoo_news_api(self, ticker: str) -> List[Dict]:
        """Fetch news using Yahoo Finance's internal API"""
        articles = []
        try:
            # Try Yahoo's news API endpoint
            url = f"https://query1.finance.yahoo.com/v1/finance/search?q={ticker}"
            headers = self._get_random_headers()
            
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code == 200:
                data = response.json()
                # Process news if available in response
                if 'news' in data:
                    for item in data['news'][:10]:
                        articles.append({
                            'title': item.get('title', ''),
                            'url': item.get('link', ''),
                            'content': item.get('summary', ''),
                            'summary': item.get('summary', '')[:200] + '...' if len(item.get('summary', '')) > 200 else item.get('summary', ''),
                            'publisher': item.get('publisher', 'Yahoo Finance'),
                            'author': '',
                            'publish_date': self._convert_timestamp(item.get('providerPublishTime')),
                            'source_site': 'yahoo_finance_api'
                        })
            
            # Alternative: Try yfinance news (more reliable)
            try:
                stock = yf.Ticker(ticker)
                news = stock.news
                for item in news[:10]:
                    articles.append({
                        'title': item.get('title', ''),
                        'url': item.get('link', ''),
                        'content': item.get('summary', ''),
                        'summary': item.get('summary', '')[:200] + '...' if len(item.get('summary', '')) > 200 else item.get('summary', ''),
                        'publisher': item.get('publisher', 'Yahoo Finance'),
                        'author': '',
                        'publish_date': self._convert_timestamp(item.get('providerPublishTime')),
                        'source_site': 'yfinance_news'
                    })
            except Exception as e:
                logger.debug(f"yfinance news error: {e}")
                
        except Exception as e:
            logger.error(f"Yahoo News API error: {e}")
        
        return articles
    
    def _fetch_alpha_vantage_news(self, ticker: str) -> List[Dict]:
        """Fetch news from Alpha Vantage (requires API key)"""
        articles = []
        # Note: This requires an API key from Alpha Vantage
        # You can get a free one at https://www.alphavantage.co/support/#api-key
        
        api_key = self.apikeys.get_key("alpha_vantage", "apikey") # Replace with your API key
        if api_key == "YOUR_ALPHA_VANTAGE_API_KEY":
            logger.info("Alpha Vantage API key not provided, skipping...")
            return articles
        
        try:
            url = f"https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers={ticker}&apikey={api_key}"
            response = requests.get(url, timeout=10)
            
            if response.status_code == 200:
                data = response.json()
                if 'feed' in data:
                    for item in data['feed'][:10]:
                        articles.append({
                            'title': item.get('title', ''),
                            'url': item.get('url', ''),
                            'content': item.get('summary', ''),
                            'summary': item.get('summary', '')[:200] + '...' if len(item.get('summary', '')) > 200 else item.get('summary', ''),
                            'publisher': item.get('source', 'Alpha Vantage'),
                            'author': item.get('authors', [''])[0] if item.get('authors') else '',
                            'publish_date': item.get('time_published', ''),
                            'source_site': 'alpha_vantage'
                        })
        except Exception as e:
            logger.error(f"Alpha Vantage error: {e}")
        
        return articles
    
    def _fetch_newsapi(self, ticker: str) -> List[Dict]:
        """Fetch news from NewsAPI.org (requires API key)"""
        articles = []
        # Note: This requires an API key from NewsAPI.org
        # You can get a free one at https://newsapi.org/
        
        api_key = self.apikeys.get_key("newsapi", "apikey")  # Replace with your API key
        if api_key == "YOUR_NEWSAPI_KEY":
            logger.info("NewsAPI key not provided, skipping...")
            return articles
        
        try:
            # Get company name for better search results
            stock = yf.Ticker(ticker)
            company_name = stock.info.get('longName', ticker)
            
            url = f"https://newsapi.org/v2/everything?q={company_name}&apiKey={api_key}&sortBy=publishedAt&pageSize=10"
            response = requests.get(url, timeout=10)
            
            if response.status_code == 200:
                data = response.json()
                if 'articles' in data:
                    for item in data['articles']:
                        articles.append({
                            'title': item.get('title', ''),
                            'url': item.get('url', ''),
                            'content': item.get('content', ''),
                            'summary': item.get('description', '')[:200] + '...' if len(item.get('description', '')) > 200 else item.get('description', ''),
                            'publisher': item.get('source', {}).get('name', 'NewsAPI'),
                            'author': item.get('author', ''),
                            'publish_date': item.get('publishedAt', ''),
                            'source_site': 'newsapi'
                        })
        except Exception as e:
            logger.error(f"NewsAPI error: {e}")
        
        return articles
    
    def _fetch_fmp_news(self, ticker: str) -> List[Dict]:
        """Fetch news from Financial Modeling Prep (requires API key)"""
        articles = []
        # Note: This requires an API key from Financial Modeling Prep
        # You can get a free one at https://financialmodelingprep.com/
        
        api_key = self.apikeys.get_key("fmp", "apikey")  # Replace with your API key
        if api_key == "YOUR_FMP_API_KEY":
            logger.info("FMP API key not provided, skipping...")
            return articles
        
        try:
            url = f"https://financialmodelingprep.com/api/v3/stock_news?tickers={ticker}&limit=10&apikey={api_key}"
            response = requests.get(url, timeout=10)
            
            if response.status_code == 200:
                data = response.json()
                for item in data:
                    articles.append({
                        'title': item.get('title', ''),
                        'url': item.get('url', ''),
                        'content': item.get('text', ''),
                        'summary': item.get('text', '')[:200] + '...' if len(item.get('text', '')) > 200 else item.get('text', ''),
                        'publisher': item.get('site', 'FMP'),
                        'author': '',
                        'publish_date': item.get('publishedDate', ''),
                        'source_site': 'fmp'
                    })
        except Exception as e:
            logger.error(f"FMP error: {e}")
        
        return articles
    
    def _fetch_polygon_news(self, ticker: str) -> List[Dict]:
        """Fetch news from Polygon.io (requires API key)"""
        articles = []
        # Note: This requires an API key from Polygon.io
        # You can get a free one at https://polygon.io/
        
        api_key = self.apikeys.get_key("polygon", "apikey")  # Replace with your API key
        if api_key == "YOUR_POLYGON_API_KEY":
            logger.info("Polygon API key not provided, skipping...")
            return articles
        
        try:
            url = f"https://api.polygon.io/v2/reference/news?ticker={ticker}&limit=10&apikey={api_key}"
            response = requests.get(url, timeout=10)
            
            if response.status_code == 200:
                data = response.json()
                if 'results' in data:
                    for item in data['results']:
                        articles.append({
                            'title': item.get('title', ''),
                            'url': item.get('article_url', ''),
                            'content': item.get('description', ''),
                            'summary': item.get('description', '')[:200] + '...' if len(item.get('description', '')) > 200 else item.get('description', ''),
                            'publisher': item.get('publisher', {}).get('name', 'Polygon'),
                            'author': item.get('author', ''),
                            'publish_date': item.get('published_utc', ''),
                            'source_site': 'polygon'
                        })
        except Exception as e:
            logger.error(f"Polygon error: {e}")
        
        return articles
    
    def _convert_timestamp(self, timestamp):
        """Convert various timestamp formats to standard format"""
        if not timestamp:
            return datetime.now().strftime('%Y-%m-%d')
        
        try:
            if isinstance(timestamp, (int, float)):
                # Unix timestamp
                return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')
            else:
                # String format
                return pd.to_datetime(timestamp).strftime('%Y-%m-%d')
        except:
            return datetime.now().strftime('%Y-%m-%d')
    
    def scrape_news_for_ticker(self, ticker: str, days_back: int = 30) -> int:
        """Scrape news from multiple sources for a given ticker"""
        total_new_articles = 0
        
        for source_name, source_config in self.news_sources.items():
            logger.info(f"Fetching from {source_config['description']} for {ticker}...")
            
            try:
                # Fetch articles using source-specific function
                articles = source_config['function'](ticker)
                
                # Save new articles to database
                new_count = 0
                for article in articles:
                    if self.save_news_to_db(article, ticker):
                        new_count += 1
                
                total_new_articles += new_count
                logger.info(f"Found {len(articles)} articles from {source_name}, {new_count} were new")
                
                # Be respectful to APIs
                time.sleep(2)
                
            except Exception as e:
                logger.error(f"Error fetching from {source_name} for {ticker}: {str(e)}")
                continue
        
        return total_new_articles
    
    def get_stock_data_and_news(self, ticker: str, years_back: int = 10) -> Dict:
        """Get comprehensive stock data including scraped news"""
        logger.info(f"Starting comprehensive data collection for {ticker}")
        
        # First, scrape fresh news
        new_articles_count = self.scrape_news_for_ticker(ticker, days_back=365)
        logger.info(f"Scraped {new_articles_count} new articles for {ticker}")
        
        # Get traditional yfinance data
        stock_data = self._get_yfinance_data(ticker, years_back)
        
        # Get news from database
        db_news = self._get_news_from_db(ticker, days_back=365)
        
        # Combine all data
        result = stock_data.copy()
        result['scraped_news'] = db_news
        result['scraped_news_count'] = len(db_news)
        result['new_articles_found'] = new_articles_count
        
        return result
    
    def _get_yfinance_data(self, ticker: str, years_back: int) -> Dict:
        """Load company info, corporate actions and notable price moves via yfinance.

        The window spans ``years_back * 365`` days ending now. Any exception is
        logged and reported as ``{'error': <message>}`` instead of being raised.
        """
        try:
            stock = yf.Ticker(ticker)
            window_end = datetime.now()
            window_start = window_end - timedelta(days=years_back * 365)
            start_label = window_start.strftime('%Y-%m-%d')
            end_label = window_end.strftime('%Y-%m-%d')

            info = stock.info
            price_history = stock.history(start=window_start, end=window_end)
            actions = stock.actions

            # Restrict corporate actions to the requested window.
            if actions.empty:
                recent_actions = pd.DataFrame()
            else:
                recent_actions = actions[actions.index >= start_label]

            price_events = self._identify_significant_events(price_history)
            formatted_actions = self._format_actions(recent_actions)

            if recent_actions.empty:
                dividend_total = 0
                split_total = 0
            else:
                dividend_total = len(recent_actions[recent_actions['Dividends'] > 0])
                split_total = len(recent_actions[recent_actions['Stock Splits'] > 0])

            return {
                'ticker': ticker,
                'company_name': info.get('longName', 'N/A'),
                'sector': info.get('sector', 'N/A'),
                'industry': info.get('industry', 'N/A'),
                'date_range': {'start': start_label, 'end': end_label},
                'corporate_actions': formatted_actions,
                'significant_price_events': price_events,
                'summary_stats': {
                    'dividends_count': dividend_total,
                    'stock_splits_count': split_total,
                    'significant_events_count': len(price_events)
                }
            }
        except Exception as err:
            logger.error(f"Error getting yfinance data: {err}")
            return {'error': str(err)}
    
    def _get_news_from_db(self, ticker: str, days_back: int = 365) -> List[Dict]:
        """Retrieve news articles from database"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        
        cutoff_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
        
        cursor.execute('''
            SELECT title, url, content, summary, publisher, author, publish_date, source_site, scraped_date
            FROM news_articles 
            WHERE ticker = ? AND (publish_date >= ? OR publish_date IS NULL)
            ORDER BY publish_date DESC, scraped_date DESC
        ''', (ticker, cutoff_date))
        
        rows = cursor.fetchall()
        conn.close()
        
        news_list = []
        for row in rows:
            news_list.append({
                'title': row[0],
                'url': row[1],
                'content': row[2],
                'summary': row[3],
                'publisher': row[4],
                'author': row[5],
                'publish_date': row[6],
                'source_site': row[7],
                'scraped_date': row[8]
            })
        
        return news_list
    
    def _format_actions(self, actions: pd.DataFrame) -> List[Dict]:
        """Format corporate actions data"""
        if actions.empty:
            return []
        
        formatted_actions = []
        for date, row in actions.iterrows():
            date_str = date.strftime('%Y-%m-%d') if hasattr(date, 'strftime') else str(date)
            
            if row['Dividends'] > 0:
                formatted_actions.append({
                    'date': date_str,
                    'type': 'Dividend',
                    'amount': float(row['Dividends'])
                })
            if row['Stock Splits'] > 0:
                formatted_actions.append({
                    'date': date_str,
                    'type': 'Stock Split',
                    'ratio': float(row['Stock Splits'])
                })
        
        return formatted_actions
    
    def _identify_significant_events(self, hist_data: pd.DataFrame, threshold: float = 0.05) -> List[Dict]:
        """Identify significant price movements"""
        if hist_data.empty:
            return []
        
        hist_data['Daily_Return'] = hist_data['Close'].pct_change()
        significant_days = hist_data[abs(hist_data['Daily_Return']) > threshold]
        
        events = []
        for date, row in significant_days.iterrows():
            date_str = date.strftime('%Y-%m-%d') if hasattr(date, 'strftime') else str(date)
            
            events.append({
                'date': date_str,
                'price_change_percent': round(float(row['Daily_Return']) * 100, 2),
                'close_price': round(float(row['Close']), 2),
                'volume': int(row['Volume']),
                'type': 'Significant Price Movement'
            })
        
        events.sort(key=lambda x: x['date'], reverse=True)
        return events[:50]
    
    def print_comprehensive_summary(self, data: Dict):
        """Print comprehensive summary including scraped news"""
        if 'error' in data:
            print(f"Error: {data['error']}")
            return
        
        print(f"\n=== Comprehensive Summary for {data['ticker']} ({data['company_name']}) ===")
        print(f"Sector: {data['sector']}")
        print(f"Industry: {data['industry']}")
        print(f"Date Range: {data['date_range']['start']} to {data['date_range']['end']}")
        
        print(f"\n=== News Summary ===")
        print(f"  • Total Scraped Articles: {data['scraped_news_count']}")
        print(f"  • New Articles Found: {data['new_articles_found']}")
        print(f"  • Corporate Actions: {data['summary_stats']['dividends_count']} dividends, {data['summary_stats']['stock_splits_count']} splits")
        print(f"  • Significant Price Events: {data['summary_stats']['significant_events_count']}")
        
        # Show recent scraped news
        if data['scraped_news']:
            print(f"\n=== Recent News (Top 5) ===")
            for i, news in enumerate(data['scraped_news'][:5]):
                print(f"{i+1}. {news['title']}")
                print(f"   Source: {news['publisher']} ({news['source_site']}) | Date: {news['publish_date']}")
                print(f"   URL: {news['url']}")
                if news['summary']:
                    print(f"   Summary: {news['summary'][:150]}...")
                print()
        
        # Setup instructions
        print(f"\n=== Setup Instructions for Full Functionality ===")
        print("To get news from all sources, you'll need API keys (all have free tiers):")
        print("1. Alpha Vantage: https://www.alphavantage.co/support/#api-key")
        print("2. NewsAPI: https://newsapi.org/")
        print("3. Financial Modeling Prep: https://financialmodelingprep.com/")
        print("4. Polygon.io: https://polygon.io/")
        print("\nReplace the 'YOUR_*_API_KEY' placeholders in the code with your actual keys.")
    
    def export_to_json(self, data: Dict, filename: str = None):
        """Export data to JSON file"""
        if filename is None:
            filename = f"{data.get('ticker', 'unknown')}_comprehensive_data_{datetime.now().strftime('%Y%m%d')}.json"
        
        # Make data JSON serializable
        json_data = self._make_json_serializable(data)
        
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2, ensure_ascii=False, default=str)
        
        print(f"Data exported to {filename}")
    
    def _make_json_serializable(self, obj):
        """Convert objects to JSON serializable format"""
        if isinstance(obj, dict):
            return {key: self._make_json_serializable(value) for key, value in obj.items()}
        elif isinstance(obj, list):
            return [self._make_json_serializable(item) for item in obj]
        elif isinstance(obj, pd.Timestamp):
            return obj.strftime('%Y-%m-%d %H:%M:%S')
        elif isinstance(obj, datetime):
            return obj.strftime('%Y-%m-%d %H:%M:%S')
        elif hasattr(obj, 'item'):
            return obj.item()
        elif pd.isna(obj):
            return None
        else:
            return obj


# Example usage
def main():
    """Demo run: collect, summarise and export comprehensive data for Boeing (BA)."""
    fetcher = RobustStockNewsEventsFetcher()
    ticker = "BA"

    # Banner describing what the fetcher offers.
    banner = (
        "=== Robust Stock News and Events Fetcher ===",
        "Features:",
        "  • Multiple API-based news sources (more reliable than web scraping)",
        "  • SQLite database storage with duplicate detection",
        "  • Comprehensive stock event tracking",
        "  • Fallback strategies for when sources fail",
        "\nRequired packages: pip install yfinance pandas beautifulsoup4 requests",
    )
    for text in banner:
        print(text)
    print(f"\nFetching comprehensive data for {ticker}...")

    # Collect two years of data, then report and export it.
    data = fetcher.get_stock_data_and_news(ticker, years_back=2)
    fetcher.print_comprehensive_summary(data)
    fetcher.export_to_json(data)

In [13]:
# Build the fetcher; `fetcher`, `ticker` and `data` stay in the notebook namespace.
fetcher = RobustStockNewsEventsFetcher()

# Demo symbol: Boeing
ticker = "BA"

print(
    "=== Robust Stock News and Events Fetcher ===",
    "Features:",
    "  • Multiple API-based news sources (more reliable than web scraping)",
    "  • SQLite database storage with duplicate detection",
    "  • Comprehensive stock event tracking",
    "  • Fallback strategies for when sources fail",
    "\nRequired packages: pip install yfinance pandas beautifulsoup4 requests",
    f"\nFetching comprehensive data for {ticker}...",
    sep="\n",
)

# Collect two years of data, show the report, then write the JSON export.
data = fetcher.get_stock_data_and_news(ticker, years_back=2)
fetcher.print_comprehensive_summary(data)
fetcher.export_to_json(data)

INFO:__main__:Starting comprehensive data collection for BA
INFO:__main__:Fetching from Yahoo Finance News API for BA...


Configuration loaded from: private/apikeys.ini
=== Robust Stock News and Events Fetcher ===
Features:
  • Multiple API-based news sources (more reliable than web scraping)
  • SQLite database storage with duplicate detection
  • Comprehensive stock event tracking
  • Fallback strategies for when sources fail

Required packages: pip install yfinance pandas beautifulsoup4 requests

Fetching comprehensive data for BA...


INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:News already exists: ...
INFO:__main__:Found 10 articles from yahoo_finance_api, 0 were new
INFO:__main__:Fetching from Alpha Vantage News API for BA...
INFO:__main__:Saved new news: INVESTOR ALERT: Pomerantz Law Firm Investigates Cl...
INFO:__main__:Saved new news: Wall Street Hits Record Highs, Nike Jumps 18%: Wha...
INFO:__main__:Saved new news: RTX Clinches a $250M Deal From MELCO to Produce ES...
INFO:__main__:Saved new news: Howmet vs. Textron: Which Aerospace & Defense Stoc...
INFO:__main__:Saved new news: Lockheed Secures a $250M Contract Involving F-35 F...
INFO:__main__:Saved new news: A CAPITOL FOURTH CELEBRATES 45 Y


=== Comprehensive Summary for BA (The Boeing Company) ===
Sector: Industrials
Industry: Aerospace & Defense
Date Range: 2023-06-29 to 2025-06-28

=== News Summary ===
  • Total Scraped Articles: 29
  • New Articles Found: 28
  • Corporate Actions: 0 dividends, 0 splits
  • Significant Price Events: 15

=== Recent News (Top 5) ===
1. INVESTOR ALERT: Pomerantz Law Firm Investigates Claims On Behalf of Investors of The Boeing Company - BA - Boeing  ( NYSE:BA ) 
   Source: Benzinga (alpha_vantage) | Date: 20250628T140000
   URL: https://www.benzinga.com/pressreleases/25/06/g46157463/investor-alert-pomerantz-law-firm-investigates-claims-on-behalf-of-investors-of-the-boeing-company
   Summary: NEW YORK, June 28, 2025 ( GLOBE NEWSWIRE ) -- Pomerantz LLP is investigating claims on behalf of investors of The Boeing Company ( "Boeing" or the "Co...

2. Wall Street Hits Record Highs, Nike Jumps 18%: What's Moving Markets Friday? Wall Street Hits Record Highs, Nike Jumps 18%: What's Moving Market

In [14]:
# Read the stored BA articles back from SQLite (default 365-day window) to inspect what was saved.
sqldata = fetcher._get_news_from_db(ticker="BA")

In [19]:
# Show the second stored article (index 1) as a spot check of the saved fields.
sqldata[1]

{'title': "Wall Street Hits Record Highs, Nike Jumps 18%: What's Moving Markets Friday? Wall Street Hits Record Highs, Nike Jumps 18%: What's Moving Markets Friday? - Boeing  ( NYSE:BA ) ",
 'url': 'https://www.benzinga.com/markets/equities/25/06/46147570/markets-today-news-wall-street-dow-jones-nasdaq-sp500-nike',
 'content': "S&P 500 and Nasdaq 100 hit new record highs on trade optimism. Nike jumps 18% after earnings beat, leading Dow's charge toward 44,000. Market-moving news hits Benzinga Pro first-get a 30-minute edge and save 60% this 4th of July.",
 'summary': "S&P 500 and Nasdaq 100 hit new record highs on trade optimism. Nike jumps 18% after earnings beat, leading Dow's charge toward 44,000. Market-moving news hits Benzinga Pro first-get a 30-minute edge a...",
 'publisher': 'Benzinga',
 'author': 'Piero Cingari',
 'publish_date': '20250627T171124',
 'source_site': 'alpha_vantage',
 'scraped_date': '2025-06-28 22:06:51'}

In [3]:
# NOTE(review): a credential-like literal had been pasted here as a comment and
# was removed. Keep secrets out of the notebook; if it was a real key, rotate it.

from mycongif import ConfigManager
apikeys = ConfigManager("private/apikeys.ini")

Configuration loaded from: private/apikeys.ini


In [10]:
# Never paste the secret into the notebook: read it from the POLYGON_API_KEY
# environment variable, or prompt for it without echoing the input.
# NOTE(review): the key that used to be hard-coded here should be rotated.
# NOTE(review): the recorded output of add_key shows the stored value, so clear
# this cell's output before sharing the notebook.
import getpass
import os

polygon_api_key = os.environ.get("POLYGON_API_KEY") or getpass.getpass("Polygon API key: ")
apikeys.add_key("polygon", "apikey", polygon_api_key)
apikeys.add_key("polygon", "url_apikey", "https://polygon.io")
apikeys.save_config()

Created new section: [polygon]
Added/Updated key: [polygon]apikey = <redacted>
Added/Updated key: [polygon]url_apikey = https://polygon.io
Configuration saved to: private/apikeys.ini
