# In [None]:
import os
import asyncio
import logging
import warnings
import json
from datetime import datetime
from glob import glob
from typing import List, Dict, Optional
import datamule as dm
import pandas as pd
from selectolax.parser import HTMLParser
from config import CONFIG  # Import the config dictionary

# Silence every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# Current local date, kept both as a datetime and as a 'YYYY-MM-DD' string.
today = datetime.now()
today_date = f"{today:%Y-%m-%d}"

# Run parameters are read from config.CONFIG. Example values:
#   TICKERS    = ['AAPL', 'MSFT', 'GOOGL']
#   START_DATE = '2024-01-01'
#   END_DATE   = '2024-06-01'  (or today_date)
#   BASE_DIR   = 'sec_data'
tickers, start, end, base_dir = (
    CONFIG[key] for key in ('TICKERS', 'START_DATE', 'END_DATE', 'BASE_DIR')
)

class SECDownloader:
    """Download SEC filings and company-concepts data for a list of tickers.

    Wraps a ``datamule.Downloader`` and writes each ticker's data under
    ``<base_dir>/<ticker>/filings`` and ``<base_dir>/<ticker>/company_concepts``.
    Download failures are logged and swallowed so that one failing ticker
    does not abort the remaining ones.
    """

    def __init__(self):
        self.downloader = dm.Downloader()
        # basicConfig does nothing when the root logger already has handlers,
        # so creating several instances does not stack handlers.
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def set_user_agent(self, user_agent: str) -> None:
        """Set SEC user agent information.

        Args:
            user_agent: Value forwarded to ``downloader.set_headers``.

        Raises:
            Exception: Whatever ``set_headers`` raises; it is logged and
                re-raised unchanged.
        """
        try:
            self.downloader.set_headers(user_agent)
        except Exception as e:
            # No traceback here: the exception is re-raised and carries it.
            self.logger.error("Failed to set user agent: %s", e)
            raise

    async def download_filings(self, ticker: str, start_date: str, end_date: str, output_dir: str) -> None:
        """Download 10-K, 10-Q and 8-K filings for one ticker.

        Args:
            ticker: Ticker symbol passed to the downloader.
            start_date: Start of the date range passed to the downloader.
            end_date: End of the date range passed to the downloader.
            output_dir: Directory passed to the downloader as ``output_dir``.

        Errors are logged, never raised.
        """
        try:
            await self.downloader.download(
                ticker=ticker,
                form=['10-K', '10-Q', '8-K'],  # Specify forms explicitly
                date=(start_date, end_date),
                output_dir=output_dir,
                return_urls=False,  # Ensure we're downloading files
            )
        except ValueError as e:
            self.logger.error("Value error downloading filings for %s: %s", ticker, e)
        except Exception as e:
            # Unexpected failure: keep the traceback so it can be diagnosed.
            self.logger.error(
                "Error downloading filings for %s: %s", ticker, e, exc_info=True
            )

    async def download_concepts(self, ticker: str, output_dir: str) -> None:
        """Download company concepts data for one ticker.

        Args:
            ticker: Ticker symbol passed to the downloader.
            output_dir: Directory passed to the downloader as ``output_dir``.

        Errors are logged (with traceback), never raised.
        """
        try:
            await self.downloader.download_company_concepts(
                ticker=ticker,
                output_dir=output_dir,
            )
        except Exception as e:
            self.logger.error(
                "Error downloading company concepts for %s: %s", ticker, e, exc_info=True
            )

    async def process_ticker(self, ticker: str, start: str, end: str, base_dir: str) -> None:
        """Create the directory layout for one ticker and run both downloads.

        Args:
            ticker: Ticker symbol; also used as the sub-directory name.
            start: Start date forwarded to ``download_filings``.
            end: End date forwarded to ``download_filings``.
            base_dir: Root directory under which ``<ticker>/`` is created.

        Errors (e.g. from directory creation) are logged, never raised.
        """
        try:
            # Create directory structure
            ticker_dir = os.path.join(base_dir, ticker)
            filings_dir = os.path.join(ticker_dir, 'filings')
            concepts_dir = os.path.join(ticker_dir, 'company_concepts')

            os.makedirs(filings_dir, exist_ok=True)
            os.makedirs(concepts_dir, exist_ok=True)

            # Download both filings and concepts concurrently
            await asyncio.gather(
                self.download_filings(ticker, start, end, filings_dir),
                self.download_concepts(ticker, concepts_dir),
            )

        except Exception as e:
            self.logger.error(
                "Failed to process ticker %s: %s", ticker, e, exc_info=True
            )

    async def download_all_data(
        self,
        tickers: List[str],
        start: str,
        end: str,
        base_dir: str = 'sec_data',
        max_concurrency: Optional[int] = None,
    ) -> None:
        """Download all SEC data for the given tickers.

        Args:
            tickers: Ticker symbols. A single string is treated as one
                ticker (not iterated character by character), and repeated
                symbols are processed once so two tasks never write to the
                same output directory concurrently.
            start: Start date forwarded to ``process_ticker``.
            end: End date forwarded to ``process_ticker``.
            base_dir: Root output directory.
            max_concurrency: Maximum number of tickers processed at the same
                time. ``None`` (the default) starts all tickers at once.

        Raises:
            ValueError: If ``max_concurrency`` is given and is less than 1.
        """
        # Validate before creating any coroutine so nothing is left un-awaited.
        if max_concurrency is not None and max_concurrency < 1:
            raise ValueError("max_concurrency must be a positive integer or None")

        if isinstance(tickers, str):
            tickers = [tickers]
        # dict.fromkeys drops duplicates while keeping first-seen order.
        unique_tickers = list(dict.fromkeys(tickers))

        if max_concurrency is None:
            await asyncio.gather(
                *(self.process_ticker(ticker, start, end, base_dir) for ticker in unique_tickers)
            )
            return

        # Created inside the running coroutine so it belongs to the active loop.
        semaphore = asyncio.Semaphore(max_concurrency)

        async def _bounded(ticker: str) -> None:
            async with semaphore:
                await self.process_ticker(ticker, start, end, base_dir)

        await asyncio.gather(*(_bounded(ticker) for ticker in unique_tickers))

# Initialize downloader
sec_downloader = SECDownloader()


def _run_blocking(coro):
    """Run *coro* to completion and return its result.

    ``asyncio.run`` raises RuntimeError when an event loop is already running
    in the current thread (the normal situation inside Jupyter/IPython). In
    that case the coroutine is executed on a fresh loop in a worker thread.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread (plain script): asyncio.run is safe.
        return asyncio.run(coro)

    # Local import: only needed on the notebook path.
    from concurrent.futures import ThreadPoolExecutor

    # NOTE(review): assumes the downloader holds no objects bound to the
    # caller's event loop - confirm against the datamule version in use.
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


try:
    # Set user agent (required by SEC)
    # NOTE: the value below is a placeholder; replace it with real contact details.
    sec_downloader.set_user_agent("Your Name your@email.com")

    # Create and run async download task
    async def run_downloads():
        await sec_downloader.download_all_data(tickers, start, end, base_dir)

    _run_blocking(run_downloads())

except KeyboardInterrupt:
    sec_downloader.logger.warning("\nDownload interrupted by user")
except Exception as e:
    sec_downloader.logger.error(f"Fatal error: {str(e)}", exc_info=True)
else:
    # Only report completion when nothing above failed or was interrupted.
    sec_downloader.logger.info("Download process completed")