In [1]:
# Imports
import sys
import os

# Make the parent directory of "notebooks" importable so that the
# project-local `downloaders` package resolves. The path is derived from the
# current working directory, i.e. this assumes the kernel was started inside
# the "notebooks" folder.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Guard the append: re-running this cell must not pile up duplicate
# sys.path entries for the lifetime of the kernel.
if project_root not in sys.path:
    sys.path.append(project_root)

from downloaders.sec_downloader import SECDownloader

# Create the shared downloader instance used by every cell below.
# The delay is the polite throttle between requests, in seconds.
POLITE_DELAY_SECONDS = 1.0
downloader = SECDownloader(delay=POLITE_DELAY_SECONDS)

In [4]:
# Test downloading a raw HTML page from SEC.
# NOTE: this URL points at an EX-99.1 exhibit filed under CIK 1835632 (the file
# name suggests it belongs to an 8-K). It is NOT an Apple 10-Q -- Apple's CIK
# is 320193.
test_url = "https://www.sec.gov/Archives/edgar/data/1835632/000183563225000051/q425_8kx212025ex-991.htm"

# Number of leading characters echoed back as a sanity-check preview.
PREVIEW_CHARS = 500

try:
    html_content = downloader.download_html(test_url)
    print(f"✅ Download successful. HTML content length: {len(html_content)} characters.")
    print(html_content[:PREVIEW_CHARS])  # Preview only; the full document is large
except Exception as e:
    # Report instead of raising so the remaining cells can still be run.
    print(f"❌ Error during HTML download: {e}")

✅ Download successful. HTML content length: 444339 characters.
<DOCUMENT>
<TYPE>EX-99.1
<SEQUENCE>2
<FILENAME>q425_8kx212025ex-991.htm
<DESCRIPTION>EXHIBIT 99.1
<TEXT>
<html><head>
<!-- Document created using Wdesk -->
<!-- Copyright 2025 Workiva -->
<title>Document</title></head><body><div id="i2099fba01a62404a8f1f7e92b904f3a8_1"></div><div style="min-height:36pt;width:100%"><div style="text-align:center"><font><br></font></div></div><div style="text-align:right"><font style="color:#000000;font-family:'Times New Roman',sans-serif;font-size:10pt;font-weight


In [5]:
# Smoke test: pull the submissions JSON for a single company.
# CIK 320193 is Apple Inc.
cik = "320193"

try:
    # The downloader keeps the fetched data on itself; nothing is captured here.
    downloader.fetch_submissions(cik)
    print("✅ Submissions JSON fetched successfully.")
except Exception as fetch_error:
    # Report the failure rather than aborting the notebook run.
    print(f"❌ Error during submissions fetch: {fetch_error}")


✅ Submissions data retrieved successfully.
✅ Submissions JSON fetched successfully.


In [6]:
# Smoke test: extract the recent filings from the submissions fetched earlier.
try:
    downloader.extract_recent_filings()
    # Count via the 'accessionNumber' entry; fall back to an empty list if it is absent.
    accession_numbers = downloader.recent_filings.get('accessionNumber', [])
    print(f"✅ Recent filings extracted. Total: {len(accession_numbers)}")
except Exception as extract_error:
    print(f"❌ Error during recent filings extraction: {extract_error}")


✅ 1000 recent filings extracted.
✅ Recent filings extracted. Total: 1000


In [7]:
# Smoke test: build filing URLs restricted to the '8-K' and '10-K' form types.
# NOTE(review): in the recorded output the CIK path segment of each
# 'filing_url' matches the accession-number prefix rather than the CIK that
# was queried -- confirm that build_filing_urls intends this.
try:
    filings = downloader.build_filing_urls(forms_filter=["8-K", "10-K"])
    print(f"✅ {len(filings)} filings matched the filter.")
    # Print only a short sample (first three entries) to keep the output readable.
    sample = filings[:3]
    for record in sample:
        print(record)
except Exception as build_error:
    print(f"❌ Error during filing URL building: {build_error}")


✅ 112 filings matched filter criteria.
✅ 112 filings matched the filter.
{'accessionNumber': '0001140361-25-005876', 'form': '8-K', 'filingDate': '2025-02-25', 'primaryDocument': 'ef20044022_8k.htm', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/1140361/000114036125005876/ef20044022_8k.htm'}
{'accessionNumber': '0000320193-25-000007', 'form': '8-K', 'filingDate': '2025-01-30', 'primaryDocument': 'aapl-20250130.htm', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/320193/000032019325000007/aapl-20250130.htm'}
{'accessionNumber': '0001140361-25-000228', 'form': '8-K', 'filingDate': '2025-01-03', 'primaryDocument': 'ef20040370_8k.htm', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/1140361/000114036125000228/ef20040370_8k.htm'}


In [8]:
# Optional: shorten the polite delay to speed up the remaining test cells.
# This mutates the shared downloader, so every later request uses the new value.
FAST_TEST_DELAY_SECONDS = 0.5
downloader.delay = FAST_TEST_DELAY_SECONDS
print(f"✅ Throttle delay updated to {downloader.delay} seconds.")


✅ Throttle delay updated to 0.5 seconds.


In [9]:
# End-to-end smoke test for a second company: fetch, extract, then build URLs.
# CIK 789019 is Microsoft Corporation.
# NOTE(review): in the recorded output the CIK path segment of each
# 'filing_url' matches the accession-number prefix rather than the CIK that
# was queried -- confirm that build_filing_urls intends this.
cik = "789019"

try:
    # Each step works on the state left on the downloader by the previous one.
    downloader.fetch_submissions(cik)
    downloader.extract_recent_filings()
    filings = downloader.build_filing_urls(forms_filter=["8-K", "10-K"])
    print(f"✅ {len(filings)} filings matched the filter for Microsoft.")
    # Print only a short sample (first three entries).
    sample = filings[:3]
    for record in sample:
        print(record)
except Exception as pipeline_error:
    print(f"❌ Error during Microsoft filing fetch: {pipeline_error}")


✅ Submissions data retrieved successfully.
✅ 1001 recent filings extracted.
✅ 71 filings matched filter criteria.
✅ 71 filings matched the filter for Microsoft.
{'accessionNumber': '0000950170-25-010484', 'form': '8-K', 'filingDate': '2025-01-29', 'primaryDocument': 'msft-20250129.htm', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/950170/000095017025010484/msft-20250129.htm'}
{'accessionNumber': '0001193125-25-010492', 'form': '8-K', 'filingDate': '2025-01-22', 'primaryDocument': 'd929748d8k.htm', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/1193125/000119312525010492/d929748d8k.htm'}
{'accessionNumber': '0001193125-24-275524', 'form': '8-K', 'filingDate': '2024-12-11', 'primaryDocument': 'd865252d8k.htm', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/1193125/000119312524275524/d865252d8k.htm'}


In [10]:
# Smoke test: build filing URLs with no form filter, so every filing is returned.
# This reuses whatever submissions the downloader fetched last; no CIK is
# fetched in this cell.
try:
    filings_all = downloader.build_filing_urls()
    print(f"✅ {len(filings_all)} filings retrieved without any form filter.")
    # Print only a short sample (first three entries).
    sample_all = filings_all[:3]
    for record in sample_all:
        print(record)
except Exception as build_error:
    print(f"❌ Error during all filings retrieval: {build_error}")


✅ 1001 filings URLs built.
✅ 1001 filings retrieved without any form filter.
{'accessionNumber': '0001062993-25-007602', 'form': '4', 'filingDate': '2025-04-16', 'primaryDocument': 'xslF345X05/form4.xml', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/1062993/000106299325007602/xslF345X05/form4.xml'}
{'accessionNumber': '0000950170-25-045366', 'form': '11-K', 'filingDate': '2025-03-26', 'primaryDocument': 'msft-11k-espp-2024.htm', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/950170/000095017025045366/msft-11k-espp-2024.htm'}
{'accessionNumber': '0001062993-25-006285', 'form': '3', 'filingDate': '2025-03-25', 'primaryDocument': 'xslF345X02/form3.xml', 'filing_url': 'https://www.sec.gov/Archives/edgar/data/1062993/000106299325006285/xslF345X02/form3.xml'}
