# 📊 SEC EDGAR Company Filings Scraper
### Built by Jimmy Tools | jimmytools.net

This notebook extracts SEC filings (10-K, 10-Q, 8-K, and more) for any public company.

**What you'll get:**
- List of all filings with dates and links
- Full text extraction from selected filings
- Downloadable CSV/Excel export
- Keyword search across filings

---

## 🚀 Quick Start
1. Click **Runtime → Run all** (or press Ctrl+F9)
2. Enter the stock ticker when prompted (e.g., AAPL, TSLA, MSFT)
3. Download your results!

---

In [None]:
#@title üîß Setup (runs automatically)
import requests
import pandas as pd
import json
import re
import time
from datetime import datetime
from bs4 import BeautifulSoup
from google.colab import files
import warnings
warnings.filterwarnings('ignore')

# Headers attached to every SEC request made by this notebook. The SEC
# requires a User-Agent that carries contact information.
HEADERS = {'User-Agent': 'JimmyTools Research Bot (contact@jimmytools.net)'}
HEADERS['Accept-Encoding'] = 'gzip, deflate'

print("‚úÖ Setup complete! Ready to scrape SEC filings.")

In [None]:
#@title üìù Enter Company Ticker
# Read the symbol, then normalise it: surrounding whitespace is dropped and
# the result is upper-cased before it is echoed back.
ticker = input("Enter stock ticker (e.g., AAPL, TSLA, MSFT): ")
ticker = ticker.strip().upper()
print(f"\nüîç Looking up {ticker}...")

In [None]:
#@title üè¢ Get Company CIK (SEC Identifier)

def get_cik(ticker):
    """Look up the SEC CIK and company title for a stock ticker.

    Downloads ``company_tickers.json`` from sec.gov and scans its entries for
    one whose ``ticker`` field matches ``ticker`` case-insensitively.

    Args:
        ticker: Stock symbol such as ``"AAPL"``. Case and surrounding
            whitespace are ignored.

    Returns:
        A ``(cik, title)`` tuple. ``cik`` is the entry's ``cik_str`` rendered
        as a string and zero-padded to 10 characters; ``title`` is the entry's
        ``title`` field. Returns ``(None, None)`` when no entry matches.

    Raises:
        requests.HTTPError: If the SEC answers with a 4xx/5xx status.
        requests.RequestException: On other network failures, including the
            30-second timeout.
    """
    url = 'https://www.sec.gov/files/company_tickers.json'
    # A timeout keeps the notebook from hanging on a stalled connection.
    response = requests.get(url, headers=HEADERS, timeout=30)
    # Surface HTTP errors directly instead of failing later while trying to
    # parse an error page as JSON.
    response.raise_for_status()
    data = response.json()

    # Normalise once rather than on every loop iteration.
    wanted = ticker.strip().upper()
    for entry in data.values():
        if entry['ticker'].upper() == wanted:
            return str(entry['cik_str']).zfill(10), entry['title']
    return None, None

cik, company_name = get_cik(ticker)

# Guard clause: without a CIK nothing below can work, so report the problem
# and stop the notebook here.
if not cik:
    print(f"‚ùå Ticker '{ticker}' not found. Please check the symbol and try again.")
    raise SystemExit()

print(f"‚úÖ Found: {company_name}")
print(f"   CIK: {cik}")

In [None]:
#@title üìã Select Filing Types

# Show the menu in a single call; each argument lands on its own line.
print(
    "Which filings do you want to retrieve?\n",
    "1. 10-K (Annual Reports)",
    "2. 10-Q (Quarterly Reports)",
    "3. 8-K (Current Reports / Material Events)",
    "4. All of the above",
    "5. All filings (includes proxy statements, insider trading, etc.)",
    sep="\n",
)

choice = input("\nEnter choice (1-5): ").strip()

# Menu option -> form types to keep. Each base form is paired with its
# amendment ("/A") variant.
filing_types = {
    '1': ['10-K', '10-K/A'],
    '2': ['10-Q', '10-Q/A'],
    '3': ['8-K', '8-K/A'],
}
# Option 4 is options 1-3 combined, in menu order.
filing_types['4'] = filing_types['1'] + filing_types['2'] + filing_types['3']
# None means "do not filter by form type".
filing_types['5'] = None

# Any unrecognised answer falls back to option 4.
if choice in filing_types:
    selected_types = filing_types[choice]
else:
    selected_types = filing_types['4']

if not selected_types:
    print(f"\n‚úÖ Will retrieve ALL filing types")
else:
    print(f"\n‚úÖ Will retrieve: {', '.join(selected_types)}")

In [None]:
#@title üì• Fetch All Filings from SEC EDGAR

def get_filings(cik, filing_types=None):
    """Fetch a company's filings from the SEC EDGAR submissions endpoint.

    Reads ``filings.recent`` from ``CIK{cik}.json`` on data.sec.gov and turns
    its parallel arrays into one row per filing.

    NOTE(review): only the ``filings.recent`` section is read. If the
    response lists older filings elsewhere they are not included here;
    confirm against the EDGAR API documentation.

    Args:
        cik: CIK string exactly as it should appear in the URLs (the caller
            in this notebook passes the 10-character zero-padded form).
        filing_types: Optional collection of form types to keep, e.g.
            ``['10-K', '10-K/A']``. ``None`` or an empty collection keeps
            every form type.

    Returns:
        A ``pandas.DataFrame`` with the columns ``form_type``,
        ``filing_date``, ``accession_number``, ``description``,
        ``document_url`` and ``filing_url``. The columns are present even
        when there are no rows, so callers can index them safely.

    Raises:
        requests.HTTPError: If the SEC answers with a 4xx/5xx status.
        requests.RequestException: On other network failures, including the
            30-second timeout.
    """
    columns = [
        'form_type',
        'filing_date',
        'accession_number',
        'description',
        'document_url',
        'filing_url',
    ]

    url = f'https://data.sec.gov/submissions/CIK{cik}.json'
    # A timeout keeps the notebook from hanging on a stalled connection.
    response = requests.get(url, headers=HEADERS, timeout=30)
    # Surface HTTP errors directly instead of failing later while trying to
    # parse an error page as JSON.
    response.raise_for_status()
    data = response.json()

    recent = data.get('filings', {}).get('recent', {})
    if not recent:
        return pd.DataFrame(columns=columns)

    # 'primaryDocDescription' may be missing or shorter than the other
    # arrays; rows without an entry get an empty description.
    descriptions = recent.get('primaryDocDescription', [])

    filings = []
    for i, accession_number in enumerate(recent.get('accessionNumber', [])):
        form_type = recent['form'][i]

        # Filter by filing type if specified
        if filing_types and form_type not in filing_types:
            continue

        # Archive folder names are the accession number without dashes.
        folder = accession_number.replace('-', '')
        filing_url = f"https://www.sec.gov/Archives/edgar/data/{cik}/{folder}"
        primary_doc = recent['primaryDocument'][i]

        filings.append({
            'form_type': form_type,
            'filing_date': recent['filingDate'][i],
            'accession_number': accession_number,
            'description': descriptions[i] if i < len(descriptions) else '',
            'document_url': f"{filing_url}/{primary_doc}",
            'filing_url': filing_url,
        })

    # Passing the columns explicitly keeps the schema stable when every
    # filing was filtered out.
    return pd.DataFrame(filings, columns=columns)

print(f"\n‚è≥ Fetching filings for {company_name}...")
df_filings = get_filings(cik, selected_types)

# An empty result has nothing to summarise (and may not even have a
# 'form_type' column), so stop here the same way the ticker lookup does
# instead of failing with a KeyError.
if df_filings.empty:
    print(f"\nNo matching filings found for {company_name}. "
          "Re-run the filing type cell and pick a broader option.")
    raise SystemExit()

print(f"\n‚úÖ Found {len(df_filings)} filings!\n")
print("Filing types breakdown:")
print(df_filings['form_type'].value_counts().to_string())

In [None]:
#@title üëÄ Preview Filings

# Show the first 20 rows of the three summary columns, without the index.
print(f"\nüìã Most Recent Filings for {company_name}\n")
print(
    df_filings.loc[:, ['form_type', 'filing_date', 'description']]
    .iloc[:20]
    .to_string(index=False)
)

In [None]:
#@title üìÑ Extract Full Text from Recent Filings (Optional)

extract_text = input("\nExtract full text from filings? This takes longer but enables keyword search. (y/n): ").strip().lower()

if extract_text == 'y':
    num_to_extract = input("How many recent filings to extract? (default: 10): ").strip()
    # isdecimal() only accepts characters that int() can parse. isdigit()
    # also accepts e.g. superscript digits, which would make int() raise
    # ValueError; those inputs now fall back to the default.
    num_to_extract = int(num_to_extract) if num_to_extract.isdecimal() else 10

    def extract_filing_text(url, max_chars=50000):
        """Download a filing document and return its visible text.

        The document is parsed as HTML, ``script``/``style``/``meta``/``link``
        tags are removed, and runs of whitespace are collapsed to single
        spaces.

        Args:
            url: Address of the filing document.
            max_chars: Maximum number of characters to return; longer text
                is truncated.

        Returns:
            The cleaned text, or a string starting with
            ``"Error extracting: "`` when the download or parsing fails.
            Failures are reported in-band so one bad filing does not abort
            the whole extraction run.
        """
        try:
            response = requests.get(url, headers=HEADERS, timeout=30)
            # Without this check an HTTP error page would be stored as if it
            # were the filing's text.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Remove scripts and styles
            for tag in soup(['script', 'style', 'meta', 'link']):
                tag.decompose()

            text = soup.get_text(separator=' ', strip=True)
            # Clean up whitespace
            text = re.sub(r'\s+', ' ', text)
            return text[:max_chars]
        except Exception as e:
            # Deliberate best-effort boundary: report the failure as text.
            return f"Error extracting: {str(e)}"

    print(f"\n‚è≥ Extracting text from {num_to_extract} filings (this may take a minute)...\n")

    filings_to_extract = df_filings.head(num_to_extract)
    texts = []
    for _, row in filings_to_extract.iterrows():
        print(f"  Processing {row['form_type']} from {row['filing_date']}...")
        texts.append(extract_filing_text(row['document_url']))
        time.sleep(0.2)  # Be nice to SEC servers

    # Skip the assignment when nothing was fetched (e.g. the user entered 0)
    # so no empty 'full_text' column is created.
    if texts:
        df_filings.loc[filings_to_extract.index, 'full_text'] = texts
    print(f"\n‚úÖ Text extraction complete!")
else:
    print("\n‚è© Skipping text extraction.")

In [None]:
#@title üîç Search Filings for Keywords (Optional)

# Count case-insensitive occurrences of a user-supplied keyword in every
# filing whose text was extracted, then list the filings that contain it,
# most matches first.
if 'full_text' in df_filings.columns:
    search_term = input("\nEnter keyword to search (or press Enter to skip): ").strip()

    if search_term:
        print(f"\nüîç Searching for '{search_term}'...\n")

        # The prompt asks for a keyword, not a regular expression. Escaping
        # makes input such as "C++" or "(loss" match literally instead of
        # raising re.error or matching something else. Compiled once because
        # the same pattern is applied to every row.
        pattern = re.compile(re.escape(search_term), re.IGNORECASE)

        results = []
        for _, row in df_filings.iterrows():
            text = row.get('full_text')
            # Rows beyond the extracted range hold NaN rather than text.
            if pd.isna(text):
                continue
            matches = len(pattern.findall(text))
            if matches > 0:
                results.append({
                    'form_type': row['form_type'],
                    'filing_date': row['filing_date'],
                    'matches': matches,
                    'url': row['document_url']
                })

        if results:
            df_results = pd.DataFrame(results).sort_values('matches', ascending=False)
            print(f"Found '{search_term}' in {len(results)} filings:\n")
            print(df_results.to_string(index=False))
        else:
            print(f"No matches found for '{search_term}'")
else:
    print("\n‚ö†Ô∏è Text extraction was skipped. Run the extraction cell above to enable search.")

In [None]:
#@title üíæ Download Results

print("\nüì• Preparing downloads...\n")

# Both exports share one base name: ticker plus today's date (YYYYMMDD).
timestamp = f"{datetime.now():%Y%m%d}"
filename_base = f"{ticker}_SEC_Filings_{timestamp}"
csv_filename = f"{filename_base}.csv"
excel_filename = f"{filename_base}.xlsx"

# The CSV carries every column.
df_filings.to_csv(csv_filename, index=False)
print(f"‚úÖ Saved: {csv_filename}")

# The Excel file leaves out 'full_text' to keep the workbook small.
export_cols = df_filings.columns[df_filings.columns != 'full_text'].tolist()
df_filings[export_cols].to_excel(excel_filename, index=False)
print(f"‚úÖ Saved: {excel_filename}")

# Hand both files to the browser, CSV first.
print("\nüì• Downloading files to your computer...")
for export_path in (csv_filename, excel_filename):
    files.download(export_path)

print("\nüéâ Done! Check your Downloads folder.")

---

## 📊 Summary

You've successfully extracted SEC filings for your target company!

**What you got:**
- Complete list of filings with dates and direct links
- Full text extraction (if enabled)
- Keyword search results (if searched)
- CSV and Excel exports

**Next steps:**
- Open the CSV/Excel in your spreadsheet app
- Click the `document_url` links to read full filings
- Re-run this notebook for a different company

---

### 🛠️ More Tools at [jimmytools.net](https://jimmytools.net)

Questions? [@JimmyToolsAi on X](https://x.com/JimmyToolsAi)
