### EDGAR SEC API
This notebook uses the SEC's EDGAR API to pull publicly available financial information about companies. To start, it is used to retrieve 8-K filings.

In [14]:
# Import necessary packages
import requests
import json
import re

In [15]:
# Configuration

# Identity string sent with every call to the SEC API
user = "Noah Murphy ncmurphy27@gmail.com"

# Tickers to pull information on
tickers = [
    "AAPL",
    "MSFT",
    "TSLA",
]


In [16]:
# The SEC requires callers to identify themselves, so send the user as the User-Agent
headers = {"User-Agent": user}

# Create a function for calling the API to get filings
def get_filings_from_api(url, headers):
    """Fetch a JSON document from the SEC API.

    Args:
        url: Endpoint to request.
        headers: HTTP headers to send with the request.

    Returns:
        The decoded JSON payload, or None when the request fails, the server
        answers with a 4xx/5xx status, or the body is not valid JSON. In each
        failure case a short message is printed instead of raising.
    """
    try:
        response = requests.get(url, headers=headers, timeout=10)

        # Raise an error on a 4xx/5xx response
        response.raise_for_status()

        data = response.json()

    except requests.exceptions.HTTPError as e:
        print("HTTP error:", e)
        return None
    except json.JSONDecodeError:
        # Must come before RequestException: newer versions of requests raise
        # a decode error that is also a RequestException, which would
        # otherwise be reported as a generic request failure.
        print("Failed to decode JSON.")
        return None
    except requests.exceptions.RequestException as e:
        print("Request failed:", e)
        return None

    # Print a very small section to confirm structure
    print("Success!")
    print("Company Name:", data.get("name"))

    # Return the result
    return data
        
# Create a function for getting the most recent filing of a form_type
def get_most_recent_filing(recent, form_type):
    """Return the newest filing of ``form_type`` from a column-oriented listing.

    Args:
        recent: Dict holding the parallel lists "form", "filingDate",
            "accessionNumber" and "primaryDocument"; a missing key is treated
            as an empty list.
        form_type: Form name to match exactly (e.g. "10-K").

    Returns:
        A ``(date, accession, primary_doc)`` tuple for the match with the
        greatest filing date, or None when no entry has that form type.
    """
    form_col = recent.get("form", [])
    date_col = recent.get("filingDate", [])
    accession_col = recent.get("accessionNumber", [])
    document_col = recent.get("primaryDocument", [])

    # Gather every row whose form matches
    candidates = [
        (date_col[pos], accession_col[pos], document_col[pos])
        for pos, form in enumerate(form_col)
        if form == form_type
    ]

    if len(candidates) == 0:
        return None

    # max() yields the first entry with the greatest date, so ties resolve
    # to the earliest-listed filing
    return max(candidates, key=lambda entry: entry[0])

In [17]:
# Pull the SEC mapping of tickers → CIKs
url = "https://www.sec.gov/files/company_tickers.json"

# Make the call. Every failure is re-raised after its message is printed:
# the rest of this cell needs tickers_data, and carrying on would either hit
# a NameError or silently reuse a stale value left over in the kernel.
try:
    response = requests.get(url, headers=headers, timeout=10)

    # Raise an error on a 4xx/5xx response
    response.raise_for_status()

    tickers_data = response.json()

    print("Success!")

except requests.exceptions.HTTPError as e:
    print("HTTP error:", e)
    raise
except json.JSONDecodeError:
    # Listed before RequestException because newer versions of requests raise
    # a decode error that is also a RequestException.
    print("Failed to decode JSON.")
    raise
except requests.exceptions.RequestException as e:
    print("Request failed:", e)
    raise

# Build dictionary: ticker (upper) → 10-digit CIK string
ticker_to_cik = {
    item["ticker"].upper(): str(item["cik_str"]).zfill(10)
    for item in tickers_data.values()
}

# Example usage
for t in tickers:
    cik = ticker_to_cik.get(t.upper())
    print(f"{t} → {cik}")

Success!
AAPL → 0000320193
MSFT → 0000789019
TSLA → 0001318605


In [18]:
# Now get some data - starting with the CIK of the first ticker
cik = ticker_to_cik[tickers[0]]

# Pull recent filings for that company
url = f"https://data.sec.gov/submissions/CIK{cik}.json"

data = get_filings_from_api(url, headers)

# get_filings_from_api prints the error and returns None on failure; stop
# here so later cells do not fail with an unrelated TypeError on `data`.
if data is None:
    raise RuntimeError(f"Could not load SEC submissions for CIK {cik}")

Success!
Company Name: Apple Inc.


In [19]:
# Find the most recent 10-K and 10-Q among the company's recent filings
recent = data["filings"]["recent"]

latest_10k, latest_10q = (
    get_most_recent_filing(recent, form_type) for form_type in ("10-K", "10-Q")
)

In [20]:
# Show the (date, accession, primary document) tuple found for each form;
# a value of None means no filing of that form was found.
print("Latest 10-K:", latest_10k)
print("Latest 10-Q:", latest_10q)

Latest 10-K: ('2025-10-31', '0000320193-25-000079', 'aapl-20250927.htm')
Latest 10-Q: ('2025-08-01', '0000320193-25-000073', 'aapl-20250628.htm')


In [23]:
# Get the most recent 8-K filings, newest first
filings = data.get("filings", {}).get("recent", {})

# The listing is column-oriented: parallel lists indexed by filing position
forms = filings.get("form", [])
dates = filings.get("filingDate", [])
accessions = filings.get("accessionNumber", [])
documents = filings.get("primaryDocument", [])

eight_ks = [
    {
        "form": form,
        "date": dates[i],
        "accession": accessions[i],
        "primary_document": documents[i],
    }
    for i, form in enumerate(forms)
    if form == "8-K"
]

# Later code treats the first entry as the latest filing, so order the list
# explicitly (as get_most_recent_filing does) rather than trusting API order.
# Dates are YYYY-MM-DD strings, so string order is date order; the sort is
# stable, so filings sharing a date keep their original relative order.
eight_ks.sort(key=lambda filing: filing["date"], reverse=True)

print(f"Found {len(eight_ks)} recent 8-K filings.\n")

Found 104 recent 8-K filings.



In [29]:
def get_filing_index(cik, accession, headers):
    """Fetch the ``index.json`` directory listing for a single filing.

    Args:
        cik: Company CIK; passed through int() so a zero-padded string loses
            its leading zeros in the URL path.
        accession: Accession number; its dashes are removed to form the
            folder name in the URL.
        headers: HTTP headers to send with the request.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: On a 4xx/5xx response.
        requests.exceptions.RequestException: On other request failures,
            including a timeout.
    """
    acc_nodash = accession.replace("-", "")
    url = f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{acc_nodash}/index.json"

    # Use the same 10-second timeout as the other SEC calls in this notebook
    # so a stalled connection cannot hang the cell indefinitely.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    return resp.json()


In [39]:
import re

def extract_ex99_exhibits(index_json):
    """Pick out the directory entries whose file name looks like an EX-99 exhibit.

    Args:
        index_json: Filing index dict; entries are read from
            ``index_json["directory"]["item"]`` and each must have a "name".
            Missing "directory" or "item" keys yield an empty result.

    Returns:
        A list of the matching entry dicts, in their original order.
    """
    # Matches ex-99, ex_99, ex99, exhibit99 (with optional .1, -1, _1 versions)
    # anywhere in the file name
    ex99_re = re.compile(r"(exhibit|ex)[-_]?99([._-]?\d+)?", re.IGNORECASE)

    entries = index_json.get("directory", {}).get("item", [])

    return [entry for entry in entries if ex99_re.search(entry["name"].lower())]


In [46]:
# Check through accessions for the most recent -earnings- 8K (contains exhibits)
def identify_latest_earnings_8k(recent_eight_ks):
    """Return the filing index of the first 8-K that has an EX-99 exhibit.

    Filings are checked in the order given, so pass them newest first to get
    the latest one. Uses the notebook-level ``cik`` and ``headers`` variables
    for the index requests.

    Args:
        recent_eight_ks: Iterable of dicts, each with an "accession" key.

    Returns:
        The index JSON of the first filing with at least one EX-99 exhibit,
        or None when no filing has one (including an empty input).
    """
    for eight_k in recent_eight_ks:
        index = get_filing_index(cik, eight_k["accession"], headers)

        # Inspect the index fetched for THIS filing (not a leftover global)
        exhibits = extract_ex99_exhibits(index)

        if exhibits:
            print("Earnings 8-K found.")
            print(f"Filing: {eight_k}")
            print(f"Exhibits: \n{exhibits}")
            return index

    # Reported once, only after every filing has been checked
    print("No earnings 8-K found.")
    return None

In [47]:
# Scan the collected 8-Ks for the first one with an EX-99 exhibit; the
# returned filing index is shown as this cell's output.
identify_latest_earnings_8k(eight_ks)

Earnings 8-K found.
Filing: {'form': '8-K', 'date': '2025-10-30', 'accession': '0000320193-25-000077', 'primary_document': 'aapl-20251030.htm'}
Exhibits: 
[{'last-modified': '2025-10-30 16:30:35', 'name': 'a8-kex991q4202509272025.htm', 'type': 'text.gif', 'size': '198222'}]


{'directory': {'item': [{'last-modified': '2025-10-30 16:30:35',
    'name': '0000320193-25-000077-index-headers.html',
    'type': 'text.gif',
    'size': ''},
   {'last-modified': '2025-10-30 16:30:35',
    'name': '0000320193-25-000077-index.html',
    'type': 'text.gif',
    'size': ''},
   {'last-modified': '2025-10-30 16:30:35',
    'name': '0000320193-25-000077.txt',
    'type': 'text.gif',
    'size': ''},
   {'last-modified': '2025-10-30 16:30:35',
    'name': '0000320193-25-000077-xbrl.zip',
    'type': 'compressed.gif',
    'size': '27638'},
   {'last-modified': '2025-10-30 16:30:35',
    'name': 'a8-kex991q4202509272025.htm',
    'type': 'text.gif',
    'size': '198222'},
   {'last-modified': '2025-10-30 16:30:35',
    'name': 'aapl-20251030.htm',
    'type': 'text.gif',
    'size': '39054'},
   {'last-modified': '2025-10-30 16:30:35',
    'name': 'aapl-20251030.xsd',
    'type': 'text.gif',
    'size': '3856'},
   {'last-modified': '2025-10-30 16:30:35',
    'name': 'aapl-