d_secgov_at_tickers.ipynb

In [None]:
"""  
purpose: retrieves basic ticker data, three (3) vars; 'cik_str', 'ticker', 'title',
+for any company (stock) ticker ever registered with the SEC, both active and inactive.

api endpoint: https://www.sec.gov/files/company_tickers.json
api endpoint updates: The JSON structures are updated throughout the day, in real time, as submissions are disseminated.
+In addition, a bulk ZIP file is available to download all the JSON structures for an API. This ZIP file is updated and republished nightly at approximately 3:00 a.m. ET.

docs: https://www.sec.gov/search-filings/edgar-application-programming-interfaces
columns: ['cik_str', 'ticker', 'title']
length: 9998

sample json output:
{
    "0": {
        "cik_str": 1045810,
        "ticker": "NVDA",
        "title": "NVIDIA CORP"
    },
    "1": {
        "cik_str": 320193,
        "ticker": "AAPL",
        "title": "Apple Inc."
    },
    "2": {
        "cik_str": 789019,
        "ticker": "MSFT",
        "title": "MICROSOFT CORP"
    }
}

smample df output:
      cik_str ticker           title
0  0001045810   NVDA     NVIDIA CORP
1  0000320193   AAPL      Apple Inc.
2  0000789019   MSFT  MICROSOFT CORP
3  0001018724   AMZN  AMAZON COM INC

"""

"  \n- retrieves basic ticker data, three (3) vars; 'cik_str', 'ticker', 'title',\n- for any company (stock) ticker ever registered with the SEC, both active and inactive.\n- https://www.sec.gov/search-filings/edgar-application-programming-interfaces\n\nsample json output:\n\n\n\nsmample df output:\n      cik_str ticker           title\n0  0001045810   NVDA     NVIDIA CORP\n1  0000320193   AAPL      Apple Inc.\n2  0000789019   MSFT  MICROSOFT CORP\n3  0001018724   AMZN  AMAZON COM INC\n\n\n\n\n\n"

In [21]:
import requests
import pandas as pd
import json


In [22]:
# get "all-time" basic stock ticker data from SEC server endpoint
def get_sec_tickers() -> pd.DataFrame:
    """
    Retrieve company tickers from SEC endpoint and return as DataFrame
    Returns DataFrame with columns: cik_str, ticker, title
    """
    headers = {
        'User-Agent': 'ServiceNow fred.luddy@servicenow.com', 
        'Accept': 'application/json',
        'Host': 'www.sec.gov'
    }
    
    try:
        response = requests.get(
            'https://www.sec.gov/files/company_tickers.json',
            headers=headers
        )
        
        if response.status_code == 403:
            raise Exception("SEC requires a proper User-Agent. \nPlease modify the User-Agent header with your company/contact information.")
        
        response.raise_for_status()

        print(json.dumps(response.json(), indent=4))
        
        df = pd.DataFrame.from_dict(response.json(), orient='index')
        df['cik_str'] = df['cik_str'].astype(str).str.zfill(10)
        return df
        
    except requests.exceptions.RequestException as e:
        raise Exception(f"Error accessing SEC API: {str(e)}")

In [23]:
sec_all_tickers_df = get_sec_tickers()

print(sec_all_tickers_df.columns)
print(len(sec_all_tickers_df))

{
    "0": {
        "cik_str": 1045810,
        "ticker": "NVDA",
        "title": "NVIDIA CORP"
    },
    "1": {
        "cik_str": 320193,
        "ticker": "AAPL",
        "title": "Apple Inc."
    },
    "2": {
        "cik_str": 789019,
        "ticker": "MSFT",
        "title": "MICROSOFT CORP"
    },
    "3": {
        "cik_str": 1018724,
        "ticker": "AMZN",
        "title": "AMAZON COM INC"
    },
    "4": {
        "cik_str": 1652044,
        "ticker": "GOOGL",
        "title": "Alphabet Inc."
    },
    "5": {
        "cik_str": 1326801,
        "ticker": "META",
        "title": "Meta Platforms, Inc."
    },
    "6": {
        "cik_str": 1318605,
        "ticker": "TSLA",
        "title": "Tesla, Inc."
    },
    "7": {
        "cik_str": 1067983,
        "ticker": "BRK-B",
        "title": "BERKSHIRE HATHAWAY INC"
    },
    "8": {
        "cik_str": 1046179,
        "ticker": "TSM",
        "title": "TAIWAN SEMICONDUCTOR MANUFACTURING CO LTD"
    },
    "9": {
    

In [24]:
# display(sec_all_tickers_df)
print(sec_all_tickers_df.head(3))

      cik_str ticker           title
0  0001045810   NVDA     NVIDIA CORP
1  0000320193   AAPL      Apple Inc.
2  0000789019   MSFT  MICROSOFT CORP
