In [1]:
import pandas as pd
import numpy as np
import glob
import pathlib
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

Read the list of tickers from [https://www.sec.gov/file/company-tickers](https://www.sec.gov/file/company-tickers).

The current SEC guidelines limit users to a total of no more than 10 requests per second, regardless of the number of machines used to submit requests.

A user-agent must be declared in the request header in the following format:

```
Sample Company Name AdminContact@<sample company domain>.com
```

In [2]:
import urllib.request, json
import requests

sec_request_headers = {
    'User-Agent': 'University of Illinois ypark32@illinois.edu',
}

In [None]:
tickers_url = 'https://www.sec.gov/files/company_tickers.json'

response = requests.get(tickers_url, headers=sec_request_headers)
cik_dict = response.json()

Print the first five key-value pairs.

In [3]:
{k: cik_dict[k] for k in list(cik_dict)[:5]}

{'0': {'cik_str': 320193, 'ticker': 'AAPL', 'title': 'Apple Inc.'},
 '1': {'cik_str': 789019, 'ticker': 'MSFT', 'title': 'MICROSOFT CORP'},
 '2': {'cik_str': 1045810, 'ticker': 'NVDA', 'title': 'NVIDIA CORP'},
 '3': {'cik_str': 1652044, 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
 '4': {'cik_str': 1018724, 'ticker': 'AMZN', 'title': 'AMAZON COM INC'}}

In [4]:
df = pd.DataFrame(
    data=cik_dict.values()
)

In [5]:
df

Unnamed: 0,cik_str,ticker,title
0,320193,AAPL,Apple Inc.
1,789019,MSFT,MICROSOFT CORP
2,1045810,NVDA,NVIDIA CORP
3,1652044,GOOGL,Alphabet Inc.
4,1018724,AMZN,AMAZON COM INC
...,...,...,...
10162,1849820,KITTW,"Nauticus Robotics, Inc."
10163,1276187,ET-PI,Energy Transfer LP
10164,1571283,REXR-PC,"Rexford Industrial Realty, Inc."
10165,1855756,LILMW,Lilium N.V.


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10167 entries, 0 to 10166
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   cik_str  10167 non-null  int64 
 1   ticker   10167 non-null  object
 2   title    10167 non-null  object
dtypes: int64(1), object(2)
memory usage: 238.4+ KB


The `"cik_str"` is an entity's Central Index Key (CIK). To use CIK values inside a query, the key must be zero-padded to be 10-digits long.

Pad the values in the `"cik_str"` column.

In [7]:
df['cik_str'] = df['cik_str'].astype(str).str.pad(width=10, fillchar='0')
df.head(10)

Unnamed: 0,cik_str,ticker,title
0,320193,AAPL,Apple Inc.
1,789019,MSFT,MICROSOFT CORP
2,1045810,NVDA,NVIDIA CORP
3,1652044,GOOGL,Alphabet Inc.
4,1018724,AMZN,AMAZON COM INC
5,1326801,META,"Meta Platforms, Inc."
6,1067983,BRK-B,BERKSHIRE HATHAWAY INC
7,1046179,TSM,TAIWAN SEMICONDUCTOR MANUFACTURING CO LTD
8,1730168,AVGO,Broadcom Inc.
9,1318605,TSLA,"Tesla, Inc."


In [8]:
AAPL_cik = df.query("ticker == 'AAPL'").iloc[0]['cik_str']

AAPL_cik

'0000320193'

### Company Facts

In [11]:
company_facts_url = f'https://data.sec.gov/api/xbrl/companyfacts/CIK{AAPL_cik}.json'

response = requests.get(company_facts_url, headers=sec_request_headers)
my_response = response.json()

print(my_response)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

