In [1]:
# import libraries

import requests
import pandas as pd

In [2]:
# request headers sent with every SEC call below; the User-Agent
# value carries a contact e-mail address

headers = {"User-Agent": "mmixon@seattleu.edu"}

In [5]:
# get all companies data (the SEC list of companies with their CIK,
# ticker and title)

companyTickers = requests.get(
    "https://www.sec.gov/files/company_tickers.json",
    headers=headers,
    timeout=30,  # do not hang forever if the server never answers
)

# stop here with a clear HTTP error if the request was rejected, instead
# of failing later with a confusing error when .json() is called
companyTickers.raise_for_status()

In [6]:
# print(companyTickers.json()['0']['cik_str'])

1045810


In [7]:
# build a lookup table from the downloaded JSON: one row per company,
# with the columns cik_str, ticker and title

companyCIK = pd.DataFrame.from_dict(
    data=companyTickers.json(),
    orient="index",
)

print(companyCIK)

       cik_str ticker                              title
0      1045810   NVDA                        NVIDIA CORP
1       789019   MSFT                     MICROSOFT CORP
2       320193   AAPL                         Apple Inc.
3      1018724   AMZN                     AMAZON COM INC
4      1652044  GOOGL                      Alphabet Inc.
...        ...    ...                                ...
10055  1949257  RDPTF        Radiopharm Theranostics Ltd
10056  1733968  WLGMF      West Red Lake Gold Mines Ltd.
10057  1991946  CGBSW             Crown LNG Holdings Ltd
10058  2038919  DWWYF        Dowway Holdings Limited/ADR
10059  2042556  SPGDF  Springer Nature AG & Co. KGaA/ADR

[10060 rows x 3 columns]


In [8]:
# left-pad every CIK with zeros to a fixed width of 10 characters
# (the CIK is converted to text first so it can be padded)

companyCIK["cik_str"] = (
    companyCIK["cik_str"]
    .astype(str)
    .str.zfill(10)
)

print(companyCIK)

          cik_str ticker                              title
0      0001045810   NVDA                        NVIDIA CORP
1      0000789019   MSFT                     MICROSOFT CORP
2      0000320193   AAPL                         Apple Inc.
3      0001018724   AMZN                     AMAZON COM INC
4      0001652044  GOOGL                      Alphabet Inc.
...           ...    ...                                ...
10055  0001949257  RDPTF        Radiopharm Theranostics Ltd
10056  0001733968  WLGMF      West Red Lake Gold Mines Ltd.
10057  0001991946  CGBSW             Crown LNG Holdings Ltd
10058  0002038919  DWWYF        Dowway Holdings Limited/ADR
10059  0002042556  SPGDF  Springer Nature AG & Co. KGaA/ADR

[10060 rows x 3 columns]


In [9]:
# NVIDIA CORP CIK number
#
# Select the row by its ticker instead of assuming NVIDIA is the first
# row of the table, and take the value by position with .iloc: the
# frame's index labels are strings ('0', '1', ...), so an integer key
# such as cik_str[0] only works through pandas' deprecated positional
# fallback.

cik = companyCIK.loc[companyCIK['ticker'] == 'NVDA', 'cik_str'].iloc[0]

print(cik)

0001045810


In [11]:
# SEC submissions API call: a second request, this time for the
# filing history of the company identified by `cik`

companyFiling = requests.get(
    f'https://data.sec.gov/submissions/CIK{cik}.json',
    headers=headers,
    timeout=30,  # do not hang forever if the server never answers
)

# stop here with a clear HTTP error if the request was rejected, instead
# of failing below with a confusing error from .json()
companyFiling.raise_for_status()

print(companyFiling.json()['filings'].keys())

dict_keys(['recent', 'files'])


In [12]:
# turn the 'recent' part of the filings payload into a DataFrame
# (one column per key of that dict) and show it

allFilings = pd.DataFrame.from_dict(
    data=companyFiling.json()["filings"]["recent"],
    orient="columns",
)

print(allFilings)

          accessionNumber  filingDate  reportDate        acceptanceDateTime  \
0    0001965301-25-000098  2025-07-11              2025-07-11T20:58:27.000Z   
1    0001921094-25-000778  2025-07-11              2025-07-11T20:54:38.000Z   
2    0001197649-25-000006  2025-07-10  2025-07-08  2025-07-10T23:30:00.000Z   
3    0001045810-25-000188  2025-07-10  2025-07-08  2025-07-10T23:26:38.000Z   
4    0001921094-25-000777  2025-07-10              2025-07-10T20:58:40.000Z   
..                    ...         ...         ...                       ...   
995  0001045810-17-000186  2017-12-15  2017-12-13  2017-12-15T22:45:32.000Z   
996  0001045810-17-000184  2017-12-15  2017-12-13  2017-12-15T22:29:24.000Z   
997  0001045810-17-000182  2017-12-15  2017-12-13  2017-12-15T22:23:59.000Z   
998  0001045810-17-000180  2017-12-15  2017-12-13  2017-12-15T22:16:29.000Z   
999  0001045810-17-000178  2017-12-15  2017-12-15  2017-12-15T21:31:03.000Z   

    act form fileNumber filmNumber           items 

In [14]:
# list the column labels of allFilings, as a reference for choosing
# which fields to keep when cleaning up

print(allFilings.columns)

Index(['accessionNumber', 'filingDate', 'reportDate', 'acceptanceDateTime',
       'act', 'form', 'fileNumber', 'filmNumber', 'items', 'core_type', 'size',
       'isXBRL', 'isInlineXBRL', 'primaryDocument', 'primaryDocDescription'],
      dtype='object')


In [16]:
# show only accessionNumber, reportDate and form for the first 50 rows

print(
    allFilings
    .loc[:, ["accessionNumber", "reportDate", "form"]]
    .head(50)
)

         accessionNumber  reportDate form
0   0001965301-25-000098              144
1   0001921094-25-000778              144
2   0001197649-25-000006  2025-07-08    4
3   0001045810-25-000188  2025-07-08    4
4   0001921094-25-000777              144
5   0001965301-25-000097              144
6   0001921094-25-000773              144
7   0001965301-25-000096              144
8   0001921094-25-000771              144
9   0001965301-25-000095              144
10  0001197649-25-000004  2025-07-02    4
11  0001045810-25-000186  2025-07-02    4
12  0001921094-25-000760              144
13  0001965301-25-000094              144
14  0001921094-25-000757              144
15  0001921094-25-000751              144
16  0001965301-25-000092              144
17  0001045810-25-000183  2025-06-30    4
18  0001045810-25-000182  2025-06-27    4
19  0001921094-25-000746              144
20  0001965301-25-000089              144
21  0001045810-25-000179  2025-06-25  8-K
22  0001965301-25-000088          

In [17]:
# show one individual filing: the row of 'allFilings' at zero-based
# position 11, selected with the positional iloc indexer

print(allFilings.iloc[11])

accessionNumber                        0001045810-25-000186
filingDate                                       2025-07-07
reportDate                                       2025-07-02
acceptanceDateTime                 2025-07-08T00:29:56.000Z
act                                                        
form                                                      4
fileNumber                                                 
filmNumber                                                 
items                                                      
core_type                                                 4
size                                                  19442
isXBRL                                                    0
isInlineXBRL                                              0
primaryDocument          xslF345X05/wk-form4_1751934586.xml
primaryDocDescription                                FORM 4
Name: 11, dtype: object


In [19]:
# using data from NVIDIA, define a function to retrieve financial data

def get_data(cik, concept, timeout=30):
    """Fetch the USD facts reported for one us-gaap concept of a company.

    Calls the SEC ``companyconcept`` XBRL endpoint and turns the
    ``units -> USD`` list of the JSON response into a DataFrame.

    Parameters
    ----------
    cik : str
        Company CIK exactly as it must appear in the URL (the notebook
        passes the 10-character zero-padded string).
    concept : str
        us-gaap concept (tag) name, e.g. ``'Assets'``.
    timeout : float, optional
        Seconds ``requests`` waits for the server before giving up;
        defaults to 30.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the ``USD`` list, plus a ``concept`` column
        holding ``concept``.  An empty DataFrame is returned when the
        response status is not 200, when the body is not valid JSON, or
        when it has no usable ``units -> USD`` entry.
    """
    url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/us-gaap/{concept}.json"
    res = requests.get(url, headers=headers, timeout=timeout)

    if res.status_code != 200:
        return pd.DataFrame()

    try:
        df = pd.DataFrame(res.json()['units']['USD'])
    except (KeyError, TypeError, ValueError):
        # Only the expected "no usable USD data" failures are treated as
        # an empty result: a missing key, an unexpected payload shape, or
        # an undecodable body.  Anything else (including Ctrl-C) is no
        # longer silently swallowed.
        return pd.DataFrame()

    df['concept'] = concept
    return df

In [20]:
# us-gaap concept names to request for the company, one per metric

tags = [
    # revenue
    "RevenueFromContractWithCustomerExcludingAssessedTax",
    "Assets",
    "NetIncomeLoss",
    "Liabilities",
    "CashAndCashEquivalentsAtCarryingValue",
]

In [22]:
# combine into one DataFrame
#
# Collect the non-empty per-tag frames first and concatenate them once.
# Calling pd.concat inside the loop re-copies all accumulated rows on
# every pass, and concatenating onto an empty seed frame can emit a
# FutureWarning in recent pandas versions.

frames = []
for tag in tags:
    df = get_data(cik, tag)  # get_data already fills the 'concept' column
    if not df.empty:
        frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when
# no tag returned any data
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [23]:
# keep only the fields of interest, in this order, and give them
# readable column names (raw field name -> display name)

column_labels = {
    "start": "Start Date",
    "end": "End Date",
    "val": "Value",
    "concept": "Concept",
    "accn": "Accession Number",
    "fy": "Fiscal Year",
    "fp": "Fiscal Period",
    "form": "Form",
    "filed": "Filed Date",
}
all_data = all_data[list(column_labels)].rename(columns=column_labels)

In [26]:
# preview the first 20 rows of the combined, renamed financial data

print(all_data.head(20))

    Start Date    End Date        Value  \
0   2016-02-01  2017-01-29   6910000000   
1   2017-01-30  2017-04-30   1937000000   
2   2017-05-01  2017-07-30   2230000000   
3   2017-07-31  2017-10-29   2636000000   
4   2017-01-30  2018-01-28   9714000000   
5   2017-01-30  2018-01-28   9714000000   
6   2017-10-30  2018-01-28   2911000000   
7   2018-01-29  2018-04-29   3207000000   
8   2018-01-29  2018-04-29   3207000000   
9   2018-04-30  2018-07-29   3123000000   
10  2018-04-30  2018-07-29   3123000000   
11  2018-07-30  2018-10-28   3181000000   
12  2018-07-30  2018-10-28   3181000000   
13  2018-01-29  2019-01-27  11716000000   
14  2018-01-29  2019-01-27  11716000000   
15  2018-01-29  2019-01-27  11716000000   
16  2018-10-29  2019-01-27   2205000000   
17  2018-10-29  2019-01-27   2205000000   
18  2019-01-28  2019-04-28   2220000000   
19  2019-04-29  2019-07-28   2579000000   

                                              Concept      Accession Number  \
0   RevenueFromCo