In [3]:
%load_ext lab_black

In [4]:
import requests
import pandas as pd
from pandas import json_normalize
import json
import matplotlib.pyplot as plt

In [5]:
plt.style.use("seaborn")

In [6]:
# SEC Edgar database API
# https://www.sec.gov/edgar/sec-api-documentation

# history of submissions of financial report fillings
url_submissions = "https://data.sec.gov/submissions/"

# a few quarters of financial reports
url_facts = "https://data.sec.gov/api/xbrl/companyfacts/"

# hisotry of values for a specific financial report entry (e.g. Revenues)
# for a specific company
url_concepts = "https://data.sec.gov/api/xbrl/companyconcepts/"

# ...
url_frames = "https://data.sec.gov/api/xbrl/frames/"

In [54]:
# url_facts
# test with Apple Inc.
# Apple Inc. CIK found manually from https://www.sec.gov/edgar/searchedgar/cik.htm

apple_cik = "0000320193"
headers = {"User-Agent": "Matthias Galipaud (individual) aborgeaud@gmail.com"}


def get_facts(cik: str, headers) -> dict:
    assert len(cik) == 10
    resp = requests.get(
        url_facts + "CIK" + apple_cik + ".json", headers=headers, timeout=5
    )
    resp_json = None
    if resp.status_code < 400:
        resp_json = resp.json()
    return resp_json


In [9]:
resp_json = get_facts(apple_cik, headers)
resp_json.keys()

dict_keys(['cik', 'entityName', 'facts'])

In [17]:
resp_json["facts"].keys()

dict_keys(['dei', 'us-gaap'])

In [20]:
resp_json["facts"]["dei"].keys()

dict_keys(['EntityCommonStockSharesOutstanding', 'EntityPublicFloat'])

In [22]:
resp_json["facts"]["dei"]["EntityCommonStockSharesOutstanding"].keys()

dict_keys(['label', 'description', 'units'])

In [26]:
resp_json["facts"]["dei"]["EntityCommonStockSharesOutstanding"]["units"].keys()

dict_keys(['shares'])

In [28]:
df = json_normalize(
    resp_json["facts"]["dei"]["EntityCommonStockSharesOutstanding"]["units"]["shares"]
)
print(df)

           end          val                  accn    fy  fp    form  \
0   2009-06-27    895816758  0001193125-09-153165  2009  Q3    10-Q   
1   2009-10-16    900678473  0001193125-09-214859  2009  FY    10-K   
2   2009-10-16    900678473  0001193125-10-012091  2009  FY  10-K/A   
3   2010-01-15    906794589  0001193125-10-012085  2010  Q1    10-Q   
4   2010-04-09    909938383  0001193125-10-088957  2010  Q2    10-Q   
5   2010-07-09    913562880  0001193125-10-162840  2010  Q3    10-Q   
6   2010-10-15    917307099  0001193125-10-238044  2010  FY    10-K   
7   2011-01-07    921278012  0001193125-11-010144  2011  Q1    10-Q   
8   2011-04-08    924754561  0001193125-11-104388  2011  Q2    10-Q   
9   2011-07-08    927090886  0001193125-11-192493  2011  Q3    10-Q   
10  2011-10-14    929409000  0001193125-11-282113  2011  FY    10-K   
11  2012-01-13    932370000  0001193125-12-023398  2012  Q1    10-Q   
12  2012-04-13    935062000  0001193125-12-182321  2012  Q2    10-Q   
13  20

In [29]:
gaap = resp_json["facts"]["us-gaap"]

In [30]:
gaap.keys()

dict_keys(['AccountsPayable', 'AccountsPayableCurrent', 'AccountsReceivableNetCurrent', 'AccruedIncomeTaxesCurrent', 'AccruedIncomeTaxesNoncurrent', 'AccruedLiabilities', 'AccruedLiabilitiesCurrent', 'AccruedMarketingCostsCurrent', 'AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment', 'AccumulatedOtherComprehensiveIncomeLossAvailableForSaleSecuritiesAdjustmentNetOfTax', 'AccumulatedOtherComprehensiveIncomeLossCumulativeChangesInNetGainLossFromCashFlowHedgesEffectNetOfTax', 'AccumulatedOtherComprehensiveIncomeLossForeignCurrencyTranslationAdjustmentNetOfTax', 'AccumulatedOtherComprehensiveIncomeLossNetOfTax', 'AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue', 'AdjustmentsToAdditionalPaidInCapitalTaxEffectFromShareBasedCompensation', 'AdvertisingExpense', 'AllocatedShareBasedCompensationExpense', 'AllowanceForDoubtfulAccountsReceivableCurrent', 'AmortizationOfIntangibleAssets', 'AntidilutiveSecuritiesExcludedFromCompu

In [35]:
gaap["Revenues"]

{'label': 'Revenues',
 'description': 'Amount of revenue recognized from goods sold, services rendered, insurance premiums, or other activities that constitute an earning process. Includes, but is not limited to, investment and interest income before deduction of interest expense when recognized as a component of revenue, and sales and trading gain (loss).',
 'units': {'USD': [{'start': '2015-09-27',
    'end': '2016-09-24',
    'val': 215639000000,
    'accn': '0000320193-18-000145',
    'fy': 2018,
    'fp': 'FY',
    'form': '10-K',
    'filed': '2018-11-05',
    'frame': 'CY2016'},
   {'start': '2016-09-25',
    'end': '2016-12-31',
    'val': 78351000000,
    'accn': '0000320193-18-000145',
    'fy': 2018,
    'fp': 'FY',
    'form': '10-K',
    'filed': '2018-11-05',
    'frame': 'CY2016Q4'},
   {'start': '2017-01-01',
    'end': '2017-04-01',
    'val': 52896000000,
    'accn': '0000320193-18-000145',
    'fy': 2018,
    'fp': 'FY',
    'form': '10-K',
    'filed': '2018-11-05',

In [38]:
revenues = json_normalize(gaap["Revenues"]["units"]["USD"])
print(revenues)

         start         end           val                  accn    fy  fp  \
0   2015-09-27  2016-09-24  215639000000  0000320193-18-000145  2018  FY   
1   2016-09-25  2016-12-31   78351000000  0000320193-18-000145  2018  FY   
2   2017-01-01  2017-04-01   52896000000  0000320193-18-000145  2018  FY   
3   2017-04-02  2017-07-01   45408000000  0000320193-18-000145  2018  FY   
4   2016-09-25  2017-09-30  229234000000  0000320193-18-000145  2018  FY   
5   2017-07-02  2017-09-30   52579000000  0000320193-18-000145  2018  FY   
6   2017-10-01  2017-12-30   88293000000  0000320193-18-000145  2018  FY   
7   2017-12-31  2018-03-31   61137000000  0000320193-18-000145  2018  FY   
8   2018-04-01  2018-06-30   53265000000  0000320193-18-000145  2018  FY   
9   2017-10-01  2018-09-29  265595000000  0000320193-18-000145  2018  FY   
10  2018-07-01  2018-09-29   62900000000  0000320193-18-000145  2018  FY   

    form       filed     frame  
0   10-K  2018-11-05    CY2016  
1   10-K  2018-11-05 

In [39]:
assets = json_normalize(gaap["Assets"]["units"]["USD"])
print(assets)

            end           val                  accn    fy  fp    form  \
0    2008-09-27   39572000000  0001193125-09-153165  2009  Q3    10-Q   
1    2008-09-27   39572000000  0001193125-09-214859  2009  FY    10-K   
2    2008-09-27   36171000000  0001193125-10-012091  2009  FY  10-K/A   
3    2008-09-27   36171000000  0001193125-10-238044  2010  FY    10-K   
4    2009-06-27   48140000000  0001193125-09-153165  2009  Q3    10-Q   
..          ...           ...                   ...   ...  ..     ...   
101  2020-09-26  323888000000  0000320193-21-000056  2021  Q2    10-Q   
102  2020-09-26  323888000000  0000320193-21-000065  2021  Q3    10-Q   
103  2020-12-26  354054000000  0000320193-21-000010  2021  Q1    10-Q   
104  2021-03-27  337158000000  0000320193-21-000056  2021  Q2    10-Q   
105  2021-06-26  329840000000  0000320193-21-000065  2021  Q3    10-Q   

          filed      frame  
0    2009-07-22        NaN  
1    2009-10-27        NaN  
2    2010-01-25        NaN  
3    20

In [40]:
cash = json_normalize(gaap["Assets"]["units"]["USD"])
print(cash)

            end           val                  accn    fy  fp    form  \
0    2008-09-27   39572000000  0001193125-09-153165  2009  Q3    10-Q   
1    2008-09-27   39572000000  0001193125-09-214859  2009  FY    10-K   
2    2008-09-27   36171000000  0001193125-10-012091  2009  FY  10-K/A   
3    2008-09-27   36171000000  0001193125-10-238044  2010  FY    10-K   
4    2009-06-27   48140000000  0001193125-09-153165  2009  Q3    10-Q   
..          ...           ...                   ...   ...  ..     ...   
101  2020-09-26  323888000000  0000320193-21-000056  2021  Q2    10-Q   
102  2020-09-26  323888000000  0000320193-21-000065  2021  Q3    10-Q   
103  2020-12-26  354054000000  0000320193-21-000010  2021  Q1    10-Q   
104  2021-03-27  337158000000  0000320193-21-000056  2021  Q2    10-Q   
105  2021-06-26  329840000000  0000320193-21-000065  2021  Q3    10-Q   

          filed      frame  
0    2009-07-22        NaN  
1    2009-10-27        NaN  
2    2010-01-25        NaN  
3    20

In [41]:
dividends = json_normalize(gaap["Dividends"]["units"]["USD"])
print(dividends)

        start         end          val                  accn    fy  fp  form  \
0  2014-09-28  2015-09-26  11627000000  0000320193-17-000070  2017  FY  10-K   
1  2015-09-27  2016-09-24  12188000000  0000320193-18-000145  2018  FY  10-K   
2  2015-09-27  2016-09-24  12188000000  0000320193-17-000070  2017  FY  10-K   
3  2016-09-25  2017-09-30  12803000000  0000320193-18-000145  2018  FY  10-K   
4  2016-09-25  2017-09-30  12803000000  0000320193-17-000070  2017  FY  10-K   
5  2017-10-01  2018-09-29  13735000000  0000320193-18-000145  2018  FY  10-K   

        filed   frame  
0  2017-11-03  CY2015  
1  2018-11-05  CY2016  
2  2017-11-03     NaN  
3  2018-11-05  CY2017  
4  2017-11-03     NaN  
5  2018-11-05  CY2018  


In [43]:
debt = json_normalize(gaap["LongTermDebt"]["units"]["USD"])
print(debt)

           end          val                  accn    fy  fp  form       filed  \
0   2012-09-29            0  0001193125-13-416534  2013  FY  10-K  2013-10-30   
1   2012-09-29            0  0001193125-13-300670  2013  Q3  10-Q  2013-07-24   
2   2013-06-29  16958000000  0001193125-13-300670  2013  Q3  10-Q  2013-07-24   
3   2013-09-28  16960000000  0001193125-15-023732  2014  FY   8-K  2015-01-28   
4   2013-09-28  16960000000  0001193125-14-383437  2014  FY  10-K  2014-10-27   
5   2013-09-28  16960000000  0001193125-14-277160  2014  Q3  10-Q  2014-07-23   
6   2013-09-28  16960000000  0001193125-14-157311  2014  Q2  10-Q  2014-04-24   
7   2013-09-28  16960000000  0001193125-14-024487  2014  Q1  10-Q  2014-01-28   
8   2013-09-28  16960000000  0001193125-13-416534  2013  FY  10-K  2013-10-30   
9   2013-12-28  16961000000  0001193125-14-024487  2014  Q1  10-Q  2014-01-28   
10  2014-03-29  16962000000  0001193125-14-157311  2014  Q2  10-Q  2014-04-24   
11  2014-06-28  29030000000 

In [45]:
marketing = json_normalize(gaap["MarketingExpense"]["units"]["USD"])
print(marketing)

        start         end        val                  accn    fy  fp  form  \
0  2009-09-27  2010-09-25   75000000  0001193125-13-170623  2012  FY   8-K   
1  2009-09-27  2010-09-25   75000000  0001193125-12-444068  2012  FY  10-K   
2  2010-12-26  2011-03-26   25000000  0001193125-12-182321  2012  Q2  10-Q   
3  2011-03-27  2011-06-25   26000000  0001193125-12-314552  2012  Q3  10-Q   
4  2010-09-26  2011-09-24  102000000  0001193125-13-170623  2012  FY   8-K   
5  2010-09-26  2011-09-24  102000000  0001193125-12-444068  2012  FY  10-K   

        filed     frame  
0  2013-04-24    CY2010  
1  2012-10-31       NaN  
2  2012-04-25  CY2011Q1  
3  2012-07-25  CY2011Q2  
4  2013-04-24    CY2011  
5  2012-10-31       NaN  


In [48]:
gaap["NumberOfStores"]["units"].keys()
nb_stores = json_normalize(gaap["NumberOfStores"]["units"]["Store"])
print(nb_stores)

          end  val                  accn    fy  fp  form       filed  \
0  2014-12-27  447  0001193125-15-023697  2015  Q1  10-Q  2015-01-28   
1  2015-03-28  453  0001193125-15-153166  2015  Q2  10-Q  2015-04-28   
2  2015-06-27  456  0001193125-15-259935  2015  Q3  10-Q  2015-07-22   
3  2015-09-26  463  0001193125-15-356351  2015  FY  10-K  2015-10-28   
4  2015-12-26  469  0001193125-16-439878  2016  Q1  10-Q  2016-01-27   
5  2016-03-26  475  0001193125-16-559625  2016  Q2  10-Q  2016-04-27   

       frame  
0  CY2014Q4I  
1  CY2015Q1I  
2  CY2015Q2I  
3  CY2015Q3I  
4  CY2015Q4I  
5  CY2016Q1I  


In [50]:
dividend_payments = json_normalize(gaap["PaymentsOfDividends"]["units"]["USD"])
print(dividend_payments)

          start         end          val                  accn    fy  fp  \
0    2011-09-25  2011-12-31            0  0001193125-12-444068  2012  FY   
1    2011-09-25  2011-12-31            0  0001193125-13-300670  2013  Q3   
2    2011-09-25  2012-03-31            0  0001193125-13-168288  2013  Q2   
3    2012-01-01  2012-03-31            0  0001193125-12-444068  2012  FY   
4    2012-01-01  2012-03-31            0  0001193125-13-300670  2013  Q3   
..          ...         ...          ...                   ...   ...  ..   
178  2019-09-29  2020-06-27  10570000000  0000320193-21-000065  2021  Q3   
179  2019-09-29  2020-09-26  14081000000  0000320193-20-000096  2020  FY   
180  2020-09-27  2020-12-26   3613000000  0000320193-21-000010  2021  Q1   
181  2020-09-27  2021-03-27   7060000000  0000320193-21-000056  2021  Q2   
182  2020-09-27  2021-06-26  10827000000  0000320193-21-000065  2021  Q3   

     form       filed     frame  
0    10-K  2012-10-31       NaN  
1    10-Q  2013-07-

In [51]:
research_expenses = json_normalize(
    gaap["ResearchAndDevelopmentExpense"]["units"]["USD"]
)
print(research_expenses)

          start         end          val                  accn    fy  fp  \
0    2006-10-01  2007-09-29    782000000  0001193125-09-214859  2009  FY   
1    2006-10-01  2007-09-29    782000000  0001193125-10-012091  2009  FY   
2    2007-09-30  2008-06-28    811000000  0001193125-09-153165  2009  Q3   
3    2008-03-30  2008-06-28    292000000  0001193125-09-153165  2009  Q3   
4    2007-09-30  2008-09-27   1109000000  0001193125-09-214859  2009  FY   
..          ...         ...          ...                   ...   ...  ..   
164  2020-09-27  2020-12-26   5163000000  0000320193-21-000010  2021  Q1   
165  2020-09-27  2021-03-27  10425000000  0000320193-21-000056  2021  Q2   
166  2020-12-27  2021-03-27   5262000000  0000320193-21-000056  2021  Q2   
167  2020-09-27  2021-06-26  16142000000  0000320193-21-000065  2021  Q3   
168  2021-03-28  2021-06-26   5717000000  0000320193-21-000065  2021  Q3   

       form       filed     frame  
0      10-K  2009-10-27       NaN  
1    10-K/A  20

## Looking at frames now

In [59]:


def get_frames(cik: str, headers) -> dict:
    assert len(cik) == 10
    resp = requests.get(
        url_frames + "CIK" + cik + ".json", headers=headers, timeout=10
    )
    resp_json = None
    if resp.status_code < 400:
        resp_json = resp.json()
    return resp_json

get_frames(apple_cik, headers)

https://data.sec.gov/api/xbrl/frames/


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [71]:
print(url_frames)
resp = requests.get(
    url_frames + "CIK" + apple_cik + ".json", headers=headers, timeout=10
)
type(resp)
print(resp.content)
type(resp.content)
# resp_json = json.load(resp.content)

https://data.sec.gov/api/xbrl/frames/
b'<?xml version="1.0" encoding="UTF-8"?>\n<Error><Code>NoSuchKey</Code><Message>The specified key does not exist.</Message><Key>api/xbrl/frames/CIK0000320193.json</Key><RequestId>QJTNWG8XZ8CVFSQF</RequestId><HostId>sHtONm0Td298pVwuXQF1c3NX52lAN2inMIf/OGMNi6ZuamrhGO5BSaId3p0DY9Pzi/rH01s3+Es=</HostId></Error>'


bytes

In [None]:
# resp_json = get_frames(apple_cik, headers)
resp_json = get_frames(apple_cik, headers)
resp_json.keys()

## get submissions

In [72]:
def get_submissions(cik: str, headers) -> dict:
    assert len(cik) == 10
    resp = requests.get(
        url_submissions + "CIK" + cik + ".json", headers=headers, timeout=10
    )
    resp_json = None
    if resp.status_code < 400:
        resp_json = resp.json()
    return resp_json

In [73]:
resp = get_submissions(apple_cik, headers)