# Security and Exchange Commision - Edgar database API

Financial reports for US-listed companies.

- Do not require authentication or API key
- Access forms 10-Q, 10-K, 8-K, 20-F, 40-F, 6-K, and their variants
- Reports in XBRL format (eXtensible Business Reporting Language) 

API documentation:
https://www.sec.gov/edgar/sec-api-documentation


Combines with
CEO dismissal database


In [13]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [3]:
import requests
import pandas as pd
from pandas import json_normalize
import json
import matplotlib.pyplot as plt

In [4]:
plt.style.use("seaborn")

In [5]:
# SEC Edgar database API
# https://www.sec.gov/edgar/sec-api-documentation

# history of submissions of financial report fillings
url_submissions = "https://data.sec.gov/submissions/"

# a few quarters of financial reports
url_facts = "https://data.sec.gov/api/xbrl/companyfacts/"

# hisotry of values for a specific financial report entry (e.g. Revenues)
# for a specific company
url_concepts = "https://data.sec.gov/api/xbrl/companyconcepts/"

# ...
url_frames = "https://data.sec.gov/api/xbrl/frames/"

In [6]:
# url_facts
# test with Apple Inc.
# Apple Inc. CIK found manually from https://www.sec.gov/edgar/searchedgar/cik.htm

apple_cik = "0000320193"
headers = {"User-Agent": "Anselme Borgeaud (individual) aborgeaud@gmail.com"}


def get_facts(cik: str, headers) -> dict:
    assert len(cik) == 10
    resp = requests.get(
        url_facts + "CIK" + apple_cik + ".json", headers=headers, timeout=5
    )
    resp_json = None
    if resp.status_code < 400:
        resp_json = resp.json()
    return resp_json

In [18]:
resp_jons = get_facts(apple_cik, headers)

NameError: name 'resp_json' is not defined

In [None]:
cik_str = f"{resp_json['cik']:010d}"
cik_str

In [None]:
resp_json["entityName"]

In [None]:
# keys of resp_json['facts'] are the taxonomies (e.g., us-gaap, ifrs-full, dei, or srt)
resp_json["facts"].keys()

In [None]:
gaap = resp_json["facts"]["us-gaap"]

In [None]:
resp_json["facts"]["dei"].keys()

In [None]:
for i, k in enumerate(gaap.keys()):
    if i < 20:
        print(f"{i} - {k}")

In [None]:
for i, k in enumerate(gaap.keys()):
    print(f"{i} - {k}")

In [None]:
gaap["Revenues"].keys()

In [None]:
gaap["Revenues"]["units"].keys()

In [None]:
revenues = gaap["Revenues"]["units"]["USD"]

revenues[0].keys()

In [None]:
for revenue in revenues:
    print(f"{revenue['start']} to {revenue['end']}")

In [None]:
df_revenues = json_normalize(revenues)
df_revenues

In [None]:
df_quarter_revenues = df_revenues[df_revenues["frame"].str.contains("Q")]
df_quarter_revenues

In [None]:
df_quarter_revenues.plot(x="frame", y="val", kind="bar")

In [7]:
# submissions


def get_submissions(cik: str, headers) -> dict:
    assert len(cik) == 10
    resp = requests.get(
        url_submissions + "CIK" + apple_cik + ".json", headers=headers, timeout=5
    )
    resp_json = None
    if resp.status_code < 400:
        resp_json = resp.json()
    return resp_json

In [8]:
submission_json = get_submissions(apple_cik, headers)

In [9]:
submission_json.keys()

dict_keys(['cik', 'entityType', 'sic', 'sicDescription', 'insiderTransactionForOwnerExists', 'insiderTransactionForIssuerExists', 'name', 'tickers', 'exchanges', 'ein', 'description', 'website', 'investorWebsite', 'category', 'fiscalYearEnd', 'stateOfIncorporation', 'stateOfIncorporationDescription', 'addresses', 'phone', 'flags', 'formerNames', 'filings'])

In [None]:
submission_json["filings"]["recent"]

In [None]:
submission_json["insiderTransactionForIssuerExists"]

In [None]:
# company concepts

concepts = requests.get(
    f"https://data.sec.gov/api/xbrl/companyconcept/CIK{apple_cik}/us-gaap/AccountsPayableCurrent.json",
    headers=headers,
)

concepts_json = concepts.json()

In [None]:
concepts_json.keys()

In [None]:
df_accpay = json_normalize(concepts_json["units"]["USD"])
df_accpay

In [None]:
df_accpay_quarterly = df_accpay[df_revenues["fp"].str.contains("Q")]

In [None]:
df_accpay_quarterly["period"] = (
    df_accpay_quarterly["fy"].astype("str") + df_accpay_quarterly["fp"]
)

In [None]:
df_accpay_quarterly.plot(x="period", y="val", kind="bar")

# Yahoo Finance API

In [11]:
from datetime import datetime, date, time, timezone


def yahoo_url(ticker: str, from_day: str, to_day: str) -> str:
    """Get url to request yahoo finance."""
    from_dt = int(
        datetime.combine(
            date.fromisoformat(from_day), time(), tzinfo=timezone.utc
        ).timestamp()
    )
    to_dt = int(
        datetime.combine(
            date.fromisoformat(to_day), time(), tzinfo=timezone.utc
        ).timestamp()
    )
    return (
        "https://query1.finance.yahoo.com/v7/finance/download/"
        f"{ticker.upper()}"
        f"?period1={from_dt}&period2={to_dt}&interval=1d"
        "&events=history&includeAdjustedClose=true"
    )

In [12]:
ticker = "AAPL"
from_day = "2021-01-01"
to_day = "2021-09-01"

url = yahoo_url(ticker, from_day, to_day)
headers = {"User-Agent": "Chrome/92.0.4515.159"}

resp = requests.get(url, headers=headers, timeout=5)
print(resp.status_code)

AttributeError: type object 'datetime.date' has no attribute 'fromisoformat'

In [None]:
ticker_infos = resp.content.decode("utf-8")

In [None]:
import io

df_ticker = pd.read_csv(io.StringIO(ticker_infos))
df_ticker

In [None]:
df_ticker["Date"] = pd.to_datetime(df_ticker["Date"])

In [None]:
df_ticker.plot(x="Date", y="Close", kind="line")