---
# Setup

In [1]:
%%html
<style>
table {float:left}
</style>

# Parsing SEC Filing XBRL Document


## Objective

Parse the filing XBRL file to create a DOM like structure that represent the filing data

## References

* [XBRL Specification - Extensible Business Reporting Language (XBRL) 2.1](https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html)

* [List of US GAAP Standards](https://xbrlsite.azurewebsites.net/2019/Prototype/references/us-gaap/)
* [XBRL US - List of Elements](https://xbrl.us/data-rule/dqc_0015-le/)

**Element Version**|**Element ID**|**Namespace**|**Element Label**|**Element Name**|**Balance Type**|**Definition**
:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:
1|1367|us-gaap|Interest Expense|InterestExpense|debit|Amount of the cost of borrowed funds accounted for as interest expense.
2|2692|us-gaap|Cash and Cash Equivalents, at Carrying Value|CashAndCashEquivalentsAtCarryingValue|debit|Amount of currency on hand as well as demand deposits with banks or financial institutions. Includes other kinds of accounts that have the general characteristics of demand deposits. Also includes short-term, highly liquid investments that are both readily convertible to known amounts of cash and so near their maturity that they present insignificant risk of changes in value because of changes in interest rates. Excludes cash and cash equivalents within disposal group and discontinued operation.

## XBRL Element

* [XBRL Glossary of Terms](https://www.sec.gov/page/osd_xbrlglossary)
* [XBRL - What is us-gaap:OperatingSegmentsMember element anb where is it defined?](https://money.stackexchange.com/questions/148010/xbrl-what-is-us-gaapoperatingsegmentsmember-element-anb-where-is-it-defined)

### Example
For instance, Qorvo 2020 10K

* [XBRL/rfmd-20210403_htm.xml](https://www.sec.gov/Archives/edgar/data/1604778/000160477821000032/rfmd-20210403_htm.xml)
* [HTML/rfmd-20210403.htm)](https://www.sec.gov/Archives/edgar/data/1604778/000160477821000032/rfmd-20210403.htm):

```
<us-gaap:cashandcashequivalentsatcarryingvalue contextref="*" decimals="-3" id="..." unitref="usd">
  1397880000
</us-gaap:cashandcashequivalentsatcarryingvalue>,
<us-gaap:cashandcashequivalentsatcarryingvalue contextref="***" decimals="-3" id="..." unitref="usd">
  714939000
</us-gaap:cashandcashequivalentsatcarryingvalue>,
<us-gaap:cashandcashequivalentsatcarryingvalue contextref="***" decimals="-3" id="..." unitref="usd">
 711035000
</us-gaap:cashandcashequivalentsatcarryingvalue>
```

Corresponds to the Cash and Cash equivalents in the Cash Flow statement.

<img src="../image/edgar_qorvo_2020_10K_CF.png" align="left" width=800 />

In [2]:
from typing import (
    List,
    Dict
)
import operator
import logging
import time
import datetime
import dateutil
import calendar
import re
import requests
import unicodedata
import bs4
from bs4 import BeautifulSoup
from IPython.core.display import (
    display, 
    HTML
)

import numpy as np
import pandas as pd
pd.set_option('display.float_format', lambda x: ('%f' % x).rstrip('0').rstrip('.'))
pd.set_option('display.colheader_justify', 'center')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [3]:
logging.basicConfig(level=logging.DEBUG)
Logger = logging.getLogger(__name__)

In [4]:
def restore_windows_1252_characters(restore_string):
    """
        Replace C1 control characters in the Unicode string s by the
        characters at the corresponding code points in Windows-1252,
        where possible.
    """

    def to_windows_1252(match):
        try:
            return bytes([ord(match.group(0))]).decode('windows-1252')
        except UnicodeDecodeError:
            # No character at the corresponding code point: remove it.
            return ''
        
    return re.sub(r'[\u0080-\u0099]', to_windows_1252, restore_string)

# FS Constant

In [5]:
%load_ext autoreload
%autoreload 2
from sec_edgar_constant import (
    EDGAR_HTTP_HEADERS,
    SEC_FORM_TYPE_10K,
    SEC_FORM_TYPE_10Q,
    FS_PL,
    FS_BS,
    FS_CF,
    FS_ELEMENT_REP_SHARES_OUTSTANDING,
    # PL
    FS_ELEMENT_REP_REVENUE,
    FS_ELEMENT_REP_OP_COST,
    FS_ELEMENT_REP_OP_INCOME,
    FS_ELEMENT_REP_GROSS_PROFIT,
    FS_ELEMENT_REP_OPEX_RD,
    FS_ELEMENT_REP_OPEX_SGA,
    FS_ELEMENT_REP_OPEX,
    FS_ELEMENT_REP_NET_INCOME,
    FS_ELEMENT_REP_EPS,
    # BS
    FS_ELEMENT_REP_CASH,
    FS_ELEMENT_REP_CURRENT_ASSETS,
    FS_ELEMENT_REP_TOTAL_ASSETS,
    FS_ELEMENT_REP_CURRENT_LIABILITIES,
    FS_ELEMENT_REP_LIABILITIES,
    FS_ELEMENT_REP_EQUITY,           # Stockholders' Equity or Total Equity
    FS_ELEMENT_REP_EQUITY_AND_LIABILITIES,
)
from sec_edgar_common import (
    is_date_string,
    filename_basename,
    filename_extension,
)
from xbrl_gaap_function import (
    REGEXP_NUMERIC,
    REGEXP_XBRL_TAG_CONTEXT,
    REGEXP_XBRL_TAG_INSTANT,
    REGEXP_XBRL_TAG_PERIOD,
    REGEXP_XBRL_TAG_START_DATE,
    REGEXP_XBRL_TAG_END_DATE,

    get_company_name,
    get_attributes_to_select_target_fs_elements,
    get_financial_element_columns,
    get_target_context_ids,
    # sed -n 's/^def \(get_pl_.*\)(.*/\1,/p'
    get_pl_revenues,
    get_pl_revenue_other,
    get_pl_income_interest,
    get_pl_income_other,
    get_pl_cost_of_revenues,
    get_pl_gross_profit,
    get_pl_operating_expense_r_and_d,
    get_pl_operating_expense_selling_administrative,
    get_pl_operating_expense_other,
    get_pl_operating_expense_total,
    get_pl_operating_income,
    get_pl_non_operating_expense_interest,
    get_pl_non_operating_expense_other,
    get_pl_income_tax,
    get_pl_net_income,
    get_pl_shares_outstanding,
    get_pl_eps,
    # sed -n 's/^def \(get_bs_.*\)(.*/\1,/p'
    get_bs_current_asset_cash_and_equivalents,
    get_bs_current_asset_restricted_cash_and_equivalents,
    get_bs_current_asset_short_term_investments,
    get_bs_current_asset_account_receivables,
    get_bs_current_asset_inventory,
    get_bs_current_asset_other,
    get_bs_current_assets,
    get_bs_non_current_asset_property_and_equipment,
    get_bs_non_current_asset_restricted_cash_and_equivalent,
    get_bs_non_current_asset_deferred_income_tax,
    get_bs_non_current_asset_goodwill,
    get_bs_non_current_asset_other,
    get_bs_total_assets,
    get_bs_current_liability_account_payable,
    get_bs_current_liability_tax,
    get_bs_current_liability_longterm_debt,
    get_bs_current_liabilities,
    get_bs_non_current_liability_longterm_debt,
    get_bs_non_current_liability_deferred_tax,
    get_bs_non_current_liability_other,
    get_bs_total_liabilities,
    get_bs_stockholders_equity_paid_in,
    get_bs_stockholders_equity_retained,
    get_bs_stockholders_equity_other,
    get_bs_stockholders_equity,
    get_bs_total_liabilities_and_stockholders_equity,
)
from sec_edgar_list_xbrl_xml import (
    EdgarList
)
xbrl_url = EdgarList.xbrl_url

DEBUG:ray:[ray] Forcing OMP_NUM_THREADS=1 to avoid performance degradation with many workers (issue #6998). You can override this by explicitly setting OMP_NUM_THREADS.


---
# Load EDGAR Filing XBRL

Download the ```_htm.xml``` file from EDGAR. SEC now requires user-agent header.

## Filling (CIK/Accession)

In [6]:
# AMKOR 2020 10K
CIK = '1047127'
ACCESSION = '0001047127-20-000006'.replace('-', '')
FORM_TYPE = SEC_FORM_TYPE_10K

In [7]:
# DIOD 2020 10K
CIK = '29002'
ACCESSION = '000156459021007008'
FORM_TYPE = SEC_FORM_TYPE_10K

In [8]:
# OPTICAL CABLE CORPORATION 10-K for the fiscal year ended October 31, 2021
CIK = '0001000230'
ACCESSION = '000143774921028951'
FORM_TYPE = SEC_FORM_TYPE_10K

In [9]:
# QORVO 2021 10K
CIK = '1604778'
ACCESSION = '000160477821000032'
FORM_TYPE = SEC_FORM_TYPE_10K

In [10]:
# NICHOLAS FINANCIAL, INC.
CIK = '1000045'
ACCESSION = '000119312513259413'
FORM_TYPE = SEC_FORM_TYPE_10K

In [11]:
CIK = '1000697'
ACCESSION = '000007202010000036'
FORM_TYPE = SEC_FORM_TYPE_10Q

In [12]:
CIK = '1000697'
ACCESSION = '000095012310045994'
FORM_TYPE = SEC_FORM_TYPE_10Q

In [13]:
# AMKOR 2021 10Q
CIK = '1047127'
ACCESSION = '000104712721000043'
FORM_TYPE = SEC_FORM_TYPE_10Q

In [14]:
INDEX_XML_URL = f"https://www.sec.gov/Archives/edgar/data/{CIK}/{ACCESSION}/index.xml"
INDEX_XML_URL

'https://www.sec.gov/Archives/edgar/data/1047127/000104712721000043/index.xml'

## Filing Directory Listing

Using EDGAR Directory Listing (index.html, index.json, or index.xml), identify the XBRL file (_htm.xml) path. 

## Download XBRL

In [15]:
XBRL_XML_URL = xbrl_url(INDEX_XML_URL)

response = requests.get(XBRL_XML_URL, headers=EDGAR_HTTP_HEADERS)
if response.status_code == 200:
    content = response.content.decode("utf-8") 
else:
    assert False, f"{XBRL_URL} failed with status {response.status_code}"
    
del response

INFO:root:xbrl_url(): getting filing directory index [https://www.sec.gov/Archives/edgar/data/1047127/000104712721000043/index.xml]...
DEBUG:root:http_get_content(): GET url [https://www.sec.gov/Archives/edgar/data/1047127/000104712721000043/index.xml] headers [{'User-Agent': 'Company Name myname@company.com'}]
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.sec.gov:443
DEBUG:urllib3.connectionpool:https://www.sec.gov:443 "GET /Archives/edgar/data/1047127/000104712721000043/index.xml HTTP/1.1" 200 1218
INFO:root:identifying XBRL URL for [https://www.sec.gov/Archives/edgar/data/1047127/000104712721000043/index.xml]...
DEBUG:root:Filing directory path is [/Archives/edgar/data/1047127/000104712721000043/]
INFO:root:XBRL XML [https://sec.gov/Archives/edgar/data/1047127/000104712721000043/amkr-20210930_htm.xml] identified
INFO:root:XBRL URL identified [https://sec.gov/Archives/edgar/data/1047127/000104712721000043/amkr-20210930_htm.xml]
DEBUG:urllib3.connectionpool:Start

In [16]:
filename_basename(XBRL_XML_URL)

'amkr-20210930_htm'

In [17]:
content = restore_windows_1252_characters(content)

# XBRL

Use the BS4 HTML Parser to be able to:
1. Handle ```<namespace:tag>``` as a single tag (with lower cases)
2. Work without namespace definitions (xmlns=) 

BS4/XML parser simply drops the namespaces when the namespace definitions are not provided, hence searching tags with ```<namespace:tag>``` does not match. To avoid this issue, use HTML parser.

In [18]:
# HTML parser lowers all the names
soup = BeautifulSoup(content, 'html.parser')
# HTML parser does not match case sensitivve name
print(soup.find("dei:EntityRegistrantName"))

None


In [19]:
# Verify [<xbrl xmlns="http://www.xbrl.org/2003/instance">]
#assert soup.find('xbrl', attrs={"xmlns": "http://www.xbrl.org/2003/instance"}), \
#    f"Invalid XBRL {INDEX_XML_URL}"

## Company Name

In [20]:
def get_company_name(soup):
    """Get company (registrant) name from the XBRL"""
    registrant_name = soup.find(
        name=re.compile("dei:EntityRegistrantName".lower(), re.I)
    ).string.strip()
    assert registrant_name, f"No registrant name found"

    # Remove non-ascii characters to form a company name
    registrant_name = ''.join(e for e in registrant_name if e.isalnum())
    logging.debug("get_company_name(): company name is [%s]" % registrant_name)
    return registrant_name

In [21]:
company_name = get_company_name(soup)

DEBUG:root:get_company_name(): company name is [AMKORTECHNOLOGYINC]


## Repoting period

Each 10-K and 10-Q XBRL has the reporting period for the filing. To exclude the other period, e.g. pervious year or quarter, use the ```context id``` for the reporting period. **Most** 10-K, 10-Q specify the annual period with the first ```<startDate> and <endDate>``` tags.

For instances:

### QRVO 10-K 2020

```
<context id="ifb6ce67cf6954ebf88471dd82daa9247_D20200329-20210403">
    <entity>
    <identifier scheme="http://www.sec.gov/CIK">0001604778</identifier>
    </entity>
    <period>
        <startDate>2020-03-29</startDate>
        <endDate>2021-04-03</endDate>
    </period>
</context>
```

### AMKR 10-K 2020

```
<context id="i5fac0a392353427b8266f185495754d3_D20200101-20201231">
    <entity>
    <identifier scheme="http://www.sec.gov/CIK">0001047127</identifier>
    </entity>
    <period>
        <startDate>2020-01-01</startDate>
        <endDate>2020-12-31</endDate>
    </period>
</context>
```

### AAPL 10-Q 4th QTR 2020

```
<context id="i6e431846933d461fb8c8c0bdf98c9758_D20200927-20201226">
    <entity>
    <identifier scheme="http://www.sec.gov/CIK">0000320193</identifier>
    </entity>
    <period>
        <startDate>2020-09-27</startDate>
        <endDate>2020-12-26</endDate>
    </period>
</context>
```

**However, there are companies that do not have this manner**. For instance [10-K for OPTICAL CABLE CORPORATION(CIK=0001000230)](https://www.sec.gov/Archives/edgar/data/1000230/000143774921028951/occ20211031_10k_htm.xml) has the same start and end dates at first.

```
<context id="d202110K">
    <entity>
        <identifier scheme="http://www.sec.gov/CIK">0001000230</identifier>
    </entity>
    <period>
        <startDate>2021-10-31</startDate>   # <-----
        <endDate>2021-10-31</endDate>
    </period>
</context>
<context id="d_2020-11-01_2021-10-31">
    <entity>
        <identifier scheme="http://www.sec.gov/CIK">0001000230</identifier>
    </entity>
    <period>
        <startDate>2020-11-01</startDate>   # <-----
        <endDate>2021-10-31</endDate>
    </period>
</context>
```

The report uses the 2nd for 2021 F/S element value but does not use the first one.

**B/S**
```
<us-gaap:Assets 
    contextRef="i_2021-10-31"    # <-----
    decimals="INF" 
    id="c79893606" 
    unitRef="USD"
>
  37916530
</us-gaap:Assets>
```

**P/L**
```
<us-gaap:RevenueFromContractWithCustomerIncludingAssessedTax 
  contextRef="d_2020-11-01_2021-10-31"     # <-----
  decimals="INF" 
  id="c79893662" 
  unitRef="USD"
>
  59136294
</us-gaap:RevenueFromContractWithCustomerIncludingAssessedTax>
```

<img src='../image/edgar_optical_cable_2021_10K.png' align="left" width=500/>

### Get the period from the 1st

For now, just get the period from the 1st **period** element.

In [22]:
def get_report_period_end_date(soup, date_from_xbrl_filename=None):
    """Identify the end date of the report period from the first "context" tag
    in the XBRL that has <period><startDate> tag as its child tag. 

    <context id="ifb6ce67cf6954ebf88471dd82daa9247_D20200329-20210403">
        <entity>
        <identifier scheme="http://www.sec.gov/CIK">0001604778</identifier>
        </entity>
        <period>
            <startDate>2020-03-29</startDate>
            <endDate>2021-04-03</endDate>        # <-----
        </period>
    </context>

    Args:
        soup: Source BS4
        date_from_xbrl_filename: Date extracted from XBRL XML filename 
    Returns: reporting period e.g. "2021-09-30"
    """
    candidates = []
    report_period_end_date = None
    
    # --------------------------------------------------------------------------------
    # List all the endDate from the Contexts
    # --------------------------------------------------------------------------------
    regexp_end_date_pattern = re.compile(r"(\s*)([12][0-9]{3}-[0-9]{1,2}-[0-9]{1,2})(\s*)")
    for context in soup.find_all(REGEXP_XBRL_TAG_CONTEXT):
        # --------------------------------------------------------------------------------
        # Find Context tag which has <period><enddate> child tag
        # --------------------------------------------------------------------------------
        period = context.find(REGEXP_XBRL_TAG_PERIOD)
        if period:
            end_date = period.find(REGEXP_XBRL_TAG_END_DATE)
            if end_date:
                match = re.match(regexp_end_date_pattern, end_date.string.strip())
                if match and is_date_string(match.group(2)):
                    candidates.append(match.group(2))

    assert len(candidates) > 0, "No period found"
    
    # --------------------------------------------------------------------------------
    # If the date from the XBRL XML name is in the candidate, use it as the end_datge
    # Otherwise use the first candidate.
    # --------------------------------------------------------------------------------
    if date_from_xbrl_filename in candidates:
        report_period_end_date = date_from_xbrl_filename
    else:
        report_period_end_date = candidates[0]

    return report_period_end_date

In [23]:
def get_date_from_xbrl_filename(filename):
    """
    Infer the reporting period end date from the XBRL XML filename.
    e.g if XBRL XML is rfmd-20210403_htm.xml, then 2021-04-03 is highly
    like to be the report period end date.

    In the XBRL XML file, there is a Context which identifies the end date
    of the report period.

    <context id="ifb6ce67cf6954ebf88471dd82daa9247_D20200329-20210403">
        <entity>
        <identifier scheme="http://www.sec.gov/CIK">0001604778</identifier>
        </entity>
        <period>
            <startDate>2020-03-29</startDate>
            <endDate>2021-04-03</endDate>         # <--- end of the period
        </period>
    </context>

    This endDate value is highly likely to match the date from the filename.
    """
    filename = filename_basename(filename)
    if False:
        pattern = re.compile(r"([^0-9]*)[-]*([12][0-9]{3})[-]*([0-9]{2})[-]*([0-9]{2})(.*)")
        match = re.match(pattern, filename)
        if match:
            year = match.group(2)
            month = match.group(3)
            day = match.group(4)
            date_from_xbrl_filename = "-".join([year, month, day])
            return date_from_xbrl_filename
        else:
            return None
    else:
        pattern = re.compile(r"(.*)([12][0-9]{7})(.*)")
        match = re.match(pattern, filename)
        if match:
            print(match.groups())
            yyyymmdd = match.group(2)
            year = yyyymmdd[:4]
            month = yyyymmdd[4:6]
            day = yyyymmdd[6:8]
            return "-".join([year, month, day])
        else:
            return None

date_from_xbrl_filename = get_date_from_xbrl_filename(filename_basename(XBRL_XML_URL))
date_from_xbrl_filename

('amkr-', '20210930', '_htm')


'2021-09-30'

In [24]:
print(get_date_from_xbrl_filename(filename_basename(XBRL_XML_URL)))
print(get_date_from_xbrl_filename("hsicform10q20200926_htm.xml"))

('amkr-', '20210930', '_htm')
2021-09-30
('hsicform10q', '20200926', '_htm')
2020-09-26


In [25]:
report_period_end_date = get_report_period_end_date(soup, date_from_xbrl_filename)
report_period_end_date

'2021-09-30'

### List contexts whose endDate matrches report period end date

In [26]:
# Experiment to test extract the context that has the matching end date
regexp = re.compile(rf"(\s*)({report_period_end_date})(\s*)")

for context in soup.find_all(REGEXP_XBRL_TAG_CONTEXT):
    period = context.find(REGEXP_XBRL_TAG_PERIOD)
    if period:
        end_date = period.find(REGEXP_XBRL_TAG_END_DATE)
        if end_date:
            match = re.match(regexp, end_date.string.strip())
            if match:
                print(f"match[contxt_id {context['id']} : {match.group(2)}]")
            else:
                print(f"no match {context['id']} : {end_date.string.strip()}")


match[contxt_id i9289f6e06c5a47e09383379425d29391_D20210101-20210930 : 2021-09-30]
match[contxt_id icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930 : 2021-09-30]
no match ia455a570aa4d45b4bc44cd7e8de9d9b4_D20200701-20200930 : 2020-09-30
no match ic2e1ad291ad74d618f2b7a29813860ca_D20200101-20200930 : 2020-09-30
match[contxt_id ifc522cda813e4842a934b78a0163bb07_D20210701-20210930 : 2021-09-30]
match[contxt_id i17ee23e173904148bb08842b2e310bfe_D20210701-20210930 : 2021-09-30]
match[contxt_id i0037e2367a854cfbb9461ecc19fd857b_D20210701-20210930 : 2021-09-30]
match[contxt_id i09edd8b4692f4d23b945d745226e25ea_D20210701-20210930 : 2021-09-30]
match[contxt_id if99c73b4a593477f9a8eb4b28c6280b5_D20210701-20210930 : 2021-09-30]
match[contxt_id i5c45e13d3f674959806aa9bc9338a4d3_D20210701-20210930 : 2021-09-30]
match[contxt_id if98a35bfadbe4c08a639d8472e6474dc_D20210101-20210930 : 2021-09-30]
match[contxt_id i505b3ad14f7f4dfc997f14c48fbafb20_D20210101-20210930 : 2021-09-30]
match[contxt_id id83

In [27]:
### List contexts whose instant matrches report period end date

In [28]:
# Experiment to test extract the context that has the matching end date
regexp = re.compile(rf"(\s*)({report_period_end_date})(\s*)")

for context in soup.find_all(REGEXP_XBRL_TAG_CONTEXT):
    period = context.find(REGEXP_XBRL_TAG_PERIOD)
    if period:
        instant = period.find([REGEXP_XBRL_TAG_INSTANT])
        if instant:
            match = re.match(regexp, instant.string)
            if match:
                print(f"match[contxt_id {context['id']} : {match.group(2)}]")
            else:
                print(f"no match {context['id']} : {instant.string.strip()}")


no match ic04e79f136204c2db93e08e57db55c36_I20211022 : 2021-10-22
match[contxt_id i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930 : 2021-09-30]
no match i8f5f44f1abec41629b71c79c8f7b9b77_I20201231 : 2020-12-31
no match i678d78c5aa3146938956f9db3497e365_I20210630 : 2021-06-30
no match i80c567e66b9146729a12930f8cab3a2c_I20210630 : 2021-06-30
no match i8085e9af4fce4988822ef7f482056a47_I20210630 : 2021-06-30
no match i97b6e378b9f44b2c989495e333888e4d_I20210630 : 2021-06-30
no match i32f8ba25f994490aa02c014c437b891e_I20210630 : 2021-06-30
no match i2f02b63f51244656816dba3d559b1209_I20210630 : 2021-06-30
no match i9dd25bfcd9e54638aa03d1a3e3784b0c_I20210630 : 2021-06-30
no match i4a2c61723aad43ec8571211415427ee3_I20210630 : 2021-06-30
match[contxt_id iac8b4d6a42f04c3490ca0d40e3a1c998_I20210930 : 2021-09-30]
match[contxt_id icb30e95fa0f44b5d92822d45b15dc68f_I20210930 : 2021-09-30]
match[contxt_id i4e0c7614ca93413f98b1e7b603ee6bc5_I20210930 : 2021-09-30]
match[contxt_id i8602e16198034c75a1dc61e5edd

no match i7096f3394103423596cf8aca99307824_I20201031 : 2020-10-31
match[contxt_id i762792c0e5a94869b3f3c9d7a3d6fefb_I20210930 : 2021-09-30]
no match i7e4ae9d5934848e8b1789b24330839b0_I20201231 : 2020-12-31
match[contxt_id ib09c8a783faa4dd1a97f9099877d54cb_I20210930 : 2021-09-30]
no match i61612c290d994e2eaa3fc29938e23163_I20201231 : 2020-12-31
match[contxt_id ie8d72c9460a4470db2e58de40b89d7f9_I20210930 : 2021-09-30]
no match ib5dea2dd8bd344c1bb54bf307b81cfbc_I20201231 : 2020-12-31
match[contxt_id i91e94d4627744428be091405f8dc9bd3_I20210930 : 2021-09-30]
no match i75dfc247bffa4814a9a5a595d835a20e_I20201231 : 2020-12-31
match[contxt_id i9cb4f758915445a4846e33744bad45b6_I20210930 : 2021-09-30]
no match i4da4607238644b9fae5b670321f4d273_I20201231 : 2020-12-31
match[contxt_id i111280527627402484cfdd57cf8a8a2c_I20210930 : 2021-09-30]
no match i11ea44b55ca6497ba1b667f4192fff80_I20201231 : 2020-12-31
match[contxt_id i7fa956eb1d0d411b809e5092a4b254f0_I20210930 : 2021-09-30]
no match ie4f448d5ef

### Regexp to find all the contexts that match with report_period_end_date

10-K, 10-Q F/S uses multiple contexts to refer to the F/S element values for the **period**. Collect all the contexts for the **period**.

In [29]:
# Example
def get_start_date_range(end_date, form_type):
    end_date = dateutil.parser.parse(end_date)
    if form_type == SEC_FORM_TYPE_10K:
        duration = datetime.timedelta(days=365) 

    if form_type == SEC_FORM_TYPE_10Q:
        duration = datetime.timedelta(days=90) 

    _from = end_date - duration - datetime.timedelta(days=30)
    _to  = end_date - duration + datetime.timedelta(days=30)
    print(f"Range: ({str(_from)[:10]}, {str(_to)[:10]})")
    
get_start_date_range("2010-03-31", SEC_FORM_TYPE_10Q)

Range: (2009-12-01, 2010-01-30)


## Context IDs that refer to the current period

In [30]:
get_target_context_ids(soup=soup, report_period_end_date=report_period_end_date, form_type=FORM_TYPE)

['icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930',
 'ifc522cda813e4842a934b78a0163bb07_D20210701-20210930',
 'i17ee23e173904148bb08842b2e310bfe_D20210701-20210930',
 'i0037e2367a854cfbb9461ecc19fd857b_D20210701-20210930',
 'i09edd8b4692f4d23b945d745226e25ea_D20210701-20210930',
 'if99c73b4a593477f9a8eb4b28c6280b5_D20210701-20210930',
 'i5c45e13d3f674959806aa9bc9338a4d3_D20210701-20210930',
 'i6546f5d3fe1e4db2af98357deb902bcd_D20210701-20210930',
 'iff65c328d95d40fc84b3fff7970e7d78_D20210701-20210930',
 'i1aa81414412e450cbf7354f54fa0365d_D20210701-20210930',
 'ia138785b671b4dc1bebe4e89ea0fdd09_D20210701-20210930',
 'i4c8928a64bf8491cb6752e21039f2bc5_D20210701-20210930',
 'id60d478483fe42d6be92003c47f3e2dd_D20210701-20210930',
 'ia9b20a11f6b74d91ac1100a579611f94_D20210701-20210930',
 'i4daa3a5df2864d13b94454a53b144370_D20210701-20210930',
 'if4c1b03b727d44628f91062404773229_D20210701-20210930',
 'i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930',
 'iac8b4d6a42f04c3490ca0d40e3a1c998_I202

---
# Constant

In [31]:
from xbrl_gaap_constant import (
    NAMESPACE_GAAP,
    GAAP_CREDIT_ITEMS,
    GAAP_DEBIT_ITEMS,
    GAAP_CALC_ITEMS,
    GAAP_METRIC_ITEMS,
    GAAP_FACT_ITEMS,
)
from sec_edgar_constant import (
    FS_ELEMENT_TYPE_DEBIT,
    FS_ELEMENT_TYPE_CREDIT,
    FS_ELEMENT_TYPE_CALC,
    FS_ELEMENT_TYPE_METRIC,
    FS_ELEMENT_TYPE_FACT,
)

In [32]:
CONTEXT_REGEXP = re.compile("|".join(
    get_target_context_ids(soup=soup, report_period_end_date=report_period_end_date, form_type=FORM_TYPE))
)

In [33]:
CONTEXT_REGEXP

re.compile(r'icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930|ifc522cda813e4842a934b78a0163bb07_D20210701-20210930|i17ee23e173904148bb08842b2e310bfe_D20210701-20210930|i0037e2367a854cfbb9461ecc19fd857b_D20210701-20210930|i09edd8b4692f4d23b945d745226e25ea_D20210701-20210930|if99c73b4a593477f9a8eb4b28c6280b5_D20210701-20210930|i5c45e13d3f674959806aa9bc9338a4d3_D20210701-20210930|i6546f5d3fe1e4db2af98357deb902bcd_D20210701-20210930|iff65c328d95d40fc84b3fff7970e7d78_D20210701-20210930|i1aa81414412e450cbf7354f54fa0365d_D20210701-20210930|ia138785b671b4dc1bebe4e89ea0fdd09_D20210701-20210930|i4c8928a64bf8491cb6752e21039f2bc5_D20210701-20210930|id60d478483fe42d6be92003c47f3e2dd_D20210701-20210930|ia9b20a11f6b74d91ac1100a579611f94_D20210701-20210930|i4daa3a5df2864d13b94454a53b144370_D20210701-20210930|if4c1b03b727d44628f91062404773229_D20210701-20210930|i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930|iac8b4d6a42f04c3490ca0d40e3a1c998_I20210930|icb30e95fa0f44b5d92822d45b15dc68f_I20210930|i4e0c76

# Utilities

In [34]:
def assert_bf4_tag(element):
    assert isinstance(element, bs4.element.Tag), \
    f"Expected BS4 tag but {element} of type {type(element)}"

In [35]:
def display_elements(elements):
    assert isinstance(elements, bs4.element.ResultSet) or isinstance(elements[0], bs4.element.Tag)
    for element in elements: # decimals="-3" means the displayed value is divied by 1000.
        print(f"{element.name:80} {element['unitref']:5} {element['decimals']:5} {element.text:15}")

In [36]:
def get_element_hash(element):
    """Generate the financial element hash key to uniquely identify an financial element record
    In a F/S, the same element, e.g. gaap:CashAndCashEquivalentsAtCarryingValue can be used at
    multiple places, one in B/S and one in P/L.
    
    To be able to identify if two elements are the same, provides a way to be able to compare
    two elements by generating a hash key from the attributes of an element.
    
    Args:
        element: bs4.element.Tag for an financial element
    Returns: hash key
    """
    assert isinstance(element, bs4.element.Tag)
    # key = f"{element.name}{element['unitref']}{element['contextref']}{element.text}"
    # key = f"{element.name}{element['unitref']}{element.text}"
    key = f"{element.name}{element['unitref']}"
    return hash(key)

In [37]:
# XBRL attribute conditions to match when extracting FS elements
ATTRIBUTES = {
    "contextref": CONTEXT_REGEXP,
    "decimals": True, 
    "unitref": True
}


def find_financial_elements(soup, element_names, attributes=ATTRIBUTES):
    """Find the financial statement elements from the XML/HTML source.
    Args:
        soup: BS4 source
        element_names: String or regexp instance to select the financial elements.
        attribute: tag attributes to select the financial elements
    Returns:
        List of BS4 tag objects that matched the element_names and attributes.
    """
    assert isinstance(soup, BeautifulSoup)
    assert isinstance(element_names, re.Pattern) or isinstance(element_names, str)

    
    names = element_names.lower() if isinstance(element_names, str) else element_names

    elements = soup.find_all(
        name=names,
        string=REGEXP_NUMERIC,
        attrs=attributes
    )
    
    # Select unique elements
    hashes = set([]) 
    results = []
    if elements is not None and len(elements) > 0:
        for element in elements:
            hash_value = get_element_hash(element)
            if hash_value not in hashes:
                results.append(element) 
                hashes.add(hash_value)
    return results

## FS elements of the period

In [38]:
for element in find_financial_elements(soup, re.compile(f"^{NAMESPACE_GAAP}:.*"), attributes=ATTRIBUTES):
    print(f"{element.name:100}:{element.text}")

us-gaap:revenues                                                                                    :1681000000
us-gaap:costofgoodsandservicessold                                                                  :1356168000
us-gaap:grossprofit                                                                                 :324832000
us-gaap:sellinggeneralandadministrativeexpense                                                      :72581000
us-gaap:researchanddevelopmentexpense                                                               :40790000
us-gaap:operatingexpenses                                                                           :113371000
us-gaap:operatingincomeloss                                                                         :211461000
us-gaap:interestexpense                                                                             :12896000
us-gaap:othernonoperatingincomeexpense                                                              :501000
us-ga

In [39]:
def get_financial_element_numeric_values(elements):
    # assert isinstance(elements, bs4.element.ResultSet) or isinstance(elements[0], bs4.element.Tag)
    assert_bf4_tag(elements[0])
    
    values = []
    for element in elements:
        assert re.match(REGEXP_NUMERIC, element.text.strip()), f"Element must be numeric but {element.text}"
        values.append(float(element.text))
        
    return values

In [40]:
def get_financial_element_columns():
    """Financial record columns"""
    return [
        "fs",           # Which financial statement e.g. bs for Balance Sheet
        "rep",          # Representative marker e.g. "income" or "cogs" (cost of goods sold)
        "type",         # "debit" or "credit"
        "name",         # F/S item name, e.g. us-gaap:revenues
        "value", 
        "unit",         # e.g. USD
        "decimals",     # Scale
        "context"       # XBRL context ID
    ]

def get_record_for_nil_elements(elements):
    return []

def get_records_for_financial_elements(elements):
    """Financial record having the columns of get_financial_element_columns"""
    # assert isinstance(elements, bs4.element.ResultSet) or isinstance(elements[0], bs4.element.Tag)
    assert_bf4_tag(elements[0])
    
    results = []
    for element in elements:
        # F/S
        element_fs = ""
        
        # Rep
        element_rep = ""
        
        # Type of the element
        element_type = None
        if element.name in GAAP_DEBIT_ITEMS: element_type = FS_ELEMENT_TYPE_DEBIT
        if element.name in GAAP_CREDIT_ITEMS: element_type = FS_ELEMENT_TYPE_CREDIT
        if element.name in GAAP_CALC_ITEMS: element_type = FS_ELEMENT_TYPE_CALC
        if element.name in GAAP_METRIC_ITEMS: element_type = FS_ELEMENT_TYPE_METRIC
        if element.name in GAAP_FACT_ITEMS: element_type = FS_ELEMENT_TYPE_FACT
            
        
        # Name of the financial element
        element_name = element.name
        
        # Unit of the financial element
        element_unit = element['unitref']
        
        # Scale of the element
        element_scale = int(element['decimals']) if element['decimals'].lower() != 'inf' else np.inf
            
        # Value of the element
        element_value = float(element.text)

        # Context ID of the element
        element_context = element['contextref']

        record = [
            element_fs,
            element_rep,
            element_type,
            element_name,
            element_value, 
            element_unit, 
            element_scale, 
            element_context
        ]
        assert len(record) == len(get_financial_element_columns())
        results.append(record)
        
    return results


def represents(records: list, fs: str, rep: str):
    assert isinstance(records, list) and len(fs) > 0 and len(rep) > 0
    if len(records) > 0:
        row = records[0]
        row[0] = fs
        row[1] = rep
    
    return records

In [41]:
def get_records_for_financial_element_names(soup, names: List[str]):
    """Get finacial records that matches the financial element names
    """
    elements = find_financial_elements(soup=soup, element_names=names)
    if len(elements) > 0:
        display_elements(elements)
        return get_records_for_financial_elements(elements)
    else:
        return get_record_for_nil_elements(elements)

In [42]:
def get_values_for_financial_element_names(soup, names: List[str]):
    elements = find_financial_elements(soup=soup, element_names=names)
    if len(elements) > 0:
        display_elements(elements)
        return get_financial_element_numeric_values(elements)
    else:
        return []

---
# Shares Outstanding

In [43]:
def get_shares_outstanding(soup):
    names = re.compile("|".join([
        rf"{NAMESPACE_GAAP}:SharesOutstanding",
        rf"{NAMESPACE_GAAP}:CommonStockSharesOutstanding",
        rf"{NAMESPACE_GAAP}:CommonStockOtherSharesOutstanding",
    ]).lower())

    return get_records_for_financial_element_names(soup=soup, names=names)

In [44]:
shares_outstandings = get_shares_outstanding(soup)
shares_outstandings

us-gaap:commonstocksharesoutstanding                                             shares -3    244190000      


[['',
  '',
  'fact',
  'us-gaap:commonstocksharesoutstanding',
  244190000.0,
  'shares',
  -3,
  'i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930']]

In [45]:
df_ShareOutstanding = pd.DataFrame(shares_outstandings)
df_ShareOutstanding

Unnamed: 0,0,1,2,3,4,5,6,7
0,,,fact,us-gaap:commonstocksharesoutstanding,244190000,shares,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930


---
# Statements of Income (P/L)

In [46]:
PL = []

## Revenues

In [47]:
PL += get_pl_revenues(soup, attributes=ATTRIBUTES)
PL += get_pl_revenue_other(soup, attributes=ATTRIBUTES)
PL += get_pl_income_interest(soup, attributes=ATTRIBUTES)
PL += get_pl_income_other(soup, attributes=ATTRIBUTES)

In [48]:
get_pl_revenues(soup, attributes=ATTRIBUTES)

[['pl',
  'revenue',
  'credit',
  'us-gaap:revenues',
  1681000000,
  'usd',
  -3,
  'icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930'],
 ['',
  '',
  'credit',
  'us-gaap:revenuefromcontractwithcustomerexcludingassessedtax',
  1235753000,
  'usd',
  -3,
  'i6546f5d3fe1e4db2af98357deb902bcd_D20210701-20210930']]

## Cost of Revenues

In [49]:
PL += get_pl_cost_of_revenues(soup, attributes=ATTRIBUTES)

## ***___# Gross Profit___***

In [50]:
PL += get_pl_gross_profit(soup, attributes=ATTRIBUTES)

## Operating Expenses

### Research and Development

In [51]:
PL += get_pl_operating_expense_r_and_d(soup, attributes=ATTRIBUTES)

### Administrative Expense

In [52]:
PL += get_pl_operating_expense_selling_administrative(soup, attributes=ATTRIBUTES)

### Other operating expenses

The total amount of other operating cost and expense items that are associated with the entity's normal revenue producing operation

In [53]:
PL += get_pl_operating_expense_other(soup, attributes=ATTRIBUTES)

## ***___# Total Operating Expenses___***

In [54]:
PL += get_pl_operating_expense_total(soup, attributes=ATTRIBUTES)

## ***___# Operating Income___***

```Operating Income = GrossProfit - Total Operating Expenses```

In [55]:
PL += get_pl_operating_income(soup, attributes=ATTRIBUTES)

## Non Operating Expenses

### Interest Expense

* [Investopedia - What Is an Interest Expense?](https://www.investopedia.com/terms/i/interestexpense.asp)

> An interest expense is the cost incurred by an entity for borrowed funds. Interest expense is a non-operating expense shown on the income statement. It represents interest payable on any borrowings – bonds, loans, convertible debt or lines of credit. It is essentially calculated as the interest rate times the outstanding principal amount of the debt. Interest expense on the income statement represents ***interest accrued during the period*** covered by the financial statements, and **NOT the amount of interest paid over that period**. While interest expense is tax-deductible for companies, in an individual's case, it depends on his or her jurisdiction and also on the loan's purpose.  
>
> For most people, mortgage interest is the single-biggest category of interest expense over their lifetimes as interest can total tens of thousands of dollars over the life of a mortgage as illustrated by online calculators.

In [56]:
PL += get_pl_non_operating_expense_interest(soup, attributes=ATTRIBUTES)

### Non-operating Expenses

In [57]:
PL += get_pl_non_operating_expense_other(soup, attributes=ATTRIBUTES)

## Income Tax

In [58]:
PL += get_pl_income_tax(soup, attributes=ATTRIBUTES)

## ***___# Net Income___***

$GrossProfit - (Operating Expenses + NonOperating Expense) - Tax$

In [59]:
PL += get_pl_net_income(soup, attributes=ATTRIBUTES)

## ***___# Shares Outstandings___***

In [60]:
PL += get_pl_shares_outstanding(soup, attributes=ATTRIBUTES)

## ***___# Net Income Per Share___***

* [US GAAP - Is Net Income Per Share the same with EPS?](https://money.stackexchange.com/questions/148015/us-gaap-is-net-income-per-share-the-same-with-eps)

In [61]:
def get_pl_eps(soup):
    return get_records_for_financial_element_names(
        soup=soup, names=f"{NAMESPACE_GAAP}:EarningsPerShareBasic".lower()
    ) + \
    get_records_for_financial_element_names(
        soup=soup, names=f"{NAMESPACE_GAAP}:EarningsPerShareBasicAndDiluted".lower()
    )

In [62]:
PL += represents(get_pl_eps(soup), fs=FS_PL, rep=FS_ELEMENT_REP_EPS)

us-gaap:earningspersharebasic                                                    usdPerShare 2     0.74           


## Display P/L

Is ```us-gaap:othernonoperatingincomeexpense``` credit or debit? As the value is **negative** and so is in the Income Statement, is shoudl be credit -> To be confirmed. 

In [63]:
df_PL = pd.DataFrame(PL, columns=get_financial_element_columns())
df_PL

Unnamed: 0,fs,rep,type,name,value,unit,decimals,context
0,pl,revenue,credit,us-gaap:revenues,1681000000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930
1,,,credit,us-gaap:revenuefromcontractwithcustomerexcludingassessedtax,1235753000.0,usd,-3,i6546f5d3fe1e4db2af98357deb902bcd_D20210701-20210930
2,pl,operating_cost,debit,us-gaap:costofgoodsandservicessold,1356168000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930
3,pl,gross_profit,calc,us-gaap:grossprofit,324832000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930
4,pl,operating_expense_rd,debit,us-gaap:researchanddevelopmentexpense,40790000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930
5,pl,operating_expense_sga,debit,us-gaap:sellinggeneralandadministrativeexpense,72581000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930
6,pl,operating_expense,calc,us-gaap:operatingexpenses,113371000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930
7,pl,operating_income,calc,us-gaap:operatingincomeloss,211461000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930
8,,,debit,us-gaap:interestexpense,12896000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930
9,,,credit,us-gaap:othernonoperatingincomeexpense,501000.0,usd,-3,icb4fe7b89cc949678c0215fb0c6c75ed_D20210701-20210930


In [64]:
credits = df_PL[df_PL['type'] == 'credit']['value'].sum()
credits

2917254000.0

In [65]:
debits = df_PL[df_PL['type'] == 'debit']['value'].sum()
debits

1499654000.0

In [66]:
credits - debits  # Equal to the Net Income

1417600000.0

### EPS

In [67]:
shares = df_PL[df_PL['rep'] == FS_ELEMENT_REP_SHARES_OUTSTANDING]
if len(shares) == 1:
    num_shares = shares['value'].values.item()
else:
    assert False, f"No Shares OutStanding row found {df_PL}"

num_shares

244190000.0

In [68]:
shares = df_PL[df_PL['rep'] == FS_ELEMENT_REP_NET_INCOME]
if len(shares) == 1:
    net_income = shares['value'].item()
else:
    assert False, f"No Net Income row found {df_PL}"

net_income

181847000.0

In [69]:
eps = net_income / num_shares
scale = 2
print(f"{eps:.{scale}f}")

0.74


---
# Balance Sheet (B/S)

In [70]:
BS = []

## Cash & Cash Equivalents

Look for the cash and cash equivalents for the reporting perid in the Balance Sheet and Cash Flow statements of the  10-K.

In [71]:
def get_bs_current_asset_cash_and_equivalents(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:CashAndCashEquivalentsAtCarryingValue$",
    ]).lower())
    return represents(get_records_for_financial_element_names(soup=soup, names=names), fs=FS_BS, rep=FS_ELEMENT_REP_CASH)

BS += get_bs_current_asset_cash_and_equivalents(soup)

us-gaap:cashandcashequivalentsatcarryingvalue                                    usd   -3    545592000      


## Restricted Cash

In [72]:
def get_bs_current_asset_restricted_cash_and_equivalents(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:RestrictedCashEquivalentsCurrent$",
        rf"^{NAMESPACE_GAAP}:RestrictedCashAndCashEquivalentsAtCarryingValue$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_current_asset_restricted_cash_and_equivalents(soup)

us-gaap:restrictedcashandcashequivalentsatcarryingvalue                          usd   -3    929000         


## Short Term Investments

In [73]:
def get_bs_current_asset_short_term_investments(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:ShortTermInvestments$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_current_asset_short_term_investments(soup)

us-gaap:shortterminvestments                                                     usd   -3    244470000      


## Account Receivable

In [74]:
def get_bs_current_asset_account_receivables(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:ReceivablesNetCurrent$",
        rf"^{NAMESPACE_GAAP}:AccountsReceivableNetCurrent$",
        rf"^{NAMESPACE_GAAP}:OtherReceivables$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_current_asset_account_receivables(soup)

us-gaap:accountsreceivablenetcurrent                                             usd   -3    1289617000     


## ***___Inventory___***

In [75]:
def get_bs_current_asset_inventory(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:InventoryNet$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_current_asset_inventory(soup)

us-gaap:inventorynet                                                             usd   -3    445918000      


## Prepaid Expense / Other Assets Current

* [Understanding Prepaid Expenses](https://www.investopedia.com/terms/p/prepaidexpense.asp)

> Companies make prepayments for goods or services such as leased office equipment or insurance coverage that provide continual benefits over time. Goods or services of this nature cannot be expensed immediately because the expense would not line up with the benefit incurred over time from using the asset.  
>
> According to generally accepted accounting principles (GAAP), expenses should be recorded in the same accounting period as the benefit generated from the related asset.

* [us-gaap: PrepaidExpenseAndOtherAssetsCurrent](https://www.calcbench.com/element/PrepaidExpenseAndOtherAssetsCurrent)

> Amount of asset related to consideration paid in advance for costs that provide economic benefits in future periods, and amount of other assets that are expected to be realized or consumed within one year or the normal operating cycle, if longer.

* [Other Current Assets (OCA)](https://www.investopedia.com/terms/o/othercurrentassets.asp)

> Other current assets (OCA) is a category of things of value that a company owns, benefits from, or uses to generate income that can be converted into cash within one business cycle. They are referred to as “other” because they are uncommon or insignificant, unlike typical current asset items such as cash, securities, accounts receivable, inventory, and prepaid expenses.

In [76]:
def get_bs_current_asset_other(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:PrepaidExpenseCurrent$",
        rf"^{NAMESPACE_GAAP}:PrepaidExpenseAndOtherAssetsCurrent$",
        rf"^{NAMESPACE_GAAP}:OperatingLeaseRightOfUseAsset$",
        rf"^{NAMESPACE_GAAP}:OtherAssetsCurrent$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_current_asset_other(soup)

us-gaap:otherassetscurrent                                                       usd   -3    48598000       
us-gaap:operatingleaserightofuseasset                                            usd   -3    146210000      


## ***___# Current Assets___***

In [77]:
def get_bs_current_assets(soup):
    names = re.compile("|".join([
    rf"^{NAMESPACE_GAAP}:AssetsCurrent$",
    ]).lower())
    return represents(get_records_for_financial_element_names(soup=soup, names=names), fs=FS_BS, rep=FS_ELEMENT_REP_CURRENT_ASSETS)

BS += get_bs_current_assets(soup)    

us-gaap:assetscurrent                                                            usd   -3    2575124000     


## Property, Plant, Equipment

In [78]:
def get_bs_non_current_asset_property_and_equipment(soup):
    names = re.compile("|".join([
    rf"^{NAMESPACE_GAAP}:PropertyPlantAndEquipmentNet$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_non_current_asset_property_and_equipment(soup)

us-gaap:propertyplantandequipmentnet                                             usd   -3    2901564000     


In [79]:
## Restricted Cash Non Current

In [80]:
def get_bs_non_current_asset_restricted_cash_and_equivalent(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:RestrictedCashAndCashEquivalentsNoncurrent$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_non_current_asset_restricted_cash_and_equivalent(soup)

us-gaap:restrictedcashandcashequivalentsnoncurrent                               usd   -3    3508000        


## Deferred Tax

In [81]:
def get_bs_non_current_asset_deferred_income_tax(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:DeferredIncomeTaxAssetsNet$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_non_current_asset_deferred_income_tax(soup)

## ***___GoodWill___***

In [82]:
def get_bs_non_current_asset_goodwill(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:GoodWill$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_non_current_asset_goodwill(soup)

us-gaap:goodwill                                                                 usd   -3    25351000       


## Intangible and Other Assets

In [83]:
def get_bs_non_current_asset_other(soup):
    names = re.compile("|".join([
        rf"^{NAMESPACE_GAAP}:IntangibleAssetsNetExcludingGoodwill$",
        rf"^{NAMESPACE_GAAP}:OtherAssetsNoncurrent$",
    ]).lower())
    return get_records_for_financial_element_names(soup=soup, names=names)

BS += get_bs_non_current_asset_other(soup)

us-gaap:otherassetsnoncurrent                                                    usd   -3    118691000      


## ***___# Total Assets___***

In [84]:
BS += get_bs_total_assets(soup, attributes=ATTRIBUTES)

## Account Payable

* [Accounts Payable (AP)](https://www.investopedia.com/terms/a/accountspayable.asp)

> company's obligation to pay off a short-term debt to its creditors or suppliers.



* [Accrued Liability](https://www.investopedia.com/terms/a/accrued-liability.asp) (売掛金)

> costs for goods and services already delivered to a company for which it must pay in the future. A company can accrue liabilities for any number of obligations and are recorded on the company's balance sheet. They are normally listed on the balance sheet as current liabilities and are adjusted at the end of an accounting period.


* [us-gaap:AccruedLiabilitiesCurrent](http://xbrlsite.azurewebsites.net/2019/Prototype/references/us-gaap/Element-354.html)

In [85]:
BS += get_bs_current_liability_account_payable(soup, attributes=ATTRIBUTES)

## Tax

In [86]:
BS += get_bs_current_liability_tax(soup, attributes=ATTRIBUTES)

## Debt Due

In [87]:
BS += get_bs_current_liability_longterm_debt(soup, attributes=ATTRIBUTES)

## ***___# Current Liabilities___***

In [88]:
get_bs_current_liabilities(soup, attributes=ATTRIBUTES)

[['bs',
  'current_liabilities',
  'calc',
  'us-gaap:liabilitiescurrent',
  1738985000,
  'usd',
  -3,
  'i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930']]

## Long Term Debt

In [89]:
BS += get_bs_non_current_liability_longterm_debt(soup, attributes=ATTRIBUTES)

## Tax Deferred

In [90]:
BS += get_bs_non_current_liability_deferred_tax(soup, attributes=ATTRIBUTES)

## Other Long Term Liabilities

* [Postemployment Benefits Liability, Noncurrent/us-gaap:PostemploymentBenefitsLiabilityNoncurrent](http://xbrlsite.azurewebsites.net/2019/Prototype/references/us-gaap/Element-12380.html)

> The obligations recognized for the various benefits provided to former or inactive employees, their beneficiaries, and covered dependents after employment but before retirement that is payable after one year (or beyond the operating cycle if longer).

In [91]:
BS += get_bs_non_current_liability_other(soup, attributes=ATTRIBUTES)

## ***___# Total Liabilities___***

In [92]:
get_bs_total_liabilities(soup, attributes=ATTRIBUTES)

[['bs',
  'total_liabilities',
  'calc',
  'us-gaap:liabilities',
  3010769000,
  'usd',
  -3,
  'i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930']]

## Paid-in Capital

* [Paid-In Capital](https://www.investopedia.com/terms/p/paidincapital.aspPaid-In Capital)

> Paid-in capital represents the funds raised by the business through selling its equity and not from ongoing business operations. Paid-in capital also refers to a line item on the company's balance sheet listed under shareholders' equity (also referred to as stockholders' equity), often shown alongside the line item for additional paid-in capital.

* [Additional Paid-In Capital (APIC)](https://www.investopedia.com/terms/a/additionalpaidincapital.asp)

> Often referred to as "contributed capital in excess of par,” APIC occurs when an investor buys newly-issued shares directly from a company during its initial public offering (IPO) stage. APIC, which is itemized under the shareholder equity (SE) section of a balance sheet, is viewed as a profit opportunity for companies as it results in them receiving excess cash from stockholders.

In [93]:
BS += get_bs_stockholders_equity_paid_in(soup, attributes=ATTRIBUTES)

## Retained Earnings

* [Retained Earnings](https://www.investopedia.com/terms/r/retainedearnings.asp)

>  The word "retained" captures the fact that because those earnings were **NOT paid out to shareholders as dividends** they were instead retained by the company.

In [94]:
BS += get_bs_stockholders_equity_retained(soup, attributes=ATTRIBUTES)

## Accumulated other comprehensive income/loss

In [95]:
BS += get_bs_stockholders_equity_other(soup, attributes=ATTRIBUTES)

## ***___# Stockholder's Equity___***

* [Stockholders' Equity](https://www.investopedia.com/terms/s/stockholdersequity.asp)

> Remaining amount of assets available to shareholders after all liabilities have been paid. (純資産)

In [96]:
get_bs_stockholders_equity(soup, attributes=ATTRIBUTES)

[['bs',
  'stockholders_equity',
  'calc',
  'us-gaap:stockholdersequityincludingportionattributabletononcontrollinginterest',
  2759679000,
  'usd',
  -3,
  'i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930'],
 ['',
  '',
  'calc',
  'us-gaap:stockholdersequity',
  2730013000,
  'usd',
  -3,
  'i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930']]

## ***___# Total Liabilities + Stockholder's Equity___***

In [97]:
BS += get_bs_total_liabilities_and_stockholders_equity(soup, attributes=ATTRIBUTES)

## Display B/S

In [98]:
df_BS = pd.DataFrame(BS, columns=get_financial_element_columns())
#df_BS = df_BS.style.set_properties(**{'text-align': 'left'})
#df_BS[(df_BS['rep'].notna()) & (df_BS['rep'] != "")]
df_BS

Unnamed: 0,fs,rep,type,name,value,unit,decimals,context
0,bs,cash_and_equivalent,credit,us-gaap:cashandcashequivalentsatcarryingvalue,545592000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
1,,,credit,us-gaap:restrictedcashandcashequivalentsatcarryingvalue,929000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
2,,,credit,us-gaap:shortterminvestments,244470000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
3,,,credit,us-gaap:accountsreceivablenetcurrent,1289617000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
4,,,credit,us-gaap:inventorynet,445918000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
5,,,credit,us-gaap:otherassetscurrent,48598000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
6,,,credit,us-gaap:operatingleaserightofuseasset,146210000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
7,bs,current_assets,calc,us-gaap:assetscurrent,2575124000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
8,,,credit,us-gaap:propertyplantandequipmentnet,2901564000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930
9,,,credit,us-gaap:restrictedcashandcashequivalentsnoncurrent,3508000,usd,-3,i00aae6ee0447424d9c01fc5f8f3bd82d_I20210930


In [99]:
credits = df_BS[df_BS['type'] == 'credit']['value'].sum()
credits

5770448000.0

In [100]:
debits = df_BS[df_BS['type'] == 'debit']['value'].sum()
debits

4655370000.0

In [101]:
credits - debits

1115078000.0

### Cash Per Share

In [102]:
cash = df_BS[df_BS['rep'] == FS_ELEMENT_REP_CASH]['value'].values.item()
cps = cash / num_shares

scale = 2
print(f"{cps:.{scale}f}")

2.23


### EPS VS CPS

In [103]:
if (cps / eps) > 1.0:
    print(f"Saving is {cps/eps:.2f} more than earning")

Saving is 3.00 more than earning


# Save