# Create VIEWs on SEC DERA data by LEI

## Full-year FY REVENUES

## Point-in-time FLOAT, DEBT, CASH, ASSETS, (EV = FLOAT + DEBT - CASH, EVIC = FLOAT + DEBT)

In [1]:
import os
import pathlib
from dotenv import load_dotenv

# Pull standard environment variables from a dot-env file when one is present.
# A missing file is not an error, so the variables may also be supplied some other way.
# Directory lookup order: CREDENTIAL_DOTENV_DIR, then PWD, then a fixed default path.
default_dotenv_dir = os.environ.get('PWD', '/opt/app-root/src')
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', default_dotenv_dir)
dotenv_path = pathlib.Path(dotenv_dir).joinpath('credentials.env')
if os.path.exists(dotenv_path):
    # override=True: values from the file replace variables that are already set.
    load_dotenv(dotenv_path=dotenv_path, override=True)

Set the default catalog in the connection arguments so that queries can omit the catalog prefix

In [2]:
import trino
from sqlalchemy.engine import create_engine

# All connection settings are read from environment variables sharing this prefix.
env_var_prefix = 'TRINO'

# Look up <prefix>_USER, <prefix>_HOST and <prefix>_PORT (in that order) and
# substitute them into the SQLAlchemy URL; a missing variable raises KeyError.
url_parts = {
    part: os.environ[f'{env_var_prefix}_{part.upper()}']
    for part in ('user', 'host', 'port')
}
sqlstring = 'trino://{user}@{host}:{port}/'.format(**url_parts)

# Extra arguments for the Trino driver: JWT auth taken from <prefix>_PASSWD,
# HTTPS transport, and the default catalog used by every query in this notebook.
sqlargs = dict(
    auth=trino.auth.JWTAuthentication(os.environ[f'{env_var_prefix}_PASSWD']),
    http_scheme='https',
    catalog='osc_datacommons_dev',
)

engine = create_engine(sqlstring, connect_args=sqlargs)
connection = engine.connect()

import pandas as pd

In [3]:
from osc_ingest_trino import *

# Obtain the bucket handle that the unmanaged-table helpers further down receive.
# attach_s3_bucket is presumably provided by the osc_ingest_trino star import, and
# "S3_DEV" presumably names a set of environment variables -- TODO confirm.
trino_bucket = attach_s3_bucket("S3_DEV")

In [4]:
# Start from a clean slate: remove every staging table that the cells below
# re-create (revenue, float, debt, cash, assets) and print each statement's result.
staging_tables = ('t_r', 't_f', 't_d', 't_c', 't_a')
for tbl in staging_tables:
    qres = engine.execute(f"drop table if exists sec_dera.{tbl}")
    print(qres.fetchall())

[(True,)]
[(True,)]
[(True,)]
[(True,)]
[(True,)]


### Revenue

For this table and others, we do not yet implement a mechanism for prioritizing amended values over initially reported values.  That is a TODO.

engine.execute("select *, if (fp='','fp should not be empty string', '') from sec_dera.sub as S where S.form='10-K' and S.fp!='FY'").fetchall()

In [5]:
# Drop the staging table first so the `create table` at the end of this cell
# cannot collide with a previous run.
qres = engine.execute("drop table if exists sec_dera.t_r").fetchall()
# (Re)define the full-year revenue view.  It keeps 10-K / 20-F submissions with
# fy >= 2019-01-01 and joins them to facts of the same filing (adsh) whose ddate
# is in the same calendar year as fy, with no co-registrant, qtrs=4 and uom USD.
# When several of the listed revenue tags are reported for the same filing and
# ddate, the largest value is kept (max).
qres = engine.execute("""
create or replace view sec_dera.fy_revenue_by_lei as
select S.adsh, cik, name, lei, sic, fy, N.ddate, max(value) as revenue
from sec_dera.sub as S join sec_dera.num as N on S.adsh=N.adsh and year(S.fy)=year(N.ddate)
where (S.form='10-K' or S.form='20-F')
        and S.fy>=DATE('2019-01-01')
        and coreg is NULL
        and qtrs=4
        and uom='USD'
        and (N.tag='Revenues'
             or N.tag='RevenueFromContractWithCustomerIncludingAssessedTax'
             or N.tag='RevenueFromContractWithCustomerExcludingAssessedTax'
             or N.tag='RevenuesNetOfInterestExpense'
             or N.tag='RegulatedAndUnregulatedOperatingRevenue'
             or N.tag='RegulatedOperatingRevenuePipelines')
group by S.adsh, cik, name, lei, sic, fy, N.ddate
""")
display(qres.fetchall())

# Materialize the view into the staging table t_r, ordered by fiscal year and
# then company name; the combined financials view reads from this table.
qres = engine.execute("""
create table sec_dera.t_r as
select * from sec_dera.fy_revenue_by_lei order by fy asc, name asc
""")
display(qres.fetchall())

[(True,)]

[(9598,)]

[(9598,)]

### Float (market cap)

TODO: The float calculation reads the reported overall float without regard to share class.  For companies that have multiple associated tickers, this gives wrong results, and probably very wrong results for cases such as BRK.A vs. BRK.B.

In [6]:
# Drop the staging table so the `create table` at the end of this cell succeeds on re-runs.
qres = engine.execute("drop table if exists sec_dera.t_f").fetchall()
# Helper view: for every 10-K / 20-F filing (adsh, fy) with fy >= 2019-01-01, the
# latest ddate (within the same calendar year as fy) at which any of the listed
# public-float tags is reported as a point-in-time (qtrs=0) USD fact with no co-registrant.
qres = engine.execute("""
create or replace view sec_dera.float_by_adsh_ddate as
select S.adsh, fy, max(ddate) as f_ddate
from sec_dera.sub as S join sec_dera.num as N on S.adsh=N.adsh and year(S.fy)=year(N.ddate)
where (S.form='10-K' or S.form='20-F')
        and S.fy>=DATE('2019-01-01')
        and coreg is NULL
        and qtrs=0
        and uom='USD'
        and (tag='EntityPublicFloat'
             or tag='EntitysPublicFloat'
             or tag='FreeFloat'
             or tag='PublicFloat'
             or tag='PublicFloatValue')
group by S.adsh, fy
""")
print(qres.fetchall())

# Main view: look up the float value(s) reported at the date chosen above and keep
# the largest positive one as market_cap.  The tag/qtrs/uom/coreg filters are
# repeated because the join back to num is only on adsh and ddate.
# The left join to ticker adds tname; NOTE(review): if a cik has several rows in
# ticker this yields one output row per tname -- confirm against the ticker table.
qres = engine.execute("""
create or replace view sec_dera.float_by_lei as
select S.adsh, S.cik, name, lei, tname, sic, S.fy, N.ddate, max(value) as market_cap
from sec_dera.sub as S
     join sec_dera.float_by_adsh_ddate as AD on S.adsh=AD.adsh -- and S.fy=AD.fy
     join sec_dera.num as N on AD.adsh=N.adsh and AD.f_ddate=N.ddate
     left join sec_dera.ticker T on S.cik=T.cik
     -- we already know we are looking at the correct S.form type
where value>0
      and coreg is NULL
      and qtrs=0
      and uom='USD'
      and (tag='EntityPublicFloat'
           or tag='EntitysPublicFloat'
           or tag='FreeFloat'
           or tag='PublicFloat'
           or tag='PublicFloatValue')
group by S.adsh, S.cik, name, lei, tname, sic, S.fy, N.ddate
""")
display(qres.fetchall())

# Materialize the view into the staging table t_f used by the combined financials view.
qres = engine.execute("""
create table sec_dera.t_f as select * from sec_dera.float_by_lei
""")
display(qres.fetchall())

[(True,)]


[(True,)]

[(10786,)]

### Cash

In [7]:
# Drop the staging table so the `create table` at the end of this cell succeeds on re-runs.
qres = engine.execute("drop table if exists sec_dera.t_c").fetchall()
# Helper view: for every 10-K / 20-F filing (adsh, fy) with fy >= 2019-01-01, the
# latest ddate (within the same calendar year as fy) at which any of the listed
# cash tags is reported as a point-in-time (qtrs=0) USD fact with no co-registrant.
# 'CashAndCashEquivalentsUnrestricted' is the only tag matched case-insensitively.
qres = engine.execute("""
create or replace view sec_dera.cash_by_adsh_ddate as
select S.adsh, fy, max(ddate) as c_ddate
from sec_dera.sub as S join sec_dera.num as N on S.adsh=N.adsh and year(S.fy)=year(N.ddate)
where (S.form='10-K' or S.form='20-F')
        and S.fy>=DATE('2019-01-01')
        and coreg is NULL
        and qtrs=0
        and uom='USD'
        and (tag='Cash'
             or tag='CashAndDueFromBanks'
             or tag='CashAndCashEquivalents'
             or lower(tag)=lower('CashAndCashEquivalentsUnrestricted')
             or tag='CashEquivalentsAtCarryingValue' 
             or tag='CashAndCashEquivalentsAtCarryingValue'
             or tag='CashAndCashEquivalentsAtCarryingValueExcludingVariableInterestEntities')
group by S.adsh, fy
""")
print(qres.fetchall())

# Main view: fetch the cash value(s) reported at the date chosen above and keep the
# largest one per filing.  The tag list must stay identical to the helper view's
# list, because the join back to num is only on adsh and ddate.
qres = engine.execute("""
create or replace view sec_dera.cash_by_lei as
select S.adsh, S.cik, name, lei, sic, S.fy, N.ddate, max(value) as cash
from sec_dera.sub as S
     join sec_dera.cash_by_adsh_ddate as AD on S.adsh=AD.adsh -- and S.fy=AD.fy
     join sec_dera.num as N on AD.adsh=N.adsh and AD.c_ddate=N.ddate
where coreg is NULL
      and qtrs=0
      and uom='USD'
      and (tag='Cash'
             or tag='CashAndDueFromBanks'
             or tag='CashAndCashEquivalents'
             or lower(tag)=lower('CashAndCashEquivalentsUnrestricted')
             or tag='CashEquivalentsAtCarryingValue' 
             or tag='CashAndCashEquivalentsAtCarryingValue'
             or tag='CashAndCashEquivalentsAtCarryingValueExcludingVariableInterestEntities')
group by S.adsh, S.cik, name, lei, sic, S.fy, N.ddate
""")
display(qres.fetchall())

# Materialize the view into the staging table t_c used by the combined financials view.
qres = engine.execute("""
create table sec_dera.t_c as select * from sec_dera.cash_by_lei
""")
display(qres.fetchall())

[(True,)]


[(True,)]

[(11465,)]

### Debt

In [8]:
# Reiterating comment embedded below: we should sum 'LongTermDebtNoncurrent' and 'LongTermDebtCurrent' as one term we MAX with others
# As written, every listed tag (including those two) is treated as an independent
# candidate and the single largest value is reported as debt.
# NOTE(review): 'NotesAndLoansReceivableNetNoncurrent' reads like a receivable (asset)
# tag rather than a debt tag -- confirm it belongs in these lists.
qres = engine.execute("drop table if exists sec_dera.t_d").fetchall()
# Helper view: for every 10-K / 20-F filing (adsh, fy) with fy >= 2019-01-01, the
# latest ddate (within the same calendar year as fy) at which any of the listed
# debt tags is reported as a point-in-time (qtrs=0) USD fact with no co-registrant.
qres = engine.execute("""
create or replace view sec_dera.debt_by_adsh_ddate as
select S.adsh, fy, max(ddate) as d_ddate
from sec_dera.sub as S join sec_dera.num as N on S.adsh=N.adsh and year(S.fy)=year(N.ddate)
where (S.form='10-K' or S.form='20-F')
        and S.fy>=DATE('2019-01-01')
        and coreg is NULL
        and qtrs=0
        and uom='USD'
        and (tag='LongTermDebt' or tag='LongTermDebtFairValue'
             or tag='LongTermDebtAndCapitalLeaseObligations' or tag='DebtAndCapitalLeaseObligations'
             or tag='DebtLongtermAndShorttermCombinedAmount' or tag='SecuredDebt' or tag='UnsecuredDebt'
             or tag='OperatingLeaseLiabilityNoncurrent'
             or tag='SubordinatedDebt' or tag='ConvertibleDebt'
             or tag='LongTermLineOfCredit' or tag='OtherBorrowings' or tag='NotesAndLoansReceivableNetNoncurrent'
             -- NOTE: A MORE ACCURATE ANSWER COMES FROM SUMMING THESE TWO AND COMPARING WITH THE ABOVE (ALREADY-COMBINED) DEBT METRICS
             or tag='LongTermDebtNoncurrent' or tag='LongTermDebtCurrent')
group by S.adsh, fy
""")
print(qres.fetchall())

# Main view: fetch the debt value(s) reported at the date chosen above and keep the
# largest one per filing.  The tag list must stay identical to the helper view's
# list, because the join back to num is only on adsh and ddate.
qres = engine.execute("""
create or replace view sec_dera.debt_by_lei as
select S.adsh, S.cik, name, lei, sic, S.fy, N.ddate, max(value) as debt
from sec_dera.sub as S
     join sec_dera.debt_by_adsh_ddate as AD on S.adsh=AD.adsh -- and S.fy=AD.fy
     join sec_dera.num as N on AD.adsh=N.adsh and AD.d_ddate=N.ddate
where coreg is NULL
      and qtrs=0
      and uom='USD'
      and (tag='LongTermDebt' or tag='LongTermDebtFairValue'
           or tag='LongTermDebtAndCapitalLeaseObligations' or tag='DebtAndCapitalLeaseObligations'
           or tag='DebtLongtermAndShorttermCombinedAmount' or tag='SecuredDebt' or tag='UnsecuredDebt'
           or tag='OperatingLeaseLiabilityNoncurrent'
           or tag='SubordinatedDebt' or tag='ConvertibleDebt'
           or tag='LongTermLineOfCredit' or tag='OtherBorrowings' or tag='NotesAndLoansReceivableNetNoncurrent'
           -- NOTE: A MORE ACCURATE ANSWER COMES FROM SUMMING THESE TWO AND COMPARING WITH THE ABOVE (ALREADY-COMBINED) DEBT METRICS
           or tag='LongTermDebtNoncurrent' or tag='LongTermDebtCurrent')
group by S.adsh, S.cik, name, lei, sic, S.fy, N.ddate
""")
display(qres.fetchall())

# Materialize the view into the staging table t_d used by the combined financials view.
qres = engine.execute("""
create table sec_dera.t_d as select * from sec_dera.debt_by_lei
""")
display(qres.fetchall())

[(True,)]


[(True,)]

[(8729,)]

### Assets

In [9]:
# Drop the staging table so the `create table` at the end of this cell succeeds on re-runs.
qres = engine.execute("drop table if exists sec_dera.t_a").fetchall()
# Helper view: for every 10-K / 20-F filing (adsh, fy) with fy >= 2019-01-01, the
# latest ddate (within the same calendar year as fy) at which the 'Assets' tag is
# reported as a point-in-time (qtrs=0) USD fact with no co-registrant.
qres = engine.execute("""
create or replace view sec_dera.assets_by_adsh_ddate as
select S.adsh, fy, max(ddate) as a_ddate
from sec_dera.sub as S join sec_dera.num as N on S.adsh=N.adsh and year(S.fy)=year(N.ddate)
where (S.form='10-K' or S.form='20-F')
        and S.fy>=DATE('2019-01-01')
        and coreg is NULL
        and qtrs=0
        and uom='USD'
        and tag='Assets'
group by S.adsh, fy
""")
print(qres.fetchall())

# Main view: the 'Assets' value reported at the date chosen above.
# NOTE(review): unlike the float/cash/debt views there is no max()/group by here, so
# a filing with more than one matching 'Assets' row for that date would produce more
# than one output row -- confirm whether that can happen in the num table.
qres = engine.execute("""
create or replace view sec_dera.assets_by_lei as
select S.adsh, S.cik, name, lei, sic, S.fy, N.ddate, value as assets
from sec_dera.sub as S
     join sec_dera.assets_by_adsh_ddate as AD on S.adsh=AD.adsh -- and S.fy=AD.fy
     join sec_dera.num as N on AD.adsh=N.adsh and AD.a_ddate=N.ddate
where coreg is NULL
      and qtrs=0
      and uom='USD'
      and tag='Assets'
""")
display(qres.fetchall())

# Materialize the view into the staging table t_a used by the combined financials view.
qres = engine.execute("""
create table sec_dera.t_a as select * from sec_dera.assets_by_lei
""")
display(qres.fetchall())

[(True,)]


[(True,)]

[(11730,)]

In [10]:
# Combine the five staging tables into one view, one row per revenue row, matching
# the other tables on the same company (cik) and the same filing (adsh).
# Only tname (from t_f) and the per-table value columns are unqualified; name, lei,
# fy, ddate and sic are all taken from the revenue table.
# NOTE(review): tname comes from the LEFT-joined t_f, so `tname not like '%-%'`
# evaluates to NULL (and the row is dropped) whenever tname is NULL, e.g. when a
# revenue row has no match in t_f.  Confirm that excluding those rows is intended;
# if not, the predicate needs an explicit `tname is null or ...`.
qres = engine.execute("""
create or replace view sec_dera.financials_by_lei as
select R.name, R.lei,
       tname,
       R.fy, R.ddate, R.sic, revenue, cash, debt, assets, market_cap
       -- , C.ddate as c_d, D.ddate as d_d, A.ddate as a_d, F.ddate as f_d
from sec_dera.t_r as R
     left join sec_dera.t_c as C on R.cik=C.cik and R.adsh=C.adsh
     left join sec_dera.t_d as D on R.cik=D.cik and R.adsh=D.adsh
     left join sec_dera.t_a as A on R.cik=A.cik and R.adsh=A.adsh
     left join sec_dera.t_f as F on R.cik=F.cik and R.adsh=F.adsh
where tname not like '%-%'
""")
display(qres.fetchall())

# Sanity check: show how many rows the combined view returns.
qres = engine.execute("""
select count (*) from sec_dera.financials_by_lei
""")
display(qres.fetchall())

[(True,)]

[(7806,)]

In [11]:
# Not valid Python: presumably a deliberate guard so that "Run All" halts here and
# the unexecuted cells below do not run -- confirm before replacing it with an explicit raise.
stop!

SyntaxError: invalid syntax (<ipython-input-11-8c726f94684c>, line 1)

In [None]:
# Profile the view-to-view join of revenue and cash with EXPLAIN ANALYZE.
# tname is not selected here: neither fy_revenue_by_lei nor cash_by_lei exposes a
# tname column (only float_by_lei does), so the former `R.tname` reference could
# not be resolved and the statement failed before producing a plan.
qres = engine.execute("""
explain analyze
select R.name, R.lei, R.fy, R.sic, revenue, cash -- , debt --, assets, market_cap
from sec_dera.fy_revenue_by_lei as R
     join sec_dera.cash_by_lei as C on R.name=C.name and R.fy=C.fy
     -- join sec_dera.debt_by_lei as D on R.name=D.name and R.fy=D.fy
     -- join sec_dera.assets_by_lei as A on R.name=A.name and R.fy=A.fy
     -- join sec_dera.float_by_lei as F on R.name=F.name and R.fy=F.fy
""")
# Keep the plan rows for inspection.
l = qres.fetchall()

In [None]:
# Profile the view-to-view join of revenue, debt and float with EXPLAIN ANALYZE.
# tname is read from F (float_by_lei), the only one of these views that carries it;
# fy_revenue_by_lei has no tname column, so the former `R.tname` could not be resolved.
qres = engine.execute("""
explain analyze
select R.name, R.lei, F.tname, R.fy, R.sic, revenue,
    -- cash,
    debt,
    -- assets,
    market_cap
from sec_dera.fy_revenue_by_lei as R
     -- join sec_dera.cash_by_lei as C on R.name=C.name and R.fy=C.fy
     join sec_dera.debt_by_lei as D on R.name=D.name and R.fy=D.fy
     -- join sec_dera.assets_by_lei as A on R.name=A.name and R.fy=A.fy
     join sec_dera.float_by_lei as F on R.name=F.name and R.fy=F.fy
""")
# Keep the plan rows for inspection.
l = qres.fetchall()

In [None]:
# Alternative, single-statement definition of sec_dera.financials_by_lei that joins
# sub directly to five aliases of num (revenue, assets, debt, cash, float) instead
# of going through the staging tables.  Running this cell REPLACES the view of the
# same name created earlier in the notebook.
# Points to be aware of:
#   * all joins are inner joins, so a filing lacking any one of the five facts is dropped;
#   * assets, debt and cash must share the revenue fact's ddate, while float only has
#     to fall within [fy, fy + 1 year);
#   * the cash and debt tag lists here are shorter than the ones used by the
#     cash_by_* / debt_by_* views -- NOTE(review): confirm whether that is intended;
#   * this version filters on S.fp='FY' and has no fy >= 2019-01-01 cut-off.
qres = engine.execute("""
create or replace view sec_dera.financials_by_lei as
select name, lei, fy, Nrevenue.ddate as r_ddate, sic,
       max(Nrevenue.value) as revenue,
       max(Ncash.value) as cash,
       max(Ndebt.value) as debt,
       max(Nassets.value) as assets,
       max(Nfloat.value) as market_cap
       -- max(Nfloat.value)+max(Ndebt.value)-max(Ncash.value) as ev,
       -- max(Nfloat.value)+max(Ndebt.value) as evic
from sec_dera.sub as S
      join sec_dera.num as Nrevenue on S.adsh=Nrevenue.adsh
            and Nrevenue.qtrs=4
            and Nrevenue.ddate>=S.fy and date_add('year', 1, S.fy)>Nrevenue.ddate
      join sec_dera.num as Nassets on S.adsh=Nassets.adsh and Nassets.ddate=Nrevenue.ddate
      join sec_dera.num as Ndebt on S.adsh=Ndebt.adsh and Ndebt.ddate=Nrevenue.ddate
      join sec_dera.num as Ncash on S.adsh=Ncash.adsh and Ncash.ddate=Nrevenue.ddate
      join sec_dera.num as Nfloat on S.adsh=Nfloat.adsh
where S.fp='FY' and (S.form='10-K' or S.form='20-F')
      -- and S.lei='I1BZKREC126H0VB1BL91'
and Nrevenue.coreg is NULL
and Nrevenue.uom='USD'
and (Nrevenue.tag='Revenues'
     or Nrevenue.tag='RevenueFromContractWithCustomerIncludingAssessedTax'
     or Nrevenue.tag='RevenueFromContractWithCustomerExcludingAssessedTax'
     or Nrevenue.tag='RevenuesNetOfInterestExpense'
     or Nrevenue.tag='RegulatedAndUnregulatedOperatingRevenue'
     or Nrevenue.tag='RegulatedOperatingRevenuePipelines')
and Nfloat.qtrs=0 and Nfloat.ddate>=fy and date_add('year', 1, fy)>Nfloat.ddate
and Nfloat.coreg is NULL
and Nfloat.uom='USD'
and (Nfloat.tag='EntityPublicFloat'
     or Nfloat.tag='EntitysPublicFloat'
     or Nfloat.tag='FreeFloat'
     or Nfloat.tag='PublicFloat'
     or Nfloat.tag='PublicFloatValue')
and Ncash.qtrs=0
and Ncash.coreg is NULL
and Ncash.uom='USD'
and (Ncash.tag='CashAndCashEquivalentsAtCarryingValue'
     or Ncash.tag='Cash'
     or Ncash.tag='CashEquivalentsAtCarryingValue'
     or Ncash.tag='CashAndCashEquivalents')
and Ndebt.qtrs=0
and Ndebt.coreg is NULL
and Ndebt.uom='USD'
and (Ndebt.tag='LongTermDebt'
     or Ndebt.tag='LongTermDebtFairValue'
     or Ndebt.tag='LongTermDebtAndCapitalLeaseObligations'
     or Ndebt.tag='DebtLongtermAndShorttermCombinedAmount'
     or Ndebt.tag='SecuredDebt'
     or Ndebt.tag='UnsecuredDebt'
     or Ndebt.tag='SubordinatedDebt'
     or Ndebt.tag='ConvertibleDebt')
and Nassets.qtrs=0
and Nassets.coreg is NULL
and Nassets.uom='USD'
and Nassets.tag='Assets'
group by name, lei, fy, Nrevenue.ddate, sic
""")
display(qres.fetchall())

# Pull every row of the (re)defined view into memory; `l` is what the DataFrame
# cell below is built from.
qres = engine.execute("""
select * from sec_dera.financials_by_lei
""")
l = qres.fetchall()

# Report how many rows came back.
print(len(l))

In [None]:
# Ask Trino to describe the view and keep the first field of every returned row,
# which is used as the column name when building the DataFrame.
describe_rows = engine.execute("describe sec_dera.financials_by_lei").fetchall()
columns = [row[0] for row in describe_rows]

In [None]:
# Assemble the fetched rows into a DataFrame labelled with the view's column names.
df = pd.DataFrame(l, columns=columns)

In [None]:
# Show the assembled DataFrame as the cell's output for visual inspection.
df

In [None]:
# Location of the unmanaged table that receives the DataFrame.
corp_catalog = "osc_datacommons_dev"
corp_schema = "sec_dera"
corp_table = "corp_data_df"

# Let pandas pick its best-fitting dtypes for every column before export.
df = df.convert_dtypes()

# Clear any earlier copy.  Going by the helper names (their implementations are
# not part of this notebook): first the table definition, then the stored data.
drop_unmanaged_table(corp_catalog, corp_schema, corp_table, engine, trino_bucket, verbose=True)
drop_unmanaged_data(corp_schema, corp_table, trino_bucket, verbose=True)

# Upload the DataFrame as parquet: no partition columns, not appending.
ingest_unmanaged_parquet(
    df, corp_schema, corp_table, trino_bucket,
    partition_columns=[], append=False, workdir='/tmp', verbose=True,
)

# Build the table-definition SQL for the DataFrame and execute it.
sql = unmanaged_parquet_tabledef(
    df, corp_catalog, corp_schema, corp_table, trino_bucket,
    partition_columns=[], verbose=True,
)
qres = engine.execute(sql)
display(qres.fetchall())

# Read the new table back in full.
qres = engine.execute("select * from sec_dera.corp_data_df")
l = qres.fetchall()

In [None]:
# Request the query plan for a full scan of the combined view, then print the
# first field of the first returned row.
qres = engine.execute("""
explain select * from sec_dera.financials_by_lei
""")
plan_rows = qres.fetchall()
print(plan_rows[0][0])