# Create VIEWs on SEC DERA data by LEI

## Full-year FY REVENUES, INCOME

## Point-in-time FLOAT, DEBT, CASH, ASSETS, (EV = FLOAT + DEBT - CASH, EVIC = FLOAT + DEBT)

In this workflow we assume (and depend upon) the interesting NUM data coming only from the consolidated entity (NUM.COREG IS NULL) and not from co-registrants.  
Please explore EXCELON (EXC) and DOMINION ENERGY (D) to see examples of reports with substantial co-registrant entities.

*Prerequites*: DERA-ingest, osc-ingest-rim_utility_transition_hub, GLEIF

In [1]:
import os
import pathlib
import osc_ingest_trino as osc

# Load environment variables from credentials.env
osc.load_credentials_dotenv()

Set session variable CATALOG to make query terms much more compact

In [2]:
iceberg_catalog = 'osc_datacommons_dev'
iceberg_schema = 'dera'
rmi_schema = 'rmi'
gleif_schema = 'sandbox'

dera_prefix = 'dera_'

engine = osc.attach_trino_engine(catalog=iceberg_catalog, schema=iceberg_schema, verbose=False)

import pandas as pd

In [3]:
for tbl in ['t_r', 't_f', 't_d', 't_c', 't_a', 't_i']:
    qres = osc._do_sql(f"drop table if exists {tbl}", engine, verbose=True)

drop table if exists t_r
drop table if exists t_f
drop table if exists t_d
drop table if exists t_c
drop table if exists t_a
drop table if exists t_i


### FX

Many public companies are non-US companies or report facts in units of measurement other than USD.  We build a small table of market knowledge so that we can do foreign exchange (FX) calculations.

In [4]:
sql = (f"""
select count (*) as count, uom, ddate, max(value) as maxval, min(value) as minval from {dera_prefix}num
where ddate>=DATE('2017-01-01')
      and (tag='ClosingForeignExchangeRate' or tag='ForeignCurrencyExchangeRateTranslation1')
      and qtrs=0 and uom!= 'USD' and (uom like '%USD%' or length(uom)=3)
group by uom, ddate, tag
order by ddate, uom, tag
""")
df_closing_fx = pd.read_sql (sql, engine, parse_dates=['ddate'])
# display(df_closing_fx)

sql = (f"""
select count (*) as count, uom, ddate, max(value) as maxval, min(value) as minval from {dera_prefix}num
where ddate>=DATE('2017-01-01')
      and tag='AverageForeignExchangeRate'
      and qtrs=4 and uom!= 'USD' and (uom like '%USD%' or length(uom)=3)
group by uom, ddate, tag
order by ddate, uom, tag
""")
df_average_fx = pd.read_sql (sql, engine, parse_dates=['ddate'])
# display(df_average_fx)

In [5]:
import re

def create_fx_dict(df):
    """From a list of FX pairs or non-USD (presumed to convert to USD) FX rates by dates, return a dictionary of FX rates by dates"""
    df_gbp = df[df.uom.str.contains(r'GBP', flags=re.I)]
    df_eur = df[df.uom.str.contains(r'EUR', flags=re.I)]
    df_other = df[~df.uom.str.contains(r'(GBP)|(EUR)', flags=re.I)]
    
    fx_input = {'GBP': df_gbp, 'EUR': df_eur }

    fx_df = {}

    for fx in ['GBP', 'EUR']:
        df_base = fx_input[fx].loc[df.maxval>1, ['ddate', 'maxval']].groupby(by='ddate').mean()
        df_base['minval'] = 1/df_base.maxval
        df_quote = fx_input[fx].loc[df.minval<1, ['ddate', 'minval']].groupby(by='ddate').mean()
        df_quote['maxval'] = 1/df_quote.minval
        df_fx = pd.concat((df_base, df_quote), axis=0).groupby(by='ddate').mean().reset_index()
        df_fx['base'] = fx
        df_fx.rename(columns={'maxval':'to_USD', 'minval':'from_USD'}, inplace=True)
        df_fx = df_fx[['base', 'to_USD', 'from_USD', 'ddate']]
        fx_df[fx] = df_fx.convert_dtypes()
    
    for fx in list({x[int(x[-1]=='USD')-1].upper():x[int(x[0]=='USD')-1].upper() for x in df_other.uom.str.split(r'[-_/]').values }.keys()):
        df_other_fx = df_other[df_other.uom.str.contains(fx, flags=re.I)]
        df_base = df_other_fx.loc[df.maxval>1, ['ddate', 'maxval']].groupby(by='ddate').mean()
        df_base['minval'] = 1/df_base.maxval
        df_quote = df_other_fx.loc[df.minval<1, ['ddate', 'minval']].groupby(by='ddate').mean()
        df_quote['maxval'] = 1/df_quote.minval
        df_fx = pd.concat((df_base, df_quote), axis=0).groupby(by='ddate').mean().reset_index()
        df_fx['base'] = fx
        df_fx.rename(columns={'maxval':'from_USD', 'minval':'to_USD'}, inplace=True)
        df_fx = df_fx[['base', 'to_USD', 'from_USD', 'ddate']]
        fx_df[fx] = df_fx.convert_dtypes()
    
    return fx_df

The `closing_fx` table contains point-in-time FX market information.

The `average_fx` table contains a four-quarter average of FX rates.

In [6]:
def create_fx_table(df_fx, ingest_table):
    df = osc.enforce_sql_column_names(pd.concat(create_fx_dict(df_fx).values()))
    columnschema = osc.create_table_schema_pairs(df, typemap={'datetime64[ns]':'timestamp(6)'})
    qres = osc._do_sql(f"drop table if exists {ingest_table}", engine, verbose=True)

    tabledef = f"""
create table if not exists {ingest_table}(
{columnschema}
) with (
partitioning = array['base'],
format = 'ORC'
)
    """
    table_create = osc._do_sql(tabledef, engine, verbose=True)

    if False:
        dataset_query = (f'SELECT * FROM {ingest_table} limit 10')
        print(dataset_query)
        dataset = osc._do_sql(dataset_query, engine, verbose=True)

    return df

for df, ingest_table in [ (df_closing_fx,'closing_fx'), (df_average_fx,'average_fx') ]:
    df = create_fx_table(df, ingest_table)
    df.to_sql(ingest_table,
             con=engine, schema=iceberg_schema, if_exists='append',
             index=False,
             method=osc.TrinoBatchInsert(batch_size = 1000, verbose = False))

  df_other = df[~df.uom.str.contains(r'(GBP)|(EUR)', flags=re.I)]


drop table if exists closing_fx

create table if not exists closing_fx(
    base varchar,
    to_usd double,
    from_usd double,
    ddate timestamp(6)
) with (
partitioning = array['base'],
format = 'ORC'
)
    


  df_other = df[~df.uom.str.contains(r'(GBP)|(EUR)', flags=re.I)]


drop table if exists average_fx

create table if not exists average_fx(
    base varchar,
    to_usd double,
    from_usd double,
    ddate timestamp(6)
) with (
partitioning = array['base'],
format = 'ORC'
)
    


### Annual Tables: Revenue and Income

We  only ingest annual data (NUM.QTRS=4) for Revenues and Income.  

Many companies report not only the present fiscal year data but two prior years as well.  
The row_number() code ensures we use the latest reported fact for a given year

In [7]:
common_dera_form_test = "(S.form='10-K' or S.form='20-F' or S.form='40-F')"
common_dera_fy_test = "S.fy>=DATE('2014-01-01')"
common_dera_columns = "S.adsh, S.cik, S.name, S.lei, S.sic, S.fy, N.ddate, N.uom"
partition_bits = "partition by cik, fy order by date_diff('day', fy, ddate) DESC"

In [8]:
def generate_annual_dera_usd_query(what, tags):
    usd_query = f"""
create or replace view fy_{what}_usd_by_lei as
select {common_dera_columns}, max(value) as {what}
from {dera_prefix}sub as S join {dera_prefix}num as N on S.adsh=N.adsh
where {common_dera_form_test}
      and {common_dera_fy_test}
      and uom='USD'
      and coreg is NULL
      and qtrs=4
      and ({' or '.join(["N.tag='"+tag+"'" for tag in tags])})
group by {common_dera_columns}
"""
    return usd_query

def generate_annual_dera_xyz_query(what, tags):
    # The X term prevents us from collecting foreign currency info that's duplicative of USD info we prefer
    xyz_query = f"""
create or replace view fy_{what}_xyz_by_lei as
select {common_dera_columns}, max(N.value) as {what}
from {dera_prefix}sub as S left join fy_{what}_usd_by_lei X on S.adsh=X.adsh
     join {dera_prefix}num as N on S.adsh=N.adsh and (X.adsh IS NULL or X.ddate=N.ddate)
where X.ddate IS NULL
      and {common_dera_form_test}
      and {common_dera_fy_test}
      and N.uom!='USD'
      and coreg is NULL
      and qtrs=4
      and ({' or '.join(["N.tag='"+tag+"'" for tag in tags])})
group by {common_dera_columns}
"""
    return xyz_query

def generate_annual_dera_combined_query(tbl, what, month_list):
    query = f"""
create table {tbl} as
select {re.sub(r'[SN].','V_USD.',common_dera_columns)}, V_USD.{what} as {what}_usd
from (select *,
             row_number() over ({partition_bits}) as rn
      from fy_{what}_usd_by_lei) as V_USD
where rn=1
UNION ALL
select {re.sub(r'[SN].','V_XYZ.',common_dera_columns)},
       coalesce(FX.to_usd, FX_1mo.to_usd, FX_2mo.to_usd, FX_3mo.to_usd, FX_avg.to_usd, FX_avg_py.to_usd, FX_avg_1231.to_usd) * V_XYZ.{what} as {what}_usd
from (select *,
             row_number() over ({partition_bits}) as rn
      from fy_{what}_xyz_by_lei) as V_XYZ
     left join closing_fx FX on V_XYZ.uom=FX.base and V_XYZ.ddate=FX.ddate
     {' '.join(str(nmonths).join([f"left join closing_fx FX_",
                                  "mo on V_XYZ.uom=FX_",
                                  "mo.base and date_diff('month', FX_", "mo.ddate, V_XYZ.ddate)=", ""]) for nmonths in month_list)}
     left join average_fx FX_avg on V_XYZ.uom=FX_avg.base and V_XYZ.ddate=FX_avg.ddate
     left join average_fx FX_avg_py on V_XYZ.uom=FX_avg_py.base and V_XYZ.ddate=date_add('year', 1, FX_avg_py.ddate)
     left join average_fx FX_avg_1231 on V_XYZ.uom=FX_avg_1231.base and DATE(cast (year(V_XYZ.ddate)-1 as varchar)||'-12-31')=FX_avg_1231.ddate
where rn=1
"""
    return query

# print(generate_annual_dera_combined_query('t_r', 'revenue', [1, 2, 3]))

In [9]:
def generate_instant_dera_usd_query(what, tags):
    usd_query = f"""
create or replace view {what}_usd_by_lei as
select {common_dera_columns}, max(value) as {what}
from {dera_prefix}sub as S
     join {dera_prefix}num as N on S.adsh=N.adsh
where {common_dera_form_test}
      and {common_dera_fy_test}
      and value>0
      and uom='USD'
      and coreg is NULL
      and qtrs=0
      and ({' or '.join(["N.tag='"+tag+"'" for tag in tags])})
group by {common_dera_columns}
"""
    return usd_query

def generate_instant_dera_xyz_query(what, tags):
    xyz_query = f"""
create or replace view {what}_xyz_by_lei as
select {common_dera_columns}, max(value) as {what}
from {dera_prefix}sub as S
     left join {what}_usd_by_lei X on S.adsh=X.adsh
     join {dera_prefix}num as N on S.adsh=N.adsh and (X.adsh IS NULL or X.ddate=N.ddate)
where X.ddate IS NULL
      and {common_dera_form_test}
      and {common_dera_fy_test}
      and value>0
      and N.uom!='USD'
      and coreg is NULL
      and qtrs=0
      and ({' or '.join(["N.tag='"+tag+"'" for tag in tags])})
group by {common_dera_columns}
"""
    return xyz_query

def generate_instant_dera_combined_query(tbl, what, month_list):
    query = f"""
create table {tbl} as
select {re.sub(r'[SN].','V_USD.',common_dera_columns)}, V_USD.{what} as {what}_usd
from (select *,
             row_number() over ({partition_bits}) as rn
      from {what}_usd_by_lei) as V_USD
where rn=1
UNION ALL
select {re.sub(r'[SN].','V_XYZ.',common_dera_columns)},
       coalesce(FX.to_usd, FX_1mo.to_usd, FX_2mo.to_usd, FX_3mo.to_usd, FX_avg.to_usd, FX_avg_py.to_usd, FX_avg_1231.to_usd) * V_XYZ.{what} as {what}_usd
from (select *,
             row_number() over ({partition_bits}) as rn
      from {what}_xyz_by_lei) as V_XYZ
     left join closing_fx FX on V_XYZ.uom=FX.base and V_XYZ.ddate=FX.ddate
     {' '.join(str(nmonths).join([f"left join closing_fx FX_",
                                  "mo on V_XYZ.uom=FX_",
                                  "mo.base and date_diff('month', FX_",
                                  "mo.ddate, V_XYZ.ddate)=",
                                  ""]) for nmonths in month_list)}
     left join average_fx FX_avg on V_XYZ.uom=FX_avg.base and V_XYZ.ddate=FX_avg.ddate
     left join average_fx FX_avg_py on V_XYZ.uom=FX_avg_py.base and V_XYZ.ddate=date_add('year', 1, FX_avg_py.ddate)
     left join average_fx FX_avg_1231 on V_XYZ.uom=FX_avg_1231.base and DATE(cast (year(V_XYZ.ddate)-1 as varchar)||'-12-31')=FX_avg_1231.ddate
where rn=1
"""
    return query

print(generate_instant_dera_combined_query('t_f', 'float', [1, 2, 3]))


create table t_f as
select V_USD.adsh, V_USD.cik, V_USD.name, V_USD.lei, V_USD.sic, V_USD.fy, V_USD.ddate, V_USD.uom, V_USD.float as float_usd
from (select *,
             row_number() over (partition by cik, fy order by date_diff('day', fy, ddate) DESC) as rn
      from float_usd_by_lei) as V_USD
where rn=1
UNION ALL
select V_XYZ.adsh, V_XYZ.cik, V_XYZ.name, V_XYZ.lei, V_XYZ.sic, V_XYZ.fy, V_XYZ.ddate, V_XYZ.uom,
       coalesce(FX.to_usd, FX_1mo.to_usd, FX_2mo.to_usd, FX_3mo.to_usd, FX_avg.to_usd, FX_avg_py.to_usd, FX_avg_1231.to_usd) * V_XYZ.float as float_usd
from (select *,
             row_number() over (partition by cik, fy order by date_diff('day', fy, ddate) DESC) as rn
      from float_xyz_by_lei) as V_XYZ
     left join closing_fx FX on V_XYZ.uom=FX.base and V_XYZ.ddate=FX.ddate
     left join closing_fx FX_1mo on V_XYZ.uom=FX_1mo.base and date_diff('month', FX_1mo.ddate, V_XYZ.ddate)=1 left join closing_fx FX_2mo on V_XYZ.uom=FX_2mo.base and date_diff('month', FX_2mo.ddate

In [10]:
revenue_tags = [
    'Revenue','Revenues','RevenueFromContractsWithCustomers',
    'RevenueFromContractWithCustomerIncludingAssessedTax',
    'RevenueFromContractWithCustomerExcludingAssessedTax',
    'RevenuesNetOfInterestExpense',
    'RegulatedAndUnregulatedOperatingRevenue',
    'RegulatedOperatingRevenuePipelines',
    'SalesRevenueGoodsNet'
]

income_tags = [
    'ProfitLoss','NetIncomeLoss','ComprehensiveIncome'
]

dera_annual_dict = {
    'revenue': revenue_tags,
    'income': income_tags,
}


for what, tags in dera_annual_dict.items():
    tbl = 't_' + what[0]
    print(f'Creating annual table `{tbl}` for `{what}`')
    
    query_usd = generate_annual_dera_usd_query(what, tags)
    print(query_usd)
    
    query_xyz = generate_annual_dera_xyz_query(what, tags)
    print(query_xyz)
    
    qres = osc._do_sql(f"drop table if exists {tbl}", engine, verbose=True)
    qres = osc._do_sql(query_usd, engine, verbose=False)
    qres = osc._do_sql(query_xyz, engine, verbose=False)

    query_final = generate_annual_dera_combined_query(tbl, what, [1, 2, 3])
    qres = osc._do_sql(query_final, engine, verbose=True)

Creating annual table `t_r` for `revenue`

create or replace view fy_revenue_usd_by_lei as
select S.adsh, S.cik, S.name, S.lei, S.sic, S.fy, N.ddate, N.uom, max(value) as revenue
from dera_sub as S join dera_num as N on S.adsh=N.adsh
where (S.form='10-K' or S.form='20-F' or S.form='40-F')
      and S.fy>=DATE('2014-01-01')
      and uom='USD'
      and coreg is NULL
      and qtrs=4
      and (N.tag='Revenue' or N.tag='Revenues' or N.tag='RevenueFromContractsWithCustomers' or N.tag='RevenueFromContractWithCustomerIncludingAssessedTax' or N.tag='RevenueFromContractWithCustomerExcludingAssessedTax' or N.tag='RevenuesNetOfInterestExpense' or N.tag='RegulatedAndUnregulatedOperatingRevenue' or N.tag='RegulatedOperatingRevenuePipelines' or N.tag='SalesRevenueGoodsNet')
group by S.adsh, S.cik, S.name, S.lei, S.sic, S.fy, N.ddate, N.uom


create or replace view fy_revenue_xyz_by_lei as
select S.adsh, S.cik, S.name, S.lei, S.sic, S.fy, N.ddate, N.uom, max(N.value) as revenue
from dera_sub as S 

### Instant Tables: Assets, Cash, Debt, and Float (market cap)

These tables all measure point-in-time values.  We only measure the points-in-time that are part of annual reports. 

Many companies report not only the present fiscal year data but two prior years as well.  
The row_number() code ensures we use the latest reported fact for a given year

TODO: The float calculation reads the reported overall float without regard to share class.  For companies that have multiple associated tickers, this gives wrong results.  And probably very wrong results for BRK.A vs. BRK.B
For starters, look at NG (National Grid plc), NGG (US Depository of NG), and its various preferred classes

In [11]:
float_tags = [
    'EntityPublicFloat',
    'FreeFloat',
    'PublicFloat',
    'PublicFloatValue',
    'ComputedFloat',
    'ComputedMarketFloat',
    'ComputedTreasuryFloat',
]

cash_tags = [
    'Cash','CashAndDueFromBanks','CashAndCashEquivalents',
    'CashAndCashEquivalentsUnrestricted', # this has some funkiness with upper/lower case data
    'CashEquivalentsAtCarryingValue',
    'CashAndCashEquivalentsAtCarryingValue',
    'CashAndCashEquivalentsAtCarryingValueExcludingVariableInterestEntities'
]

debt_tags = [
    'LongTermDebt','LongTermDebtFairValue','LongTermDebtAndCapitalLeaseObligations',
    'DebtAndCapitalLeaseObligations','DebtLongtermAndShorttermCombinedAmount',
    'SecuredDebt','UnsecuredDebt','OperatingLeaseLiabilityNoncurrent',
    'SubordinatedDebt','ConvertibleDebt','LongTermLineOfCredit',
    'OtherBorrowings','NotesAndLoansReceivableNetNoncurrent',
    # NOTE: A MORE ACCURATE ANSWER COMES FROM SUMMING THESE TWO AND COMPARING WITH THE ABOVE (ALREADY-COMBINED) DEBT METRICS
    'LongTermDebtNoncurrent','LongTermDebtCurrent'
]

dera_instant_dict = {
    'float': float_tags,
    'cash': cash_tags,
    'debt': debt_tags,
    'assets': ['Assets'],
}

for what, tags in dera_instant_dict.items():
    tbl = 't_' + what[0]
    
    print(f'Creating instant table `{tbl}` for `{what}`')

    query_usd = generate_instant_dera_usd_query(what, tags)
    print(query_usd)
    
    query_xyz = generate_instant_dera_xyz_query(what, tags)
    print(query_xyz)
    
    qres = osc._do_sql(f"drop table if exists {tbl}", engine, verbose=True)    
    qres = osc._do_sql(query_usd, engine, verbose=False)    
    qres = osc._do_sql(query_xyz, engine, verbose=False)    
    query_final = generate_instant_dera_combined_query(tbl, what, [1, 2, 3])
    qres = osc._do_sql(query_final, engine, verbose=True)

Creating instant table `t_f` for `float`

create or replace view float_usd_by_lei as
select S.adsh, S.cik, S.name, S.lei, S.sic, S.fy, N.ddate, N.uom, max(value) as float
from dera_sub as S
     join dera_num as N on S.adsh=N.adsh
where (S.form='10-K' or S.form='20-F' or S.form='40-F')
      and S.fy>=DATE('2014-01-01')
      and value>0
      and uom='USD'
      and coreg is NULL
      and qtrs=0
      and (N.tag='EntityPublicFloat' or N.tag='FreeFloat' or N.tag='PublicFloat' or N.tag='PublicFloatValue' or N.tag='ComputedFloat' or N.tag='ComputedMarketFloat' or N.tag='ComputedTreasuryFloat')
group by S.adsh, S.cik, S.name, S.lei, S.sic, S.fy, N.ddate, N.uom


create or replace view float_xyz_by_lei as
select S.adsh, S.cik, S.name, S.lei, S.sic, S.fy, N.ddate, N.uom, max(value) as float
from dera_sub as S
     left join float_usd_by_lei X on S.adsh=X.adsh
     join dera_num as N on S.adsh=N.adsh and (X.adsh IS NULL or X.ddate=N.ddate)
where X.ddate IS NULL
      and (S.form='10-K' or S

In [12]:
# cik=1056903 
engine.execute(f"""
select *, row_number() over (partition by cik, fy order by abs(date_diff('day', fy, ddate))) as rn
from float_usd_by_lei where lei='529900L26LIS2V8PWM23' order by fy, ddate
""").fetchall()

[('0001056903-17-000003', 1056903, 'AMERICAN STATES WATER CO', '529900L26LIS2V8PWM23', 4941, '2016-01-01 00:00:00.000', '2016-06-30 00:00:00.000', 'USD', 1601802000.0, 1),
 ('0001056903-17-000003', 1056903, 'AMERICAN STATES WATER CO', '529900L26LIS2V8PWM23', 4941, '2016-01-01 00:00:00.000', '2017-02-28 00:00:00.000', 'USD', 1629577000.0, 2),
 ('0001056903-18-000006', 1056903, 'AMERICAN STATES WATER CO', '529900L26LIS2V8PWM23', 4941, '2017-01-01 00:00:00.000', '2017-06-30 00:00:00.000', 'USD', 1737328000.0, 1),
 ('0001056903-18-000006', 1056903, 'AMERICAN STATES WATER CO', '529900L26LIS2V8PWM23', 4941, '2017-01-01 00:00:00.000', '2018-02-28 00:00:00.000', 'USD', 1963913000.0, 2),
 ('0001056903-19-000004', 1056903, 'AMERICAN STATES WATER CO', '529900L26LIS2V8PWM23', 4941, '2018-01-01 00:00:00.000', '2018-06-30 00:00:00.000', 'USD', 2099687000.0, 1),
 ('0001056903-19-000004', 1056903, 'AMERICAN STATES WATER CO', '529900L26LIS2V8PWM23', 4941, '2018-01-01 00:00:00.000', '2019-02-28 00:00:00

### Fitting Everything Together

The FINANCIALS_BY_LEI table rolls up all the metrics we capture in once place, hopefully with no duplicated rows and only the best facts.

In [13]:
# Ideally we can replace previously reported numbers with updated numbers.  But not yet.

qres = osc._do_sql(f"""
create or replace view financials_by_lei as
select R.name, R.lei, R.cik, if(S.countryinc!='',S.countryinc,S.countryba) as country,
       -- T.tname,
       R.fy, R.ddate, R.sic, revenue_usd, income_usd, float_usd as market_cap_usd, debt_usd, cash_usd, assets_usd
from t_r as R -- left join ticker T on S.cik=T.cik
     join {dera_prefix}sub as S on R.cik=S.cik and R.fy=S.fy and (S.form='10-K' or S.form='20-F' or S.form='40-F')
     left join t_i as I on R.cik=I.cik and R.ddate=I.ddate
     left join t_f as F on R.cik=F.cik and R.fy=F.fy -- year(R.ddate)=year(F.ddate) 
     left join t_d as D on R.cik=D.cik and R.ddate=D.ddate
     left join t_c as C on R.cik=C.cik and R.ddate=C.ddate
     left join t_a as A on R.cik=A.cik and R.ddate=A.ddate
-- where tname is null or tname not like '%-%'
""", engine, verbose=True)

qres = engine.execute(f"""
select count(*) from financials_by_lei
""")
print(qres.fetchall())


create or replace view financials_by_lei as
select R.name, R.lei, R.cik, if(S.countryinc!='',S.countryinc,S.countryba) as country,
       -- T.tname,
       R.fy, R.ddate, R.sic, revenue_usd, income_usd, float_usd as market_cap_usd, debt_usd, cash_usd, assets_usd
from t_r as R -- left join ticker T on S.cik=T.cik
     join dera_sub as S on R.cik=S.cik and R.fy=S.fy and (S.form='10-K' or S.form='20-F' or S.form='40-F')
     left join t_i as I on R.cik=I.cik and R.ddate=I.ddate
     left join t_f as F on R.cik=F.cik and R.fy=F.fy -- year(R.ddate)=year(F.ddate) 
     left join t_d as D on R.cik=D.cik and R.ddate=D.ddate
     left join t_c as C on R.cik=C.cik and R.ddate=C.ddate
     left join t_a as A on R.cik=A.cik and R.ddate=A.ddate
-- where tname is null or tname not like '%-%'

[(35055,)]


In [14]:
qres = engine.execute(f"""
select count(*), year(ddate) from t_r group by year(ddate) order by year(ddate)
""")
qres.fetchall()

[(14, 2014),
 (63, 2015),
 (2957, 2016),
 (3883, 2017),
 (5359, 2018),
 (5484, 2019),
 (5496, 2020),
 (5845, 2021),
 (4946, 2022),
 (102, 2023)]

### An outstanding GLEIF problem:

How far up the hierarchy should we climb?  For PG&E this really matters...

In [15]:
engine.execute(f"select * from t_r where lei='I1BZKREC126H0VB1BL91' order by fy, ddate").fetchall()

[('0001326160-18-000034', 1326160, 'DUKE ENERGY CORP', 'I1BZKREC126H0VB1BL91', 4931, '2017-01-01 00:00:00.000', '2017-12-31 00:00:00.000', 'USD', 23565000000.0),
 ('0001326160-19-000057', 1326160, 'DUKE ENERGY CORP', 'I1BZKREC126H0VB1BL91', 4931, '2018-01-01 00:00:00.000', '2018-12-31 00:00:00.000', 'USD', 24521000000.0),
 ('0001326160-20-000034', 1326160, 'DUKE ENERGY CORP', 'I1BZKREC126H0VB1BL91', 4931, '2019-01-01 00:00:00.000', '2019-12-31 00:00:00.000', 'USD', 25079000000.0),
 ('0001326160-21-000063', 1326160, 'DUKE ENERGY CORP', 'I1BZKREC126H0VB1BL91', 4931, '2020-01-01 00:00:00.000', '2020-12-31 00:00:00.000', 'USD', 23868000000.0),
 ('0001326160-22-000072', 1326160, 'DUKE ENERGY CORP', 'I1BZKREC126H0VB1BL91', 4931, '2021-01-01 00:00:00.000', '2021-12-31 00:00:00.000', 'USD', 25097000000.0),
 ('0001326160-23-000073', 1326160, 'DUKE ENERGY CORP', 'I1BZKREC126H0VB1BL91', 4931, '2022-01-01 00:00:00.000', '2022-12-31 00:00:00.000', 'USD', 28768000000.0)]

In [16]:
l = engine.execute(f"""
select A.lei, A.parent_name, R.name, date_format(R.fy, '%Y-%m-%d'), date_format(R.ddate, '%Y-%m-%d'), R.revenue_usd
from (select coalesce(G.ultimate_parent_issuer_lei,U.parent_lei) as lei, U.parent_name
      from {rmi_schema}.utility_information_2023 as U left join {gleif_schema}.gleif_direct_issuer_ultimate_issuer as G on U.parent_lei=G.direct_issuer_lei
      group by coalesce(G.ultimate_parent_issuer_lei,U.parent_lei), U.parent_name) as A
     left join t_r as R on A.lei=R.lei
where A.lei is not null
      and (R.lei is null or year(R.fy)=2020)
order by A.parent_name
""").fetchall()

print(len(l))
display(l)

198


[('2NUNNB7D43COUIRE5295', 'AES Corp.', 'AES CORP', '2020-01-01', '2020-12-31', 9660000000.0),
 ('LQPXMHHNJKIPJYE53543', 'Alberta Investment Management Corp.', None, None, None, None),
 ('549300T12EZ1F6PWWU29', 'Alcoa Corp.', 'ALCOA CORP', '2020-01-01', '2020-12-31', 9286000000.0),
 ('549300K5VIUTJXQL7X75', 'Algonquin Power & Utilities Corp.', 'ALGONQUIN POWER & UTILITIES CORP.', '2020-01-01', '2020-12-31', 1677058000.0),
 ('549300NNLSIMY6Z8OT86', 'Allete, Inc.', 'ALLETE INC', '2020-01-01', '2020-12-31', 1169100000.0),
 ('5493009ML300G373MZ12', 'Alliant Energy', 'ALLIANT ENERGY CORP', '2020-01-01', '2020-12-31', 3416000000.0),
 ('5493006MHB84DD0ZWV18', 'Alphabet Inc.', 'ALPHABET INC.', '2020-01-01', '2020-12-31', 182527000000.0),
 ('96950032TUYMW11FB530', 'Alstom SA', None, None, None, None),
 ('549300D7A8QA85Z2MH11', 'AltaGas Ltd.', 'ALTAGAS LTD.', '2020-01-01', '2020-12-31', 4355637001.130126),
 ('529900NZXZGBCBXYY327', 'Amaresco, Inc.', 'AMERESCO, INC.', '2020-01-01', '2020-12-31', 1

In [17]:
l = engine.execute(f"""
select A.lei, A.parent_name, R.name, date_format(R.fy, '%Y-%m-%d'), date_format(R.ddate, '%Y-%m-%d'), R.revenue_usd
from (select coalesce(G.ultimate_parent_issuer_lei,U.parent_lei) as lei, U.parent_name
      from {rmi_schema}.utility_information_2023 as U left join {gleif_schema}.gleif_direct_issuer_ultimate_issuer as G on U.parent_lei=G.direct_issuer_lei
      group by coalesce(G.ultimate_parent_issuer_lei,U.parent_lei), U.parent_name) as A
     left join t_r as R on A.lei=R.lei
where A.lei is not null
      and (R.lei is null or year(R.fy)=2021)
order by A.parent_name
""").fetchall()

print(len(l))
display(l)

196


[('2NUNNB7D43COUIRE5295', 'AES Corp.', 'AES CORP', '2021-01-01', '2021-12-31', 11141000000.0),
 ('LQPXMHHNJKIPJYE53543', 'Alberta Investment Management Corp.', None, None, None, None),
 ('549300T12EZ1F6PWWU29', 'Alcoa Corp.', 'ALCOA CORP', '2021-01-01', '2021-12-31', 12152000000.0),
 ('549300K5VIUTJXQL7X75', 'Algonquin Power & Utilities Corp.', 'ALGONQUIN POWER & UTILITIES CORP.', '2021-01-01', '2021-12-31', 2285479000.0),
 ('549300NNLSIMY6Z8OT86', 'Allete, Inc.', 'ALLETE INC', '2021-01-01', '2021-12-31', 1419200000.0),
 ('5493009ML300G373MZ12', 'Alliant Energy', 'ALLIANT ENERGY CORP', '2021-01-01', '2021-12-31', 3669000000.0),
 ('5493006MHB84DD0ZWV18', 'Alphabet Inc.', 'ALPHABET INC.', '2021-01-01', '2021-12-31', 257637000000.0),
 ('96950032TUYMW11FB530', 'Alstom SA', None, None, None, None),
 ('549300D7A8QA85Z2MH11', 'AltaGas Ltd.', 'ALTAGAS LTD.', '2021-01-01', '2021-12-31', 8346771818.190034),
 ('529900NZXZGBCBXYY327', 'Amaresco, Inc.', 'AMERESCO, INC.', '2021-01-01', '2021-12-31',

In [18]:
l = engine.execute(f"""
select A.lei, A.parent_name, R.name, date_format(R.fy, '%Y-%m-%d'), date_format(R.ddate, '%Y-%m-%d'), R.revenue_usd
from (select coalesce(G.ultimate_parent_issuer_lei,U.parent_lei) as lei, U.parent_name
      from {rmi_schema}.utility_information_2023 as U left join {gleif_schema}.gleif_direct_issuer_ultimate_issuer as G on U.parent_lei=G.direct_issuer_lei
      group by coalesce(G.ultimate_parent_issuer_lei,U.parent_lei), U.parent_name) as A
     left join t_r as R on A.lei=R.lei
where A.lei is not null
      and (R.lei is null or year(R.fy)=2022)
order by A.parent_name
""").fetchall()

print(len(l))
display(l)

190


[('2NUNNB7D43COUIRE5295', 'AES Corp.', 'AES CORP', '2022-01-01', '2022-12-31', 12617000000.0),
 ('LQPXMHHNJKIPJYE53543', 'Alberta Investment Management Corp.', None, None, None, None),
 ('549300T12EZ1F6PWWU29', 'Alcoa Corp.', 'ALCOA CORP', '2022-01-01', '2022-12-31', 12451000000.0),
 ('549300K5VIUTJXQL7X75', 'Algonquin Power & Utilities Corp.', 'ALGONQUIN POWER & UTILITIES CORP.', '2022-01-01', '2022-12-31', 2765155000.0),
 ('549300NNLSIMY6Z8OT86', 'Allete, Inc.', 'ALLETE INC', '2022-01-01', '2022-12-31', 1570700000.0),
 ('5493009ML300G373MZ12', 'Alliant Energy', 'ALLIANT ENERGY CORP', '2022-01-01', '2022-12-31', 4205000000.0),
 ('5493006MHB84DD0ZWV18', 'Alphabet Inc.', 'ALPHABET INC.', '2022-01-01', '2022-12-31', 282836000000.0),
 ('96950032TUYMW11FB530', 'Alstom SA', None, None, None, None),
 ('549300D7A8QA85Z2MH11', 'AltaGas Ltd.', 'ALTAGAS LTD.', '2022-01-01', '2022-12-31', 10396402291.648706),
 ('529900NZXZGBCBXYY327', 'Amaresco, Inc.', 'AMERESCO, INC.', '2022-01-01', '2022-12-31'