# ITR Data Pipeline

* Global Parameters
* Industry Data (Sector Projections)

* Portfolio Data
* Company Data
* Automization
* Temperature Scoring

## Environment variables and dot-env

The following cell looks for a "dot-env" file in some standard locations,
and loads its contents into `os.environ`.

In [1]:
import os
import pathlib
from dotenv import load_dotenv

# Load some standard environment variables from a dot-env file, if it exists.
# If no such file can be found, does not fail, and so allows these environment vars to
# be populated in some other way
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', os.environ.get('PWD', '/opt/app-root/src'))
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path,override=True)

## S3 and boto3

In [2]:
import boto3

s3_source = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ['S3_LANDING_ENDPOINT'],
    aws_access_key_id=os.environ['S3_LANDING_ACCESS_KEY'],
    aws_secret_access_key=os.environ['S3_LANDING_SECRET_KEY'],
)
source_bucket = s3_source.Bucket(os.environ['S3_LANDING_BUCKET'])

In [3]:
from osc_ingest_trino import *

s3 = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ["S3_DEV_ENDPOINT"],
    aws_access_key_id=os.environ["S3_DEV_ACCESS_KEY"],
    aws_secret_access_key=os.environ["S3_DEV_SECRET_KEY"],
)
trino_bucket = attach_s3_bucket("S3_DEV")

## Connecting to Trino with sqlalchemy

In [4]:
import trino
from sqlalchemy.engine import create_engine

sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ['TRINO_USER'],
    host = os.environ['TRINO_HOST'],
    port = os.environ['TRINO_PORT']
)
sqlargs = {
    'auth': trino.auth.JWTAuthentication(os.environ['TRINO_PASSWD']),
    'http_scheme': 'https',
    'catalog': 'osc_datacommons_dev'
}
engine = create_engine(sqlstring, connect_args = sqlargs)
connection = engine.connect()

ingest_schema = 'itr_mdt'

In [5]:
import pandas as pd
import io

### Global Parameters

These parameters are set/selected by the ITR tool.

### Industry Data (Sector Projections)

In [6]:
scenarios = {}
for scenario in ['TPI', 'TPI_below_2', 'OECM']:
    df_dict = pd.read_excel(os.environ.get('PWD')+f"/itr-data-pipeline/data/external/{scenario}_EI_and_production_benchmarks{('','_v2')[scenario=='OECM']}.xlsx", sheet_name=None)
    for projtype in ['projected_production', 'projected_ei_in_Wh']:
        df_dict[projtype]['projection'] = projtype
        df_dict[projtype]['scenario'] = scenario
    scenarios[scenario] = pd.concat (df_dict.values())
df = pd.concat(scenarios, ignore_index=True)
cols = df.columns.tolist()
cols = cols[-2:]+cols[0:-2]
df = df[cols]
df

Unnamed: 0,projection,scenario,region,sector,2019,2020,2021,2022,2023,2024,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
0,projected_ei_in_Wh,TPI,Global,Steel,0.60756,0.457,0.4376,0.4182,0.3988,0.3794,...,0.0888,0.0806,0.0724,0.0642,0.056,0.0528,0.0496,0.0464,0.0432,0.04
1,projected_ei_in_Wh,TPI,Global,Electricity Utilities,1.669,1.498,1.4718,1.4456,1.4194,1.3932,...,0.7858,0.7586,0.7314,0.7042,0.677,0.6658,0.6546,0.6434,0.6322,0.621
2,projected_production,TPI,Global,Steel,0.0,0.015,0.015,0.015,0.015,0.015,...,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015
3,projected_production,TPI,Europe,Steel,0.0,0.015,0.015,0.015,0.015,0.015,...,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015
4,projected_production,TPI,North America,Steel,0.0,0.015,0.015,0.015,0.015,0.015,...,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015
5,projected_production,TPI,Global,Electricity Utilities,0.0,-0.076399,0.059958,0.059958,0.059958,0.059958,...,0.025848,0.025848,0.025848,0.025848,0.025848,0.011913,0.011913,0.011913,0.011913,0.011913
6,projected_production,TPI,Europe,Electricity Utilities,0.0,-0.076445,0.025801,0.025801,0.025801,0.025801,...,0.012046,0.012046,0.012046,0.012046,0.012046,0.00636,0.00636,0.00636,0.00636,0.00636
7,projected_production,TPI,North America,Electricity Utilities,0.0,-0.07581,0.019813,0.019813,0.019813,0.019813,...,0.014293,0.014293,0.014293,0.014293,0.014293,0.003227,0.003227,0.003227,0.003227,0.003227
8,projected_ei_in_Wh,TPI_below_2,Global,Steel,0.60756,0.44,0.418,0.396,0.374,0.352,...,0.0616,0.0512,0.0408,0.0304,0.02,0.0144,0.0088,0.0032,-0.0024,-0.008
9,projected_ei_in_Wh,TPI_below_2,Global,Electricity Utilities,1.669,1.325,1.2692,1.2134,1.1576,1.1018,...,0.4566,0.4362,0.4158,0.3954,0.375,0.3526,0.3302,0.3078,0.2854,0.263


In [7]:
sector_projections = df.melt(id_vars=cols[0:4], value_vars=cols[4:], var_name='year')
sector_projections

Unnamed: 0,projection,scenario,region,sector,year,value
0,projected_ei_in_Wh,TPI,Global,Steel,2019,0.607560
1,projected_ei_in_Wh,TPI,Global,Electricity Utilities,2019,1.669000
2,projected_production,TPI,Global,Steel,2019,0.000000
3,projected_production,TPI,Europe,Steel,2019,0.000000
4,projected_production,TPI,North America,Steel,2019,0.000000
...,...,...,...,...,...,...
891,projected_production,OECM,Europe,Steel,2050,0.015000
892,projected_production,OECM,North America,Steel,2050,0.015000
893,projected_production,OECM,Global,Electricity Utilities,2050,0.011913
894,projected_production,OECM,Europe,Electricity Utilities,2050,0.006360


### Portfolio Data

Prepare GLEIF matching data

In [8]:
gleif_file = s3_source.Object(os.environ['S3_LANDING_BUCKET'],'mtiemann-GLEIF/DERA-matches.csv')
gleif_file.download_file(f'/tmp/dera-gleif.csv')
gleif_df = pd.read_csv(f'/tmp/dera-gleif.csv', header=0, sep=',', dtype=str, engine='c')
gleif_dict = dict(zip(gleif_df.name, gleif_df.LEI))

In [9]:
gleif_dict['Old Dominion Electric Cooperative'.upper()]

'SW4VC32Z0ZKLJKPONQ50'

In [10]:
gleif_1 = { k.split(',')[0].split(' ')[0]:v for k,v in gleif_dict.items() }
gleif_2 = { ' '.join(k.split(',')[0].split(' ')[0:2]):v for k,v in gleif_dict.items() }

def gleif_match(x):
    x = x.split(',')[0]
    x = x.replace('.','')
    if x in gleif_dict:
        return gleif_dict[x]
    x2 = ' '.join(x.split(' ')[0:2])
    if x2 in gleif_2:
        return gleif_2[x2]
    if ' ' not in x and x in gleif_1:
        return gleif_1[x]
    return None

portfolio_df = pd.read_csv(os.environ.get('PWD')+f"/itr-data-pipeline/data/external/example_portfolio_rmi_v2.csv",
                        delimiter=';')
portfolio_df.loc[portfolio_df.company_name=='Cleco Partners LP', 'company_name'] = 'CLECO CORPORATE HOLDINGS LLC'
portfolio_df.loc[portfolio_df.company_name=='OG&E Energy', 'company_name'] = 'OGE ENERGY CORP.'
portfolio_df.loc[portfolio_df.company_name=='Old Dominion Electric Coop.', 'company_name'] = 'Old Dominion Electric Cooperative'
portfolio_df.loc[portfolio_df.company_name=='PG&E Corp.', 'company_name'] = 'PG&E Corp'
portfolio_df['LEI'] = portfolio_df['company_name'].str.upper().map(gleif_match)
# display(portfolio_df[portfolio_df.LEI.isna()])
portfolio_df = portfolio_df.dropna(how='any').convert_dtypes()
len(portfolio_df)

40

In [11]:
portfolio_df

Unnamed: 0,company_name,company_id,company_isin,investment_value,LEI
0,AES Corp.,1,1,4351252525.0,2NUNNB7D43COUIRE5295
1,Algonquin Power & Utilities Corp.,2,2,2228185629.0,549300K5VIUTJXQL7X75
2,"ALLETE, Inc.",3,3,3829481150.0,549300NNLSIMY6Z8OT86
3,Alliant Energy,4,4,3829481150.0,5493009ML300G373MZ12
4,Ameren Corp.,5,5,15917812492.0,XRZQ5S7HYJFPHJ78L959
5,"American Electric Power Co., Inc.",6,6,45520637216.0,1B4S6S7G0TW5EE83BO58
6,"Avangrid, Inc.",7,7,10049068025.0,549300OX0Q38NLSKPB49
7,Avista Corp.,8,8,2804211626.0,Q0IK63NITJD6RJ47SW96
13,CLECO CORPORATE HOLDINGS LLC,14,14,3086052316.0,5493002H80P81B3HXL31
14,CMS Energy,15,15,9153135681.0,549300IA9XFBAGNIBW29


In [12]:
engine.execute(f"create schema if not exists {ingest_schema}").fetchall()

qres = engine.execute(f"show tables in {ingest_schema}")
l = qres.fetchall()
for x in l:
    qres = engine.execute(f"drop table {ingest_schema}.{x[0]}")
    display(qres.fetchall())

engine.execute(f"drop schema {ingest_schema}").fetchall()
engine.execute(f"create schema {ingest_schema}").fetchall()

[(True,)]

[(True,)]

In [13]:
buf = io.BytesIO()
portfolio_df.to_parquet(path=buf)
buf.seek(0)
trino_bucket.upload_fileobj(Fileobj=buf,
                            Key=f'trino/{ingest_schema}/portfolio/data.parquet')

columnschema = create_table_schema_pairs(enforce_sql_column_names(portfolio_df))
tabledef = f"""
create table if not exists {ingest_schema}.portfolio (
{columnschema}
) with (
format = 'parquet',
external_location = 's3a://{trino_bucket.name}/trino/{ingest_schema}/portfolio/'
)
"""
print(tabledef)

table_create = engine.execute(tabledef)
for row in table_create.fetchall():
    print(row)




create table if not exists itr_mdt.portfolio (
    company_name varchar,
    company_id bigint,
    company_isin bigint,
    investment_value double,
    lei varchar
) with (
format = 'parquet',
external_location = 's3a://ocp-odh-os-demo-s3/trino/itr_mdt/portfolio/'
)

(True,)


### Company Data

In [14]:
# We have no S3 emissions in RMI data.

engine.execute("select * from sec_dera.sic_isic").fetchall()

[(2911, 1920), (3714, 2910), (3312, 2410), (4911, 4010)]

In [15]:
engine.execute("describe rmi_20211120.revenue_by_tech").fetchall()

[('parent_name', 'varchar', '', ''),
 ('utility_name', 'varchar', '', ''),
 ('respondent_id', 'integer', '', ''),
 ('year', 'timestamp(3)', '', ''),
 ('technology', 'varchar', '', ''),
 ('component', 'varchar', '', ''),
 ('revenue_total', 'double', '', ''),
 ('revenue_residential', 'double', '', '')]

In [16]:
engine.execute("describe sec_dera.financials_by_lei").fetchall()

[('name', 'varchar', '', ''),
 ('lei', 'varchar', '', ''),
 ('cik', 'integer', '', ''),
 ('fy', 'timestamp(3)', '', ''),
 ('ddate', 'timestamp(3)', '', ''),
 ('sic', 'smallint', '', ''),
 ('revenue_usd', 'double', '', ''),
 ('income_usd', 'double', '', ''),
 ('market_cap_usd', 'double', '', ''),
 ('debt_usd', 'double', '', ''),
 ('cash_usd', 'double', '', ''),
 ('assets_usd', 'double', '', '')]

In [17]:
gleif_dict['AES Corp'.upper()]

'2NUNNB7D43COUIRE5295'

In [18]:
gleif_2['AES CORP']

'2NUNNB7D43COUIRE5295'

In [19]:
qres = engine.execute("select * from rmi_20211120.utility_information where parent_lei='5493008F4ZOQFNG3WN54'")
l = qres.fetchall()
print(len(l))

0


In [20]:
l

[]

In [21]:
lei = '2NUNNB7D43COUIRE5295'
qres=engine.execute(f"""
select count (*), U.parent_name, U.parent_lei, 'co2', sum(EM.emissions_co2)*1000000, 'nox', sum(EM.emissions_nox), 'co2e', sum(EM.emissions_co2)*1000000 + 298 * sum(coalesce(EM.emissions_nox,0)), EM.year
from rmi_20211120.utility_information as U join rmi_20211120.operations_emissions_by_fuel as EM on U.respondent_id=EM.respondent_id
where U.parent_lei='{lei}'
group by U.parent_name, U.parent_lei, EM.year
order by EM.year desc
""")
qres.fetchall()

[(29, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 9425520.152473329, 'nox', None, 'co2e', 9425520.152473329, '2020-01-01 00:00:00.000'),
 (29, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 11616368.146942126, 'nox', 7644.781547817038, 'co2e', 13894513.048191603, '2019-01-01 00:00:00.000'),
 (29, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 11235889.892304936, 'nox', 9186.028876377723, 'co2e', 13973326.497465499, '2018-01-01 00:00:00.000'),
 (63, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 10483392.150493385, 'nox', 9370.12129119441, 'co2e', 13275688.29526932, '2017-01-01 00:00:00.000'),
 (83, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 20950763.511785727, 'nox', 21862.279738238183, 'co2e', 27465722.873780705, '2016-01-01 00:00:00.000'),
 (93, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 22075533.730533343, 'nox', 25317.484745301856, 'co2e', 29620144.184633296, '2015-01-01 00:00:00.000'),
 (92, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 27647741.53420725, 'nox', 34320.420396608606, 'co2e',

In [22]:
l[:10]

[]

In [23]:
qres = engine.execute(f"""
select F.name, F.lei, T.tname, U2.parent_ticker, F.sic, F.ddate,
       'revenue' as rl, round (F.revenue_usd/1000000.0, 1), round (RT2.fy_revenue_total/1000000.0, 1), round (CS2.fy_revenues/1000000.0, 1), round (F.revenue_usd/RT2.fy_revenue_total, 1), round (F.revenue_usd/CS2.fy_revenues, 1),
       'market_cap' as fl, round (F.market_cap_usd/1000000.0, 1),
       'EV' as el, round ((F.market_cap_usd+F.debt_usd-F.cash_usd)/1000000, 1),
       'assets' as al, round (F.assets_usd/1000000.0, 1), round (AEI2.asset_value/1000000.0, 1), round (F.assets_usd/AEI2.asset_value, 1),
       'cash' as cc, round (F.cash_usd/1000000.0, 1),
       -- 'income' as il, F.income_usd/1000000.0, AEI2.fy_earnings_value/1000000.0, F.income_usd/AEI2.fy_earnings_value,
       'counts: ulei, aei, rt, cs' as legend, c_ulei, c_aei, c_rt, c_cs
from {ingest_schema}.portfolio as P
     join (select count (*) as c_ulei, U.parent_name, U.parent_lei, U.parent_ticker
           from rmi_20211120.utility_information as U
           group by U.parent_name, U.parent_lei, U.parent_ticker) as U2 on U2.parent_lei=P.lei
     join sec_dera.financials_by_lei as F on F.lei=P.lei
     join (select count (*) as c_cs, CS.parent_name, CS.year, sum(revenues) as fy_revenues
           from rmi_20211120.customers_sales as CS
           group by CS.parent_name, CS.year) as CS2 on CS2.parent_name=U2.parent_name and year(CS2.year)=year(F.ddate)
     join (select count (*) as c_aei, AEI.parent_name, AEI.year, sum(AEI.asset_value) as asset_value, sum(AEI.earnings_value) as fy_earnings_value
           from rmi_20211120.assets_earnings_investments as AEI
           group by AEI.parent_name, AEI.year) as AEI2 on AEI2.parent_name=U2.parent_name and year(AEI2.year)=year(F.ddate)
     left join (select count (*) as c_rt, RT.parent_name, RT.year, sum(RT.revenue_total) as fy_revenue_total
           from rmi_20211120.revenue_by_tech as RT
           group by RT.parent_name, RT.year) as RT2 on RT2.parent_name=U2.parent_name and year(RT2.year)=year(F.ddate)
     left join sec_dera.ticker T on F.cik=T.cik and upper(T.tname)=U2.parent_ticker
where year(F.ddate)=2019
order by F.name
""")

l = qres.fetchall()
print(len(l))
[x for x in l if any(x) is None]

38


[]

In [24]:
l

[('AES CORP', '2NUNNB7D43COUIRE5295', 'aes', 'AES', 4991, '2019-12-31 00:00:00.000', 'revenue', 10189.0, 2042.7, 2180.7, 5.0, 4.7, 'market_cap', 10870.0, 'EV', 10102.0, 'assets', 33648.0, 4351.3, 7.7, 'cash', 1029.0, 'counts: ulei, aei, rt, cs', 2, 34, 49, 12),
 ('ALGONQUIN POWER & UTILITIES CORP.', '549300K5VIUTJXQL7X75', 'aqn', 'AQN', 4911, '2019-12-31 00:00:00.000', 'revenue', 1626.4, 692.0, 767.0, 2.4, 2.1, 'market_cap', None, 'EV', None, 'assets', 10920.8, 2237.8, 4.9, 'cash', 62.5, 'counts: ulei, aei, rt, cs', 3, 46, 61, 18),
 ('ALLETE INC', '549300NNLSIMY6Z8OT86', 'ale', 'ALE', 4931, '2019-12-31 00:00:00.000', 'revenue', 1240.5, 739.7, 955.0, 1.7, 1.3, 'market_cap', 4285.3, 'EV', 5829.8, 'assets', 5482.8, 3829.5, 1.4, 'cash', 69.3, 'counts: ulei, aei, rt, cs', 2, 35, 51, 12),
 ('ALLIANT ENERGY CORP', '5493009ML300G373MZ12', 'lnt', 'LNT', 4931, '2019-12-31 00:00:00.000', 'revenue', 3648.0, 2777.7, 3059.2, 1.3, 1.2, 'market_cap', 11600.0, 'EV', 18504.0, 'assets', 16701.0, 10006.9,

In [25]:
engine.execute("select * from sec_dera.sub where adsh='0000878004-20-000006'").fetchall()

[('0000878004-20-000006', 878004, 'CHUGACH ELECTRIC ASSOCIATION INC', None, 4911, 'US', 'AK', 'ANCHORAGE', '99518', '5601 ELECTRON DR', 'PO BOX 196300', '9075637494', 'US', 'AK', 'ANCHORAGE', '99518', '5601 ELECTRON DRIVE', '', 'US', 'AK', 920014224, '', '', '4-NON', True, '1231', '10-K', '2019-12-31 00:00:00.000', '2019-01-01 00:00:00.000', 'FY', '2020-03-20 00:00:00.000', '2020-03-20 15:57:00.000', True, True, 'c004-20191231.xml', 1, '', '2020q1')]

In [26]:
engine.execute("select * from rmi_20211120.utility_information as U join rmi_20211120.revenue_by_tech as CS on U.parent_name=CS.parent_name where U.parent_lei='SW4VC32Z0ZKLJKPONQ50'").fetchall()

[]

In [27]:
engine.execute("select * from sec_dera.t_f where lei='549300SVYJS666PQJH88' and year(ddate)=2019").fetchall()

[('0001031296-20-000008', 1031296, 'FIRSTENERGY CORP', '549300SVYJS666PQJH88', 4911, '2019-01-01 00:00:00.000', '2019-06-30 00:00:00.000', 'USD', 22724895037.0)]

In [29]:
engine.execute("select * from sec_dera.num where adsh='0000066756-20-000016' and ddate=DATE('2019-06-30') and value=4285299935").fetchall()

[('0000066756-20-000016', 'EntityPublicFloat', 'dei/2019', None, '2019-06-30 00:00:00.000', 0, 'USD', 4285299935.0, None, '2020q1')]

In [31]:
engine.execute("select * from sec_dera.num where adsh='0001174169-21-000012' and tag like '%Float%'").fetchall()

[]

In [32]:
engine.execute("select * from sec_dera.t_f where lei='549300K5VIUTJXQL7X75'").fetchall()

[]

In [33]:
engine.execute("select * from sec_dera.t_f where lei='2NUNNB7D43COUIRE5295'").fetchall()

[('0000874761-21-000015', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2020-01-01 00:00:00.000', '2020-06-30 00:00:00.000', 'USD', 9420000000.0),
 ('0000874761-19-000012', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2018-01-01 00:00:00.000', '2017-06-30 00:00:00.000', 'USD', 8630000000.0),
 ('0000874761-20-000012', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2019-01-01 00:00:00.000', '2019-06-30 00:00:00.000', 'USD', 10870000000.0)]

In [35]:
engine.execute("select * from sec_dera.t_f F left join sec_dera.ticker T on F.cik=T.cik where lei='2NUNNB7D43COUIRE5295' and T.tname='aes' order by fy desc").fetchall()

[('0000874761-21-000015', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2020-01-01 00:00:00.000', '2020-06-30 00:00:00.000', 'USD', 9420000000.0, 874761, 'aes'),
 ('0000874761-20-000012', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2019-01-01 00:00:00.000', '2019-06-30 00:00:00.000', 'USD', 10870000000.0, 874761, 'aes'),
 ('0000874761-19-000012', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2018-01-01 00:00:00.000', '2017-06-30 00:00:00.000', 'USD', 8630000000.0, 874761, 'aes')]

In [36]:
engine.execute("select * from sec_dera.t_r where lei='2NUNNB7D43COUIRE5295' order by ddate desc").fetchall()

[('0000874761-21-000015', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2020-01-01 00:00:00.000', '2020-12-31 00:00:00.000', 'USD', 9660000000.0),
 ('0000874761-21-000015', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2020-01-01 00:00:00.000', '2019-12-31 00:00:00.000', 'USD', 10189000000.0),
 ('0000874761-21-000015', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2020-01-01 00:00:00.000', '2018-12-31 00:00:00.000', 'USD', 10736000000.0),
 ('0000874761-20-000012', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2019-01-01 00:00:00.000', '2017-12-31 00:00:00.000', 'USD', 10530000000.0),
 ('0000874761-19-000012', 874761, 'AES CORP', '2NUNNB7D43COUIRE5295', 4991, '2018-01-01 00:00:00.000', '2016-12-31 00:00:00.000', 'USD', 10281000000.0)]

In [37]:
qres=engine.execute(f"""
select CS.* -- count (*) as c_cs, U.parent_lei, CS.year, sum(CS.revenues) as fy_revenues
from rmi_20211120.customers_sales as CS join rmi_20211120.utility_information U on CS.parent_name=U.parent_name
where CS.year=DATE('2019-01-01') and U.parent_lei='{lei}'
-- group by U.parent_lei, CS.year
""")
l = qres.fetchall()
print(len(l))

24


In [38]:
qres = engine.execute(f"""
select year(CS2.year), U2.parent_name, U2.parent_lei, U2.parent_ticker,
       round(RT2.fy_revenue_total/1000000.0,1), round(CS2.fy_revenues/1000000.0,1),
       cast(round((1-RT2.fy_revenue_total/CS2.fy_revenues)*100,1) as varchar) || '%',
       'counts: ulei, rt, cs' as legend, c_ulei, c_rt, c_cs
from (select count (*) as c_ulei, U.parent_name, U.parent_lei, U.parent_ticker
           from rmi_20211120.utility_information as U
           group by U.parent_name, U.parent_lei, U.parent_ticker) as U2
     join (select count (*) as c_cs, CS.year, CS.parent_name, sum(revenues) as fy_revenues
           from rmi_20211120.customers_sales as CS
           group by CS.year, CS.parent_name) as CS2 on CS2.parent_name=U2.parent_name and CS2.year>=DATE('2018-01-01')
     join (select count (*) as c_rt, RT.year, RT.parent_name, sum(RT.revenue_total) as fy_revenue_total
           from rmi_20211120.revenue_by_tech as RT
           group by RT.year, RT.parent_name) as RT2 on RT2.parent_name=U2.parent_name and RT2.year=CS2.year
order by year(CS2.year) desc, U2.parent_name
""")

l = qres.fetchall()
print(len(l))

180


In [39]:
l[0:20]

[(2020, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'AES', 1873.6, 1962.3, '4.5%', 'counts: ulei, rt, cs', 2, 49, 12),
 (2020, 'Algonquin Power & Utilities Corp.', '549300K5VIUTJXQL7X75', 'AQN', 664.1, 725.5, '8.5%', 'counts: ulei, rt, cs', 3, 62, 18),
 (2020, 'Allete, Inc.', '549300NNLSIMY6Z8OT86', 'ALE', 691.5, 887.2, '22.1%', 'counts: ulei, rt, cs', 2, 51, 12),
 (2020, 'Alliant Energy', '5493009ML300G373MZ12', 'LNT', 2670.1, 2908.7, '8.2%', 'counts: ulei, rt, cs', 5, 65, 12),
 (2020, 'Ameren Corp.', 'XRZQ5S7HYJFPHJ78L959', 'AEE', 4451.2, 4391.0, '-1.4%', 'counts: ulei, rt, cs', 6, 61, 14),
 (2020, 'American Electric Power Co., Inc.', '1B4S6S7G0TW5EE83BO58', 'AEP', 10336.9, 11421.2, '9.5%', 'counts: ulei, rt, cs', 24, 217, 61),
 (2020, 'American States Water', '529900L26LIS2V8PWM23', 'AWR', 226.0, 18.9, '-1098.0%', 'counts: ulei, rt, cs', 1, 18, 4),
 (2020, 'Avangrid, Inc.', '549300OX0Q38NLSKPB49', 'AGR', 3391.9, 2819.6, '-20.3%', 'counts: ulei, rt, cs', 6, 86, 25),
 (2020, 'Avista Corp.', 

In [40]:
l[60:80]

[(2019, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'AES', 2042.7, 2180.7, '6.3%', 'counts: ulei, rt, cs', 2, 49, 12),
 (2019, 'Algonquin Power & Utilities Corp.', '549300K5VIUTJXQL7X75', 'AQN', 692.0, 767.0, '9.8%', 'counts: ulei, rt, cs', 3, 61, 18),
 (2019, 'Allete, Inc.', '549300NNLSIMY6Z8OT86', 'ALE', 739.7, 955.0, '22.5%', 'counts: ulei, rt, cs', 2, 51, 12),
 (2019, 'Alliant Energy', '5493009ML300G373MZ12', 'LNT', 2777.7, 3059.2, '9.2%', 'counts: ulei, rt, cs', 5, 66, 12),
 (2019, 'Ameren Corp.', 'XRZQ5S7HYJFPHJ78L959', 'AEE', 4705.6, 4543.3, '-3.6%', 'counts: ulei, rt, cs', 6, 59, 15),
 (2019, 'American Electric Power Co., Inc.', '1B4S6S7G0TW5EE83BO58', 'AEP', 10664.7, 12212.3, '12.7%', 'counts: ulei, rt, cs', 24, 215, 60),
 (2019, 'American States Water', '529900L26LIS2V8PWM23', 'AWR', 234.9, 39.7, '-491.7%', 'counts: ulei, rt, cs', 1, 20, 4),
 (2019, 'Avangrid, Inc.', '549300OX0Q38NLSKPB49', 'AGR', 3392.8, 2767.8, '-22.6%', 'counts: ulei, rt, cs', 6, 86, 25),
 (2019, 'Avista Corp.', 

In [43]:
engine.execute("select * from sec_dera.num where tag = 'ComputedTreasuryFloat'").fetchall()

[]

In [44]:
engine.execute("describe sec_dera.num").fetchall()

[('adsh', 'varchar', '', ''),
 ('tag', 'varchar', '', ''),
 ('version', 'varchar', '', ''),
 ('coreg', 'varchar', '', ''),
 ('ddate', 'timestamp(3)', '', ''),
 ('qtrs', 'smallint', '', ''),
 ('uom', 'varchar', '', ''),
 ('value', 'double', '', ''),
 ('footnote', 'varchar', '', ''),
 ('srcdir', 'varchar', '', '')]

In [45]:
engine.execute("select * from sec_dera.float_xyz_by_lei where adsh='0001004315-21-000067'").fetchall()

[]

In [46]:
engine.execute("select count (*) from sec_dera.num where tag='CommonStockValue' and value>0 and value<1000").fetchall()

[(1152,)]

In [47]:
engine.execute("select count (*) from sec_dera.num where tag='CommonStockValue' and value>1000").fetchall()

[(26898,)]