# ITR Data Pipeline

* Global Parameters
* Industry Data (Sector Projections)

* Portfolio Data
* Company Data
* Automization
* Temperature Scoring

## Environment variables and dot-env

The following cell looks for a "dot-env" file in some standard locations,
and loads its contents into `os.environ`.

In [1]:
import os
import pathlib
from dotenv import load_dotenv

# Load some standard environment variables from a dot-env file, if it exists.
# If no such file can be found, does not fail, and so allows these environment vars to
# be populated in some other way
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', os.environ.get('PWD', '/opt/app-root/src'))
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path,override=True)

## S3 and boto3

In [2]:
import boto3

s3_source = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ['S3_LANDING_ENDPOINT'],
    aws_access_key_id=os.environ['S3_LANDING_ACCESS_KEY'],
    aws_secret_access_key=os.environ['S3_LANDING_SECRET_KEY'],
)
source_bucket = s3_source.Bucket(os.environ['S3_LANDING_BUCKET'])

In [3]:
from osc_ingest_trino import *

s3 = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ["S3_DEV_ENDPOINT"],
    aws_access_key_id=os.environ["S3_DEV_ACCESS_KEY"],
    aws_secret_access_key=os.environ["S3_DEV_SECRET_KEY"],
)
trino_bucket = attach_s3_bucket("S3_DEV")

## Connecting to Trino with sqlalchemy

In [4]:
import trino
from sqlalchemy.engine import create_engine

sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ['TRINO_USER'],
    host = os.environ['TRINO_HOST'],
    port = os.environ['TRINO_PORT']
)
sqlargs = {
    'auth': trino.auth.JWTAuthentication(os.environ['TRINO_PASSWD']),
    'http_scheme': 'https',
    'catalog': 'osc_datacommons_dev'
}
engine = create_engine(sqlstring, connect_args = sqlargs)
connection = engine.connect()

ingest_schema = 'itr_mdt'

In [5]:
import pandas as pd
import io

### Global Parameters

These parameters are set/selected by the ITR tool.

### Industry Data (Sector Projections)

In [6]:
scenarios = {}
for scenario in ['TPI', 'TPI_below_2', 'OECM']:
    df_dict = pd.read_excel(os.environ.get('PWD')+f"/itr-data-pipeline/data/external/{scenario}_EI_and_production_benchmarks{('','_v2')[scenario=='OECM']}.xlsx", sheet_name=None)
    for projtype in ['projected_production', 'projected_ei_in_Wh']:
        df_dict[projtype]['projection'] = projtype
        df_dict[projtype]['scenario'] = scenario
    scenarios[scenario] = pd.concat (df_dict.values())
df = pd.concat(scenarios, ignore_index=True)
cols = df.columns.tolist()
cols = cols[-2:]+cols[0:-2]
df = df[cols]
df

Unnamed: 0,projection,scenario,region,sector,2019,2020,2021,2022,2023,2024,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
0,projected_ei_in_Wh,TPI,Global,Steel,0.60756,0.457,0.4376,0.4182,0.3988,0.3794,...,0.0888,0.0806,0.0724,0.0642,0.056,0.0528,0.0496,0.0464,0.0432,0.04
1,projected_ei_in_Wh,TPI,Global,Electricity Utilities,1.669,1.498,1.4718,1.4456,1.4194,1.3932,...,0.7858,0.7586,0.7314,0.7042,0.677,0.6658,0.6546,0.6434,0.6322,0.621
2,projected_production,TPI,Global,Steel,0.0,0.015,0.015,0.015,0.015,0.015,...,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015
3,projected_production,TPI,Europe,Steel,0.0,0.015,0.015,0.015,0.015,0.015,...,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015
4,projected_production,TPI,North America,Steel,0.0,0.015,0.015,0.015,0.015,0.015,...,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.015
5,projected_production,TPI,Global,Electricity Utilities,0.0,-0.076399,0.059958,0.059958,0.059958,0.059958,...,0.025848,0.025848,0.025848,0.025848,0.025848,0.011913,0.011913,0.011913,0.011913,0.011913
6,projected_production,TPI,Europe,Electricity Utilities,0.0,-0.076445,0.025801,0.025801,0.025801,0.025801,...,0.012046,0.012046,0.012046,0.012046,0.012046,0.00636,0.00636,0.00636,0.00636,0.00636
7,projected_production,TPI,North America,Electricity Utilities,0.0,-0.07581,0.019813,0.019813,0.019813,0.019813,...,0.014293,0.014293,0.014293,0.014293,0.014293,0.003227,0.003227,0.003227,0.003227,0.003227
8,projected_ei_in_Wh,TPI_below_2,Global,Steel,0.60756,0.44,0.418,0.396,0.374,0.352,...,0.0616,0.0512,0.0408,0.0304,0.02,0.0144,0.0088,0.0032,-0.0024,-0.008
9,projected_ei_in_Wh,TPI_below_2,Global,Electricity Utilities,1.669,1.325,1.2692,1.2134,1.1576,1.1018,...,0.4566,0.4362,0.4158,0.3954,0.375,0.3526,0.3302,0.3078,0.2854,0.263


In [7]:
sector_projections = df.melt(id_vars=cols[0:4], value_vars=cols[4:], var_name='year')
sector_projections

Unnamed: 0,projection,scenario,region,sector,year,value
0,projected_ei_in_Wh,TPI,Global,Steel,2019,0.607560
1,projected_ei_in_Wh,TPI,Global,Electricity Utilities,2019,1.669000
2,projected_production,TPI,Global,Steel,2019,0.000000
3,projected_production,TPI,Europe,Steel,2019,0.000000
4,projected_production,TPI,North America,Steel,2019,0.000000
...,...,...,...,...,...,...
891,projected_production,OECM,Europe,Steel,2050,0.015000
892,projected_production,OECM,North America,Steel,2050,0.015000
893,projected_production,OECM,Global,Electricity Utilities,2050,0.011913
894,projected_production,OECM,Europe,Electricity Utilities,2050,0.006360


### Portfolio Data

Get RMI LEI/ISIN data

In [8]:
rmi_lei_isin = pd.read_sql('select parent_name, parent_lei, parent_isin from rmi_20211120.utility_information', engine)
rmi_dict = dict(zip(rmi_lei_isin.parent_lei, rmi_lei_isin.parent_isin))
rmi_lei_isin

Unnamed: 0,parent_name,parent_lei,parent_isin
0,"American Electric Power Co., Inc.",1B4S6S7G0TW5EE83BO58,US0255371017
1,Southern Co.,549300FC3G3YU2FBZD92,US8425871071
2,Avista Corp.,Q0IK63NITJD6RJ47SW96,US05379B1070
3,Alcoa Corp.,549300T12EZ1F6PWWU29,US0138721065
4,FirstEnergy Corp.,549300SVYJS666PQJH88,US3379321074
...,...,...,...
370,"American Electric Power Co., Inc.",1B4S6S7G0TW5EE83BO58,US0255371017
371,WEC Energy Group,549300IGLYTZUK3PVP70,US92939U1060
372,Algonquin Power & Utilities Corp.,549300K5VIUTJXQL7X75,US0158577090
373,Tri-State Generation & Transmission Associatio...,549300VDHNFNPADSSV98,


Prepare GLEIF matching data

In [9]:
gleif_file = s3_source.Object(os.environ['S3_LANDING_BUCKET'],'mtiemann-GLEIF/DERA-matches.csv')
gleif_file.download_file(f'/tmp/dera-gleif.csv')
gleif_df = pd.read_csv(f'/tmp/dera-gleif.csv', header=0, sep=',', dtype=str, engine='c')
gleif_dict = dict(zip(gleif_df.name, gleif_df.LEI))

In [10]:
gleif_dict['Old Dominion Electric Cooperative'.upper()]

'SW4VC32Z0ZKLJKPONQ50'

Create a very simple entity matcher

In [11]:
# gleif_dict['Basin Electric Power Coop'.upper()] = gleif_dict['BASIN ELECTRIC POWER COOPERATIVE']
# gleif_dict['Big Rivers Electric Corp'.upper()] = gleif_dict['BIG RIVERS ELECTRIC CORPORATION']
gleif_dict['Cleco Partners LP'.upper()] = gleif_dict['CLECO CORPORATE HOLDINGS LLC']
# gleif_dict['Golden Spread Electric Coop., Inc'.upper()] = gleif_dict['GOLDEN SPREAD ELECTRIC COOPERATIVE, INC.']
gleif_dict['MIDWEST ENERGY INC'] = '549300O4B5CVWMKUES27'
gleif_dict['OG&E Energy'.upper()] = gleif_dict['OGE ENERGY CORP.']
# gleif_dict['Ohio Valley Electric Corp'.upper()] = gleif_dict['OHIO VALLEY ELECTRIC CORPORATION']
gleif_dict['Old Dominion Electric Coop'.upper()] = gleif_dict['OLD DOMINION ELECTRIC COOPERATIVE']
gleif_dict['PG&E Corp'.upper()] = gleif_dict['PG&E CORP']
gleif_dict['Tri-State Generation & Transmission Association'.upper()] = gleif_dict['TRI-STATE GENERATION & TRANSMISSION ASSOCIATION, INC.']

gleif_1 = { k.split(',')[0].split(' ')[0]:v for k,v in gleif_dict.items() }
gleif_2 = { ' '.join(k.split(',')[0].split(' ')[0:2]):v for k,v in gleif_dict.items() }

def gleif_match(x):
    x = x.split(',')[0]
    x = x.replace('.','')
    if x in gleif_dict:
        return gleif_dict[x]
    x2 = ' '.join(x.split(' ')[0:2])
    if x2 in gleif_2:
        return gleif_2[x2]
    if ' ' not in x and x in gleif_1:
        return gleif_1[x]
    return None

In [12]:
portfolio_df = pd.read_csv(os.environ.get('PWD')+f"/itr-data-pipeline/data/external/example_portfolio_rmi_v2.csv",
                        delimiter=';')
portfolio_df.insert(1, 'company_lei', portfolio_df.company_name.str.upper().map(gleif_match))
portfolio_df.company_id = portfolio_df.company_lei.map(rmi_dict)
portfolio_df = portfolio_df.drop('company_isin', axis=1)
display(portfolio_df[portfolio_df.company_lei.isna()])
portfolio_df = portfolio_df.dropna(how='any').convert_dtypes()
print(len(portfolio_df))
portfolio_df.iloc[0:40]

Unnamed: 0,company_name,company_lei,company_id,investment_value
8,Basin Electric Power Coop.,,,5363527000.0
9,Big Rivers Electric Corp.,,,1654593000.0
11,"Buckeye Power, Inc.",,,
12,Chugach Electric Assn Inc.,,,706390100.0
16,Deseret Generation & Tran Coop.,,,211398500.0
25,Fall River Electric Coop.,,,
28,"Golden Spread Electric Coop., Inc",,,1026858000.0
30,"Hermiston Generating Co., L.P.",,,
31,"Iberdrola, S.A.",,,0.0
33,Madison Gas and Electric Co.,,,


35


Unnamed: 0,company_name,company_lei,company_id,investment_value
0,AES Corp.,2NUNNB7D43COUIRE5295,US00130H1059,4351252525
1,Algonquin Power & Utilities Corp.,549300K5VIUTJXQL7X75,US0158577090,2228185629
2,"ALLETE, Inc.",549300NNLSIMY6Z8OT86,US0185223007,3829481150
3,Alliant Energy,5493009ML300G373MZ12,US0188021085,3829481150
4,Ameren Corp.,XRZQ5S7HYJFPHJ78L959,US0236081024,15917812492
5,"American Electric Power Co., Inc.",1B4S6S7G0TW5EE83BO58,US0255371017,45520637216
6,"Avangrid, Inc.",549300OX0Q38NLSKPB49,US05351W1036,10049068025
7,Avista Corp.,Q0IK63NITJD6RJ47SW96,US05379B1070,2804211626
13,Cleco Partners LP,5493002H80P81B3HXL31,US18551QAA58,3086052316
14,CMS Energy,549300IA9XFBAGNIBW29,US1258961002,9153135681


In [13]:
engine.execute(f"create schema if not exists {ingest_schema}").fetchall()

qres = engine.execute(f"show tables in {ingest_schema}")
l = qres.fetchall()
for x in l:
    qres = engine.execute(f"drop table {ingest_schema}.{x[0]}")
    display(qres.fetchall())

engine.execute(f"drop schema {ingest_schema}").fetchall()
engine.execute(f"create schema {ingest_schema}").fetchall()

[(True,)]

[(True,)]

In [14]:
buf = io.BytesIO()
portfolio_df.to_parquet(path=buf)
buf.seek(0)
trino_bucket.upload_fileobj(Fileobj=buf,
                            Key=f'trino/{ingest_schema}/portfolio/data.parquet')

columnschema = create_table_schema_pairs(enforce_sql_column_names(portfolio_df))
tabledef = f"""
create table if not exists {ingest_schema}.portfolio (
{columnschema}
) with (
format = 'parquet',
external_location = 's3a://{trino_bucket.name}/trino/{ingest_schema}/portfolio/'
)
"""
print(tabledef)

table_create = engine.execute(tabledef)
for row in table_create.fetchall():
    print(row)


create table if not exists itr_mdt.portfolio (
    company_name varchar,
    company_lei varchar,
    company_id varchar,
    investment_value bigint
) with (
format = 'parquet',
external_location = 's3a://ocp-odh-os-demo-s3/trino/itr_mdt/portfolio/'
)

(True,)


### Company Data

In [15]:
# We have no S3 emissions in RMI data.

engine.execute("select * from sec_dera.sic_isic").fetchall()

[(1400, 1410),
 (2911, 1920),
 (3714, 2910),
 (3829, 3190),
 (3312, 2410),
 (4911, 4010),
 (4931, 4010),
 (4932, 4010),
 (4991, 4010)]

In [16]:
gleif_dict['AES Corp'.upper()]

'2NUNNB7D43COUIRE5295'

In [17]:
gleif_2['AES CORP']

'2NUNNB7D43COUIRE5295'

In [18]:
# Search for NEXTERA ENERGY PARTNERS LP
qres = engine.execute("select * from rmi_20211120.utility_information where parent_lei='5493008F4ZOQFNG3WN54'")
l = qres.fetchall()
print(len(l))

0


In [19]:
l

[]

In [20]:
lei = '2NUNNB7D43COUIRE5295'
qres=engine.execute(f"""
select count (*), U.parent_name, U.parent_lei, 'co2', sum(EM.emissions_co2)*1000000, 'nox', sum(EM.emissions_nox), 'co2e', sum(EM.emissions_co2)*1000000 + 265 * sum(coalesce(EM.emissions_nox,0)), EM.year
from rmi_20211120.utility_information as U join rmi_20211120.operations_emissions_by_fuel as EM on U.respondent_id=EM.respondent_id
where U.parent_lei='{lei}'
group by U.parent_name, U.parent_lei, EM.year
order by EM.year desc
""")
qres.fetchall()

[(29, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 9425520.152473329, 'nox', None, 'co2e', 9425520.152473329, '2020-01-01 00:00:00.000'),
 (29, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 11616368.146942126, 'nox', 7644.781547817038, 'co2e', 13642235.257113641, '2019-01-01 00:00:00.000'),
 (29, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 11235889.892304936, 'nox', 9186.028876377723, 'co2e', 13670187.544545032, '2018-01-01 00:00:00.000'),
 (63, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 10483392.150493385, 'nox', 9370.12129119441, 'co2e', 12966474.292659905, '2017-01-01 00:00:00.000'),
 (83, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 20950763.511785727, 'nox', 21862.279738238183, 'co2e', 26744267.642418846, '2016-01-01 00:00:00.000'),
 (93, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 22075533.730533343, 'nox', 25317.484745301856, 'co2e', 28784667.188038334, '2015-01-01 00:00:00.000'),
 (92, 'AES Corp.', '2NUNNB7D43COUIRE5295', 'co2', 27647741.53420725, 'nox', 34320.420396608606, 'co2e'

In [21]:
qres = engine.execute(f"""
select F.name, F.lei, T.tname, U2.parent_ticker, F.sic, F.ddate,
       'revenue' as rl, round (F.revenue_usd/1000000.0, 1), round (RT2.fy_revenue_total/1000000.0, 1), round (CS2.fy_revenues/1000000.0, 1), round (F.revenue_usd/RT2.fy_revenue_total, 1), round (F.revenue_usd/CS2.fy_revenues, 1),
       'market_cap' as fl, round (F.market_cap_usd/1000000.0, 1),
       'EV' as el, round ((F.market_cap_usd+F.debt_usd-F.cash_usd)/1000000, 1),
       'assets' as al, round (F.assets_usd/1000000.0, 1), round (AEI2.asset_value/1000000.0, 1), round (F.assets_usd/AEI2.asset_value, 1),
       'cash' as cc, round (F.cash_usd/1000000.0, 1),
       -- 'income' as il, F.income_usd/1000000.0, AEI2.fy_earnings_value/1000000.0, F.income_usd/AEI2.fy_earnings_value,
       'counts: ulei, aei, rt, cs' as legend, c_ulei, c_aei, c_rt, c_cs
from {ingest_schema}.portfolio as P
     join (select count (*) as c_ulei, U.parent_name, U.parent_lei, U.parent_ticker
           from rmi_20211120.utility_information as U
           group by U.parent_name, U.parent_lei, U.parent_ticker) as U2 on U2.parent_lei=P.company_lei
     join sec_dera.financials_by_lei as F on F.lei=P.company_lei
     join (select count (*) as c_cs, CS.parent_name, CS.year, sum(revenues) as fy_revenues
           from rmi_20211120.customers_sales as CS
           group by CS.parent_name, CS.year) as CS2 on CS2.parent_name=U2.parent_name and year(CS2.year)=year(F.ddate)
     join (select count (*) as c_aei, AEI.parent_name, AEI.year, sum(AEI.asset_value) as asset_value, sum(AEI.earnings_value) as fy_earnings_value
           from rmi_20211120.assets_earnings_investments as AEI
           group by AEI.parent_name, AEI.year) as AEI2 on AEI2.parent_name=U2.parent_name and year(AEI2.year)=year(F.ddate)
     left join (select count (*) as c_rt, RT.parent_name, RT.year, sum(RT.revenue_total) as fy_revenue_total
           from rmi_20211120.revenue_by_tech as RT
           group by RT.parent_name, RT.year) as RT2 on RT2.parent_name=U2.parent_name and year(RT2.year)=year(F.ddate)
     left join sec_dera.ticker T on F.cik=T.cik and upper(T.tname)=U2.parent_ticker
where year(F.ddate)=2019
order by F.name
""")

l = qres.fetchall()
print(len(l))
[x for x in l if any(x) is None]

35


[]

In [22]:
l

[('AES CORP', '2NUNNB7D43COUIRE5295', 'aes', 'AES', 4991, '2019-12-31 00:00:00.000', 'revenue', 10189.0, 2042.7, 2180.7, 5.0, 4.7, 'market_cap', 10870.0, 'EV', 10102.0, 'assets', 33648.0, 4351.3, 7.7, 'cash', 1029.0, 'counts: ulei, aei, rt, cs', 2, 34, 49, 12),
 ('ALGONQUIN POWER & UTILITIES CORP.', '549300K5VIUTJXQL7X75', 'aqn', 'AQN', 4911, '2019-12-31 00:00:00.000', 'revenue', 1626.4, 692.0, 767.0, 2.4, 2.1, 'market_cap', None, 'EV', None, 'assets', 10920.8, 2237.8, 4.9, 'cash', 62.5, 'counts: ulei, aei, rt, cs', 3, 46, 61, 18),
 ('ALLETE INC', '549300NNLSIMY6Z8OT86', 'ale', 'ALE', 4931, '2019-12-31 00:00:00.000', 'revenue', 1240.5, 739.7, 955.0, 1.7, 1.3, 'market_cap', 4285.3, 'EV', 5829.8, 'assets', 5482.8, 3829.5, 1.4, 'cash', 69.3, 'counts: ulei, aei, rt, cs', 2, 35, 51, 12),
 ('ALLIANT ENERGY CORP', '5493009ML300G373MZ12', 'lnt', 'LNT', 4931, '2019-12-31 00:00:00.000', 'revenue', 3648.0, 2777.7, 3059.2, 1.3, 1.2, 'market_cap', 11600.0, 'EV', 18504.0, 'assets', 16701.0, 10006.9,

In [23]:
engine.execute("select * from sec_dera.num where tag = 'ComputedTreasuryFloat'").fetchall()

[('0001004315-20-000053', 'ComputedTreasuryFloat', None, None, '2020-03-31 00:00:00.000', 0, 'GBP', 35773272349.94118, None, 'computed'),
 ('0001292814-20-001419', 'ComputedTreasuryFloat', None, None, '2019-12-31 00:00:00.000', 0, 'BRL', 25135071.515980467, None, 'computed'),
 ('0001004315-19-000071', 'ComputedTreasuryFloat', None, None, '2019-03-31 00:00:00.000', 0, 'GBP', 31403511080.169678, None, 'computed'),
 ('0001292814-19-001565', 'ComputedTreasuryFloat', None, None, '2018-12-31 00:00:00.000', 0, 'BRL', 17829125433.399166, None, 'computed'),
 ('0001004315-21-000067', 'ComputedTreasuryFloat', None, None, '2021-03-31 00:00:00.000', 0, 'GBP', 32929055967.57895, None, 'computed'),
 ('0001292814-21-001201', 'ComputedTreasuryFloat', None, None, '2020-12-31 00:00:00.000', 0, 'BRL', 105663839.15860832, None, 'computed')]

In [24]:
qres = engine.execute(f"""
select DISTINCT 'P.company_name', 'P.company_lei', 'P.company_id', 'S2I.isic',
       'F.country', 'region', 'sector', 'ghg_s1s2', 'ghg_s3',
       'F.revenue_usd', 'F.market_cap_usd', 'EV', 'F.assets_usd', 'F.cash_usd',
       'target_probability'
""")
l = qres.fetchall()
print(l)

sql = f"""
select DISTINCT P.company_name, P.company_lei, P.company_id, S2I.isic,
       F.country, 'North America' as region,
       'Electricity Utilities' as sector, sum(E.emissions_co2 + (265/1000000.0)*coalesce(E.emissions_nox, 0)) as ghg_s1s2, NULL as ghg_s3,
       F.revenue_usd as company_revenue,
       F.market_cap_usd as company_market_cap,
       F.market_cap_usd+F.debt_usd-F.cash_usd as company_enterprise_value,
       F.assets_usd as company_total_assets,
       F.cash_usd as company_cash_equivalents,
       cast(0.5 as real) as target_probability
from {ingest_schema}.portfolio as P
     left join sec_dera.financials_by_lei as F on F.lei=P.company_lei and F.ddate>=DATE('2019-01-01') and F.ddate<DATE('2020-01-01')
     -- join sec_dera.sub as S on S.cik=F.cik
     left join rmi_20211120.utility_information as U on U.parent_lei=P.company_lei
     -- left join gleif_mdt.gleif_isin_lei G on G.lei=P.lei and G.isin=U.parent_isin
     left join sec_dera.sic_isic as S2I on S2I.sic=F.sic
     left join rmi_20211120.operations_emissions_by_fuel as E on U.respondent_id=E.respondent_id and year(E.year)=year(F.ddate)
group by P.company_name, P.company_lei, P.company_id, S2I.isic,
       F.country, 'North America', 'Electric Utilties', NULL,
       F.revenue_usd, F.market_cap_usd, F.market_cap_usd+F.debt_usd-F.cash_usd, F.assets_usd, F.cash_usd,
       cast(0.5 as real)
order by P.company_name
limit 200
"""

qres = engine.execute(sql)
l = qres.fetchall()
print(len(l))
display(l[0:40])

[('P.company_name', 'P.company_lei', 'P.company_id', 'S2I.isic', 'F.country', 'region', 'sector', 'ghg_s1s2', 'ghg_s3', 'F.revenue_usd', 'F.market_cap_usd', 'EV', 'F.assets_usd', 'F.cash_usd', 'target_probability')]
35


[('AES Corp.', '2NUNNB7D43COUIRE5295', 'US00130H1059', 4010, 'US', 'North America', 'Electricity Utilities', 11.616368146942126, None, 10189000000.0, 10870000000.0, 10102000000.0, 33648000000.0, 1029000000.0, 0.5),
 ('ALLETE, Inc.', '549300NNLSIMY6Z8OT86', 'US0185223007', 4010, 'US', 'North America', 'Electricity Utilities', 4.383048126075728, None, 1240500000.0, 4285299935.0, 5829799935.0, 5482800000.0, 69300000.0, 0.5),
 ('Algonquin Power & Utilities Corp.', '549300K5VIUTJXQL7X75', 'US0158577090', 4010, 'CA', 'North America', 'Electricity Utilities', 3.327286484894897, None, 1626392000.0, None, None, 10920786000.0, 62485000.0, 0.5),
 ('Alliant Energy', '5493009ML300G373MZ12', 'US0188021085', 4010, 'US', 'North America', 'Electricity Utilities', 11.08166774363018, None, 3648000000.0, 11600000000.0, 18504000000.0, 16701000000.0, 16000000.0, 0.5),
 ('Ameren Corp.', 'XRZQ5S7HYJFPHJ78L959', 'US0236081024', 4010, 'US', 'North America', 'Electricity Utilities', 23.40945202412854, None, 5910

In [25]:
financial_df = pd.read_sql(sql, engine)
print(len(financial_df))

35


### Load emissions target data

In [26]:
engine.execute("describe rmi_20211120.emissions_targets").fetchall()

[('parent_name', 'varchar', '', ''),
 ('utility_name', 'varchar', '', ''),
 ('respondent_id', 'integer', '', ''),
 ('year', 'timestamp(3)', '', ''),
 ('co2_historical', 'double', '', ''),
 ('co2_target', 'double', '', ''),
 ('co2_target_all_years', 'double', '', ''),
 ('co2_1point5c', 'double', '', ''),
 ('generation_historical', 'double', '', ''),
 ('generation_projected', 'double', '', ''),
 ('generation_1point5c', 'double', '', ''),
 ('co2_intensity_historical', 'double', '', ''),
 ('co2_intensity_target', 'double', '', ''),
 ('co2_intensity_target_all_years', 'double', '', ''),
 ('co2_intensity_1point5c', 'double', '', '')]

In [27]:
targets_df = pd.read_sql(f"""
select ET.parent_name as company_name, year, co2_target_all_years, co2_intensity_target_all_years, generation_projected
from rmi_20211120.emissions_targets ET
     -- left join (select parent_name, parent_lei from rmi_20211120.utility_information group by parent_name, parent_lei) U
     --      on ET.parent_name=U.parent_name
""", engine, parse_dates=['year'])

print(len(targets_df))

9945


In [28]:
def compute_sums_and_wavg(x):
    d = {}
    d['co2_target_by_year'] = x['co2_target_all_years'].sum()
    d['co2_intensity_target_by_year'] = (x['generation_projected'] * x['co2_intensity_target_all_years']).sum() / x['generation_projected'].sum()
    d['generation_by_year'] = x['generation_projected'].sum()
    return pd.Series(d, index=['co2_target_by_year', 'co2_intensity_target_by_year', 'generation_by_year'])

df = (targets_df[targets_df.year.dt.year>=2019]
      .fillna(0).groupby(['company_name', 'year'])
      .apply(compute_sums_and_wavg)
      .sort_values(['company_name', 'year'], ascending=[True, False])
     )

  d['co2_intensity_target_by_year'] = (x['generation_projected'] * x['co2_intensity_target_all_years']).sum() / x['generation_projected'].sum()


In [29]:
df.loc[('AES Corp.','2045-01-01')]

co2_target_by_year               0.000000
co2_intensity_target_by_year     0.000000
generation_by_year              20.279687
Name: (AES Corp., 2045-01-01 00:00:00), dtype: float64

In [30]:
df.loc['AES Corp.']

Unnamed: 0_level_0,co2_target_by_year,co2_intensity_target_by_year,generation_by_year
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2050-01-01,0.0,0.0,20.996221
2049-01-01,0.0,0.0,20.850879
2048-01-01,0.0,0.0,20.706562
2047-01-01,0.0,0.0,20.563262
2046-01-01,0.0,0.0,20.420973
2045-01-01,0.0,0.0,20.279687
2044-01-01,0.0,0.0,20.139397
2043-01-01,0.0,0.0,20.000096
2042-01-01,0.0,0.0,19.861777
2041-01-01,0.0,0.0,19.724434


In [31]:
df.loc['Cleco Partners LP'] # Needs to be fixed

Unnamed: 0_level_0,co2_target_by_year,co2_intensity_target_by_year,generation_by_year
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2050-01-01,0.0,0.0,13.861239
2049-01-01,0.0,0.0,13.861239
2048-01-01,0.0,0.0,13.861239
2047-01-01,0.0,0.0,13.861239
2046-01-01,0.0,0.0,13.861239
2045-01-01,0.0,0.0,13.861239
2044-01-01,0.0,0.0,13.861239
2043-01-01,0.0,0.0,13.861239
2042-01-01,0.0,0.0,13.861239
2041-01-01,0.0,0.0,13.861239


In [32]:
xdf = df.reset_index()
xdf.year = xdf.year.map(lambda x: x.year)
pdf = xdf.pivot(index=['company_name'], columns='year').reset_index()
pdf.insert(1, 'company_lei', pdf.company_name.str.upper().map(gleif_match))
pdf.insert(2, 'company_id', pdf.company_lei.map(rmi_dict))
pdf = pdf.set_index(['company_name','company_lei', 'company_id'], drop=True)
pdf.columns.names=[None,None]
pdf

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,co2_target_by_year,co2_target_by_year,co2_target_by_year,co2_target_by_year,co2_target_by_year,co2_target_by_year,co2_target_by_year,co2_target_by_year,co2_target_by_year,co2_target_by_year,...,generation_by_year,generation_by_year,generation_by_year,generation_by_year,generation_by_year,generation_by_year,generation_by_year,generation_by_year,generation_by_year,generation_by_year
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,2019,2020,2021,2022,2023,2024,2025,2026,2027,2028,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
company_name,company_lei,company_id,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
AES Corp.,2NUNNB7D43COUIRE5295,US00130H1059,11.994886,11.423701,10.852516,10.281331,9.710146,9.138961,8.567776,7.996591,7.425406,6.854221,...,19.724434,19.861777,20.000096,20.139397,20.279687,20.420973,20.563262,20.706562,20.850879,20.996221
"ALLETE, Inc.",549300NNLSIMY6Z8OT86,US0185223007,4.383048,4.241659,4.100271,3.958882,3.817494,3.676105,3.534716,3.393328,3.251939,3.110550,...,9.037822,9.066894,9.096059,9.125318,9.154672,9.184119,9.213662,9.243299,9.273032,9.302861
Alcoa Corp.,549300T12EZ1F6PWWU29,US0138721065,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422
Algonquin Power & Utilities Corp.,549300K5VIUTJXQL7X75,US0158577090,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375
Alliant Energy,5493009ML300G373MZ12,US0188021085,11.081668,10.577956,10.074243,9.570531,9.066819,8.563107,8.059395,7.555683,7.051970,6.548258,...,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Wabash Valley Power Assn, Inc",,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,4.906369,4.906369,4.906369,4.906369,4.906369,4.906369,4.906369,4.906369,4.906369,4.906369
"Washington Electric Coop., Inc.",,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.058438,1.058438,1.058438,1.058438,1.058438,1.058438,1.058438,1.058438,1.058438,1.058438
Wolverine Power Supply Coop.,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.782358,1.782358,1.782358,1.782358,1.782358,1.782358,1.782358,1.782358,1.782358,1.782358
"Xcel Energy, Inc.",LGJNMI9GH8XIDG5RCM61,US98389B1008,43.151269,40.042084,36.932900,33.823715,30.714530,27.605346,24.496161,21.386977,18.277792,15.168608,...,106.822591,107.504014,108.191315,108.884553,109.583790,110.289086,111.000502,111.718103,112.441950,113.172107


In [33]:
co2_df = pdf.co2_target_by_year.reset_index()
co2_df = co2_df[co2_df.company_id.notna()]
co2_df.insert(3, 'variable', 'S1+S2')
co2_df

Unnamed: 0,company_name,company_lei,company_id,variable,2019,2020,2021,2022,2023,2024,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
0,AES Corp.,2NUNNB7D43COUIRE5295,US00130H1059,S1+S2,11.994886,11.423701,10.852516,10.281331,9.710146,9.138961,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"ALLETE, Inc.",549300NNLSIMY6Z8OT86,US0185223007,S1+S2,4.383048,4.241659,4.100271,3.958882,3.817494,3.676105,...,1.272498,1.131109,0.989721,0.848332,0.706943,0.565555,0.424166,0.282777,0.141389,0.0
2,Alcoa Corp.,549300T12EZ1F6PWWU29,US0138721065,S1+S2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Algonquin Power & Utilities Corp.,549300K5VIUTJXQL7X75,US0158577090,S1+S2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Alliant Energy,5493009ML300G373MZ12,US0188021085,S1+S2,11.081668,10.577956,10.074243,9.570531,9.066819,8.563107,...,2.493375,2.216334,1.939292,1.66225,1.385208,1.108167,0.831125,0.554083,0.277042,0.0
5,Ameren Corp.,XRZQ5S7HYJFPHJ78L959,US0236081024,S1+S2,23.438028,23.269807,23.101586,22.933365,22.765143,22.596922,...,5.828651,5.181023,4.533395,3.885767,3.238139,2.590511,1.942884,1.295256,0.647628,0.0
6,"American Electric Power Co., Inc.",1B4S6S7G0TW5EE83BO58,US0255371017,S1+S2,67.60567,64.7818,61.95793,59.13406,56.310189,53.486319,...,20.488229,19.028696,17.569162,16.109629,14.650095,13.190562,11.731028,10.271494,8.811961,7.352427
7,American States Water,529900L26LIS2V8PWM23,US0298991011,S1+S2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Avangrid, Inc.",549300OX0Q38NLSKPB49,US05351W1036,S1+S2,0.025963,0.02306,0.020157,0.017254,0.014351,0.011448,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Avista Corp.,Q0IK63NITJD6RJ47SW96,US05379B1070,S1+S2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
co2_ei_df = pdf.co2_intensity_target_by_year.reset_index()
co2_ei_df.iloc[:, 3] = 2*co2_ei_df.iloc[:, 4] - co2_ei_df.iloc[:, 5]
co2_ei_df = co2_ei_df[co2_ei_df.company_id.notna()]
co2_ei_df.insert(3, 'variable', 'S1+S2')
co2_ei_df

Unnamed: 0,company_name,company_lei,company_id,variable,2019,2020,2021,2022,2023,2024,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
0,AES Corp.,2NUNNB7D43COUIRE5295,US00130H1059,S1+S2,0.707746,0.669865,0.631985,0.594595,0.557689,0.521264,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"ALLETE, Inc.",549300NNLSIMY6Z8OT86,US0185223007,S1+S2,0.520359,0.502067,0.483775,0.465596,0.447528,0.429571,...,0.140797,0.124752,0.108808,0.092965,0.077222,0.06158,0.046037,0.030593,0.015247,0.0
2,Alcoa Corp.,549300T12EZ1F6PWWU29,US0138721065,S1+S2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Algonquin Power & Utilities Corp.,549300K5VIUTJXQL7X75,US0158577090,S1+S2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Alliant Energy,5493009ML300G373MZ12,US0188021085,S1+S2,0.440366,0.420349,0.400332,0.380316,0.360299,0.340283,...,0.099082,0.088073,0.077064,0.066055,0.055046,0.044037,0.033027,0.022018,0.011009,0.0
5,Ameren Corp.,XRZQ5S7HYJFPHJ78L959,US0236081024,S1+S2,0.642677,0.633435,0.624193,0.615053,0.606015,0.597077,...,0.13567,0.119699,0.103957,0.088443,0.073154,0.058088,0.043242,0.028613,0.0142,0.0
6,"American Electric Power Co., Inc.",1B4S6S7G0TW5EE83BO58,US0255371017,S1+S2,0.60141,0.577082,0.552755,0.528466,0.504215,0.480003,...,0.189374,0.176216,0.163081,0.149968,0.136876,0.123807,0.11076,0.097735,0.084733,0.071752
7,American States Water,529900L26LIS2V8PWM23,US0298991011,S1+S2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Avangrid, Inc.",549300OX0Q38NLSKPB49,US05351W1036,S1+S2,0.02142,0.019025,0.01663,0.014235,0.01184,0.009445,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Avista Corp.,Q0IK63NITJD6RJ47SW96,US05379B1070,S1+S2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
gen_df = pdf.generation_by_year.reset_index()
gen_df.iloc[:, 3] = 2*gen_df.iloc[:, 4] - gen_df.iloc[:, 5]
gen_df = gen_df[gen_df.company_id.notna()]
gen_df.insert(3, 'variable', 'TWh')
gen_df

Unnamed: 0,company_name,company_lei,company_id,variable,2019,2020,2021,2022,2023,2024,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
0,AES Corp.,2NUNNB7D43COUIRE5295,US00130H1059,TWh,16.935348,17.053729,17.17211,17.291331,17.411398,17.532319,...,19.724434,19.861777,20.000096,20.139397,20.279687,20.420973,20.563262,20.706562,20.850879,20.996221
1,"ALLETE, Inc.",549300NNLSIMY6Z8OT86,US0185223007,TWh,8.421215,8.448391,8.475567,8.50283,8.530181,8.55762,...,9.037822,9.066894,9.096059,9.125318,9.154672,9.184119,9.213662,9.243299,9.273032,9.302861
2,Alcoa Corp.,549300T12EZ1F6PWWU29,US0138721065,TWh,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,...,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422,1.026422
3,Algonquin Power & Utilities Corp.,549300K5VIUTJXQL7X75,US0158577090,TWh,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,...,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375,6.367375
4,Alliant Energy,5493009ML300G373MZ12,US0188021085,TWh,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,...,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692,25.164692
5,Ameren Corp.,XRZQ5S7HYJFPHJ78L959,US0236081024,TWh,36.417004,36.692056,36.967107,37.24422,37.523411,37.804694,...,42.922388,43.244144,43.568311,43.894908,44.223953,44.555465,44.889462,45.225963,45.564986,45.906551
6,"American Electric Power Co., Inc.",1B4S6S7G0TW5EE83BO58,US0255371017,TWh,99.835176,99.916774,99.998372,100.080204,100.162272,100.244575,...,101.680566,101.767246,101.854176,101.941358,102.028792,102.116478,102.204418,102.292613,102.381063,102.469769
7,American States Water,529900L26LIS2V8PWM23,US0298991011,TWh,0.000385,0.000385,0.000385,0.000385,0.000385,0.000385,...,0.000385,0.000385,0.000385,0.000385,0.000385,0.000385,0.000385,0.000385,0.000385,0.000385
8,"Avangrid, Inc.",549300OX0Q38NLSKPB49,US05351W1036,TWh,1.21212,1.21212,1.21212,1.21212,1.21212,1.21212,...,1.21212,1.21212,1.21212,1.21212,1.21212,1.21212,1.21212,1.21212,1.21212,1.21212
9,Avista Corp.,Q0IK63NITJD6RJ47SW96,US05379B1070,TWh,9.61371,9.712053,9.810397,9.909866,10.010473,10.112232,...,12.031678,12.156569,12.28289,12.410657,12.539886,12.670594,12.802798,12.936515,13.071762,13.208558


In [36]:
with pd.ExcelWriter("rmi-20211120-output.xlsx", datetime_format="YYYY") as writer:
    financial_df.to_excel(writer, sheet_name="fundamental_data", index=False)
    co2_df.to_excel(writer, sheet_name="projected_target", index=False)
    co2_ei_df.to_excel(writer, sheet_name="projected_ei_in_Wh", index=False)
    gen_df.to_excel(writer, sheet_name="projected_production", index=False)

In [38]:
portfolio_zero = portfolio_df.copy()
portfolio_zero.target_probability = 0.0
portfolio_one = portfolio_df.copy()
portfolio_one.target_probability = 1.0

portfolio_df.to_csv("rmi-20211120-portfolio.csv", sep=';', index=False)