This notebook helps an analyst sanity-check the math behind a company's targets, trajectories, and benchmarks for a given region and sector.

It uses the OECM benchmark for production and intensity values. Subject data is formatted according to the rules of the ITR Data template.

**Cell 4 is where the benchmark is manually selected**

**Cell 5 is where the sample data file is loaded**

**Cell 9 must be set manually** (based on sector/region of subject company set in **cell 8**)

In [1]:
import os
import sys
import json
import argparse
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go 

In [2]:
import ITR
from ITR.data.base_providers import BaseProviderProductionBenchmark, BaseProviderIntensityBenchmark
from ITR.interfaces import ETimeFrames, EScope, IProductionBenchmarkScopes, IEIBenchmarkScopes, DF_ICompanyEIProjections
from ITR.data.template import TemplateProviderCompany
from ITR.data.data_warehouse import DataWarehouse
from ITR.data.osc_units import PA_

In [3]:
import logging
# Let INFO-and-above records through the root logger so library progress
# messages show up in the notebook output.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

In [4]:
# Benchmark files are looked up under ./data/json-units, relative to the
# directory the notebook is run from (os.path.abspath('') is the current
# working directory).
self_root = os.path.abspath('')
benchmark_prod_json = os.path.join(self_root, "data", "json-units", "benchmark_production_OECM.json")
benchmark_EI_OECM = os.path.join(self_root, "data", "json-units", "benchmark_EI_OECM_S3.json")
benchmark_EI_TPI = os.path.join(self_root, "data", "json-units", "benchmark_EI_TPI_1_5_degrees.json")


def _load_benchmark_json(path):
    """Return the parsed contents of the benchmark JSON file at ``path``."""
    # Read as UTF-8 explicitly instead of relying on the platform's default
    # text encoding, which differs between operating systems.
    with open(path, encoding="utf-8") as json_file:
        return json.load(json_file)


# load production benchmarks
prod_bms = IProductionBenchmarkScopes.parse_obj(_load_benchmark_json(benchmark_prod_json))
production_bm = BaseProviderProductionBenchmark(production_benchmarks=prod_bms)

# load intensity benchmarks (both providers are built so that switching
# benchmark is a one-line change at the bottom of this cell)

# OECM
ei_bms = IEIBenchmarkScopes.parse_obj(_load_benchmark_json(benchmark_EI_OECM))
OECM_EI_bm = BaseProviderIntensityBenchmark(EI_benchmarks=ei_bms)

# TPI
ei_bms = IEIBenchmarkScopes.parse_obj(_load_benchmark_json(benchmark_EI_TPI))
TPI_EI_bm = BaseProviderIntensityBenchmark(EI_benchmarks=ei_bms)

# Manual benchmark selection: assign OECM_EI_bm or TPI_EI_bm.
intensity_bm = OECM_EI_bm

In [5]:
# Path of the workbook holding the company data. The path is relative, so it
# is resolved against the directory the notebook is run from.
template_data_path = "data/20230106 ITR V2 Sample Data.xlsx"
# Alternative sample workbook, kept as a commented-out option:
# template_data_path = "data/20220927 ITR Tool Sample Data.xlsx"

# Remove the # and space on the next line to point the template_data_path variable at your own data
# template_data_path = "data/your_template_here.xlsx"

# Build the company-data provider from the selected workbook.
template_company_data = TemplateProviderCompany(excel_path=template_data_path)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(
2023-01-30 10:16:20,045 - ITR.data.template - ERROR - The following companies have ESG data defined but no fundamental data and will be removed from further analysis:
['US21037T1097' 'MYL3794OO004']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_esg.drop(columns='unit', inplace=True)
            (              'Ag Chem', 'NL0000009827',   'S2'),
            (    'Consumer Products', 'DE000SYM9999', 'S1S2'),
            ('Electricity Utilities', 'US0255371017',   'S2'),
            ('Electricity Utilities', 'US18551QAA58', 'S1S2'),
            ('Electricity Utilities', 'US18551QAA58',   'S2'),
            ('Ele

In [6]:
# Combine the company data with the production and intensity benchmarks chosen
# above; gaps are handled by DataWarehouse.estimate_missing_s3_data.
template_provider = DataWarehouse(
    template_company_data,
    production_bm,
    intensity_bm,
    estimate_missing_data=DataWarehouse.estimate_missing_s3_data,
)

# Fills in template_company_data._companies[0].projected_targets.S1S2

# Summarise the key parameters of the selected intensity benchmark.
print(
    f"Benchmark Temperature = {intensity_bm.benchmark_temperature}\n"
    f"Benchmark Global Budget = {intensity_bm.benchmark_global_budget}\n"
    f"AFOLU included = {intensity_bm.is_AFOLU_included}"
)

2023-01-30 10:16:54,604 - ITR.data.data_warehouse - INFO - calculating trajectories for 112 companies (times 5 scopes times 31 years)
2023-01-30 10:17:26,937 - ITR.data.base_providers - INFO - Normalizing intensity metrics
2023-01-30 10:17:26,963 - ITR.data.base_providers - ERROR - intensity values for company US6745991058-chem not compatible with benchmark (CO2e * kilogram / USD)
2023-01-30 10:17:27,447 - ITR.data.base_providers - INFO - Done normalizing intensity metrics
2023-01-30 10:17:27,447 - ITR.data.data_warehouse - INFO - Allocating emissions to align with benchmark data
2023-01-30 10:17:27,453 - ITR.data.data_warehouse - INFO - Already allocated emissions for CA87807B1076 across ['Electricity Utilities', 'Gas', 'Oil']
2023-01-30 10:17:27,483 - ITR.data.data_warehouse - INFO - Sector alignment complete
2023-01-30 10:17:27,483 - ITR.data.data_warehouse - INFO - estimating missing data
2023-01-30 10:17:27,491 - ITR.data.data_warehouse - ERROR - Company US6745991058-chem's S1+S2 

Benchmark Temperature = 1.5 delta_degree_Celsius
Benchmark Global Budget = 521.0526315789474 CO2 * gigametric_ton
AFOLU included = False


In [7]:
def _sector_region_mapper(scope_benchmarks):
    """Map each (sector, region) pair to its position in ``scope_benchmarks.benchmarks``.

    Returns a Series indexed by (sector, region) whose values are list
    positions, or None when the scope is unset or holds no benchmarks.
    Downstream cells already treat a None mapper as "no benchmark for this
    scope".
    """
    if scope_benchmarks is None or not scope_benchmarks.benchmarks:
        return None
    keys = [(bm.sector, bm.region) for bm in scope_benchmarks.benchmarks]
    return pd.Series(range(len(keys)), index=keys)


production_bm_mapper = _sector_region_mapper(production_bm._productions_benchmarks.AnyScope)

if intensity_bm == OECM_EI_bm:
    # The OECM benchmarks are all coordinated with each other
    ei_s1_bm_mapper = ei_s1s2_bm_mapper = ei_s3_bm_mapper = None
    ei_s1s2s3_bm_mapper = production_bm_mapper
else:
    # TPI benchmarks (possibly others) have their own rules, so each scope
    # gets its own mapper (None for scopes the benchmark does not provide).
    ei_s1_bm_mapper = _sector_region_mapper(intensity_bm._EI_benchmarks.S1)
    ei_s1s2_bm_mapper = _sector_region_mapper(intensity_bm._EI_benchmarks.S1S2)
    ei_s3_bm_mapper = _sector_region_mapper(intensity_bm._EI_benchmarks.S3)
    ei_s1s2s3_bm_mapper = _sector_region_mapper(intensity_bm._EI_benchmarks.S1S2S3)

In [8]:
# Companies under review: display name -> identifier passed to the data
# warehouse. Insertion order is the order in which companies are processed.
companies_of_interest = {
    'RWE AG': 'DE0007037129',
    'TotalEnergies': 'FR0000120271',
    'Eni SPA Group': 'IT0003132476',
    'BMW Group': 'DE0005190003',

    # Not currently included:
    # 'GB0007980591', 'US1667641005', 'US30231G1022', 'US56585A1025', '2222.SR',

    'US Steel': 'US9129091081',
    'Carpenter Technologies': 'US1442851036',
    'Cleveland Cliffs': 'US1858991011',
    'Commercial Metals': 'US2017231034',
    'Nucor': 'US6703461052',
    'Steel Dynamics': 'US8581191009',
    'Timken Steel': 'US8873991033',
    'Worthington Industries': 'US9818111026',
    'POSCO': 'KR7005490008',
}

company_ids = list(companies_of_interest.values())

# Fetch the preprocessed company models for the selected identifiers.
models = template_provider.get_preprocessed_company_data(company_ids)

In [9]:
# Label every distinct production benchmark (sector/region pair) that the
# selected companies map onto, keyed by the benchmark's list position.
models_dict = {}

for model in models:
    # Only 'North America' and 'Europe' are looked up by name; every other
    # region uses the 'Global' benchmark entry.
    region = 'Global' if model.region not in ['North America', 'Europe'] else model.region
    # The sector_region_idx uniquely identifies the sector and region of the subject company (or company line of business)
    sector_region_idx = production_bm_mapper.loc[model.sector, region]
    models_dict[sector_region_idx] = f"{model.sector} in {region}"

# Report the base-year production of each benchmark in use.
production_benchmarks = production_bm._productions_benchmarks.AnyScope.benchmarks
for bm_position, bm_label in models_dict.items():
    sector_prod_baseline = production_benchmarks[bm_position].base_year_production
    print(f"setting sector_prod_baseline (total units of output) for {bm_label} to {sector_prod_baseline}")

setting sector_prod_baseline (total units of output) for Electricity Utilities in Europe to 3336.154864 terawatt_hour
setting sector_prod_baseline (total units of output) for Energy in Europe to 68762.03156985894 petajoule
setting sector_prod_baseline (total units of output) for Autos in Europe to 5461.3840367186485 gigapkm
setting sector_prod_baseline (total units of output) for Steel in North America to 78.3 Fe * megametric_ton
setting sector_prod_baseline (total units of output) for Steel in Global to 1869.6 Fe * megametric_ton


In [11]:
def get_ei_scope_by_sector_region(scope, sector, region, bm_mapper):
    """Return the benchmark intensity series for ``sector``/``region`` in ``scope``.

    Reads the notebook-level ``intensity_bm``.

    Parameters
    ----------
    scope : str
        Attribute name on ``intensity_bm._EI_benchmarks`` (e.g. 'S1S2').
    sector, region :
        Key looked up in ``bm_mapper``; when (sector, region) is absent the
        (sector, "Global") entry is used instead.
    bm_mapper : pd.Series
        Maps (sector, region) to a position in the scope's ``benchmarks`` list.

    Returns
    -------
    pd.Series or None
        Year-indexed series built from the benchmark's ``projections_nounits``
        with the benchmark's ``benchmark_metric`` as dtype, or None when the
        scope is unset or neither the regional nor the Global entry exists.
    """
    scoped_bm = getattr(intensity_bm._EI_benchmarks, scope)
    if not scoped_bm:
        return None

    # Prefer the region-specific benchmark, then fall back to the Global one.
    for key in ((sector, region), (sector, "Global")):
        if key in bm_mapper.index:
            benchmark = scoped_bm.benchmarks[bm_mapper.loc[key]]
            break
    else:
        # No benchmark for this sector at all.
        return None

    ei_data, ei_idx = zip(*[(ei.value, ei.year) for ei in benchmark.projections_nounits])
    return pd.Series(PA_(ei_data, dtype=benchmark.benchmark_metric), index=ei_idx)

# From ICompanyEIProjections
def get_em_projections_from_ICompanyEIProjections(model_ei) -> pd.Series:
    """Return a year-indexed Series from the broadest populated scope of ``model_ei``.

    Scopes are tried in the order S1S2S3, S1S2, S1; the first one that is set
    (truthy) is used. Each entry of that scope's ``projections`` contributes
    its ``value.m`` as the Series value and its ``year`` as the index label.

    Raises
    ------
    ValueError
        If none of the three scopes is set on ``model_ei``.
    """
    for scope in ('S1S2S3', 'S1S2', 'S1'):
        scoped = getattr(model_ei, scope, None)
        if scoped:
            data, idx = zip(*[(p.value.m, p.year) for p in scoped.projections])
            return pd.Series(data, idx)
    # f-string so the offending object actually appears in the message.
    raise ValueError(f"no valid scope found for {model_ei}")

# From DF_ICompanyEIProjections
def get_em_projections(model_ei) -> pd.Series:
    """Return the projections of the broadest populated scope of ``model_ei``.

    Scopes are tried in the order S1S2S3, S1S2, S1. For the first scope that
    is set (truthy):

    * if it is a ``DF_ICompanyEIProjections``, its ``projections`` attribute
      is returned as-is;
    * otherwise the result of
      ``get_em_projections_from_ICompanyEIProjections(model_ei)`` is returned
      (which selects the same scope using the same priority order).

    Raises
    ------
    ValueError
        If none of the three scopes is set on ``model_ei``.
    """
    for scope in ('S1S2S3', 'S1S2', 'S1'):
        scoped = getattr(model_ei, scope, None)
        if not scoped:
            continue
        if isinstance(scoped, DF_ICompanyEIProjections):
            return scoped.projections
        return get_em_projections_from_ICompanyEIProjections(model_ei)
    # f-string so the offending object actually appears in the message.
    raise ValueError(f"no valid scope found for {model_ei}")

In [12]:
# NOTE(review): `sector_dfs` is initialised here but nothing in this cell adds
# to it; confirm whether each `sector_df` was meant to be collected.
sector_dfs = []

# write_image() does not create missing directories.
os.makedirs("images", exist_ok=True)

# (company scope, attribute name on the EI benchmark, (sector, region) -> position mapper)
ei_scope_mappers = [
    (EScope.S1, 'S1', ei_s1_bm_mapper),
    (EScope.S1S2, 'S1S2', ei_s1s2_bm_mapper),
    (EScope.S3, 'S3', ei_s3_bm_mapper),
    (EScope.S1S2S3, 'S1S2S3', ei_s1s2s3_bm_mapper),
]


def _cumulative_t_co2(emissions):
    """Convert a pint-typed emissions Series to t CO2 magnitudes and accumulate over the index."""
    return emissions.astype('pint[t CO2]').pint.m.cumsum()


for i, model in enumerate(models):
    region = model.region if model.region in ['North America', 'Europe'] else 'Global'
    sector_region_idx = production_bm_mapper.loc[model.sector, region]
    sector_prod_bm = production_bm._productions_benchmarks.AnyScope.benchmarks[sector_region_idx]
    sector_prod_baseline = sector_prod_bm.base_year_production
    prod_data, prod_idx = zip(*[(p.value, p.year) for p in sector_prod_bm.projections_nounits])
    sector_production = pd.Series(prod_data, prod_idx)

    # Only the benchmark matching the company's own scope is plotted, so only
    # that one is looked up. Companies whose scope has no mapper or no
    # benchmark series are skipped.
    scope_name, bm_mapper = next(
        ((name, mapper) for scope, name, mapper in ei_scope_mappers if model.scope == scope),
        (None, None),
    )
    if bm_mapper is None:
        continue
    sector_ei = get_ei_scope_by_sector_region(scope_name, model.sector, region, bm_mapper)
    if sector_ei is None:
        continue

    # Benchmark values are treated as year-on-year growth rates and compounded.
    sector_growth_partial = sector_production.add(1).cumprod()

    # Both 2019 (benchmark base) and 2020 (projection base) production are
    # needed below; skip with a clear message instead of failing on unpack/lookup.
    historic = [(p.value.m, p.year)
                for p in model.historic_data.productions if p.year in [2019, 2020]]
    if {year for _, year in historic} != {2019, 2020}:
        root_logger.warning(
            f"skipping {model.company_id}: historic production for 2019 and 2020 is required")
        continue
    data, idx = zip(*historic)
    co_historic_productions = pd.Series(data, idx)

    # Project company production forward from 2020 using the sector growth.
    co_projected_productions = (
        co_historic_productions[2020]
        * sector_growth_partial[sector_growth_partial.index > 2020]
    )

    co_productions = pd.concat(
        [co_historic_productions, co_projected_productions]
    ).astype(f"pint[{model.production_metric}]")

    co_ei_trajectory = get_em_projections(model.projected_intensities)
    co_ei_target = get_em_projections(model.projected_targets)

    # Cumulative emissions: production x intensity, accumulated over the years.
    plot_dict = {
        "Trajectory": _cumulative_t_co2(co_productions * co_ei_trajectory),
        "Target": _cumulative_t_co2(co_productions * co_ei_target),
        f"Benchmark{scope_name}": _cumulative_t_co2(
            co_productions[2019] * (sector_growth_partial * sector_ei)),
    }

    sector_df = pd.DataFrame(plot_dict)
    fig = px.line(sector_df, y=list(plot_dict),
                  labels={'index':'Year', 'value':'t CO2', 'variable':f"{model.company_name}<br>{model.company_id}"})
    fig.write_image(f"images/co2_bm_{i}.jpeg")
    # Call show(); a bare `fig.show` only references the method and displays nothing.
    fig.show()