In [1]:
%load_ext autoreload
%autoreload 2

# imports
import pandas as pd
import nzip

# ignore some junk output
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

### Load data

In [2]:
nzip_path = 'N-ZIP-Model_version1_2_AG_updated_19_12_2023.xlsb'
sector_defs_path = 'nzip_model_sector_map.csv'
sector = 'Industry'
output_file = 'sd-industry-test.xlsx'

df = nzip.load_nzip(nzip_path, sector_defs_path, sector)

EE sectors in map but not NZIP: set(), EE sectors in NZIP but not map: {nan}


In [3]:
df = nzip.add_cols(df.copy())

### Measure level data

In [4]:
measure_level_kwargs = [
    # Add total direct and indirect emissions
    {
        "timeseries": "Total direct emissions abated (MtCO2e)",
        "variable_name": "Abatement total direct",
        "variable_unit": "MtCO2e",
    },
    {
        "timeseries": "Total indirect emissions abated (MtCO2e)",
        "variable_name": "Abatement total indirect",
        "variable_unit": "MtCO2e",
    },

    # Add emissions by gas
    {
        "timeseries": "Total direct emissions abated (MtCO2e)",
        "variable_name": "Abatement emissions CO2",
        "variable_unit": "MtCO2",
        "weight_col": "% CARBON Emissions",
    },
    {
        "timeseries": "Total direct emissions abated (MtCO2e)",
        "variable_name": "Abatement emissions CH4",
        "variable_unit": "MtCO2e",
        "weight_col": "% CH4 Emissions",
    },
    {
        "timeseries": "Total direct emissions abated (MtCO2e)",
        "variable_name": "Abatement emissions N20",
        "variable_unit": "MtCO2e",
        "weight_col": "% N2O Emissions",
    },

    # Add demand
    {
        "timeseries": "Change in electricity use (GWh)",
        "variable_name": "Additional demand electricity",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Change in natural gas use (GWh)",
        "variable_name": "Additional demand gas",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Change in petroleum use (GWh)",
        "variable_name": "Additional demand petroleum",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Change in solid fuel use (GWh)",
        "variable_name": "Additional demand solid fuel",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Change in primary bioenergy use (GWh)",
        "variable_name": "Additional demand final bioenergy",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Change in hydrogen use (GWh)",
        "variable_name": "Additional demand hydrogen",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Change in non bio waste",
        "variable_name": "Additional demand final non-bio waste",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },

    # Add capex and opex
    {
        "timeseries": "capex",
        "variable_name": "Additional capital expenditure",
        "variable_unit": "£m",
    },
    {
        "timeseries": "AM levelised capex (£m)",
        "variable_name": "Additional capital expenditure annualised",
        "variable_unit": "£m",
    },
    {
        "timeseries": "capex low carbon",
        "variable_name": "Total capital expenditure low carbon",
        "variable_unit": "£m",
    },
    {
        "timeseries": "opex",
        "variable_name": "Additional operational expenditure",
        "variable_unit": "£m",
    },
    {
        "timeseries": "opex low carbon",
        "variable_name": "Total operational expenditure low carbon",
        "variable_unit": "£m",
    },

    # CCS
    {
        "timeseries": "Tonnes of CO2 captured (MtCO2)",
        "variable_name": "Additional CCS",
        "variable_unit": "MtCO2",
    },

    # these are intermediate variables
    {
        "timeseries": "total emissions abated",
        "variable_name": "total emissions abated",
        "variable_unit": "MtCO2e",
    },
    {
        "timeseries": "cost differential",
        "variable_name": "cost differential",
        "variable_unit": "£m",
    },
    {
        "timeseries": "cum total emissions abated",
        "variable_name": "cum total emissions abated",
        "variable_unit": "MtCO2e",
    },
    {
        "timeseries": "cum cost differential",
        "variable_name": "cum cost differential",
        "variable_unit": "£m",
    },
]

In [5]:
reee_kwargs = [
    {
        "baseline_col": "Baseline emissions (MtCO2e)",
        "post_reee_col": "Post REEE baseline emissions (MtCO2e)",
        "out_col": "abatement",
        "usecols": "E:AL",
    },
    {
        "baseline_col": "Baseline electricity use (GWh)",
        "post_reee_col": "Post REEE baseline electricity use (GWh)",
        "out_col": "additional demand electricity",
        "usecols": "AM:BT",
    },
    {
        "baseline_col": "Baseline in natural gas use (GWh)",
        "post_reee_col": "Post REEE baseline in natural gas use (GWh)",
        "out_col": "additional demand gas",
        "usecols": "BU:DB",
    },
    {
        "baseline_col": "Baseline in petroleum use (GWh)",
        "post_reee_col": "Post REEE baseline in petroleum use (GWh)",
        "out_col": "additional demand petroleum",
        "usecols": "DC:EJ",
    },
    {
        "baseline_col": "Baseline in solid fuel use (GWh)",
        "post_reee_col": "Post REEE baseline in solid fuel use (GWh)",
        "out_col": "additional demand solid fuel",
        "usecols": "EK:FR",
    },
]

In [8]:
# write out the measure level data for this pathway
sd_df = nzip.sd_measure_level(df.copy(), measure_level_kwargs, reee_kwargs, nzip_path=nzip_path, baseline=False)
sd_df.to_excel(output_file, index=False, sheet_name='BP Measure level data')

In [9]:
# write a sheet containing the measure definitions
measure_defs_df = pd.DataFrame({
    'Sector': pd.Series(sd_df['Sector'].unique()).sort_values(),
    'Subsector': pd.Series(sd_df['Subsector'].unique()).sort_values(),
    'Measure Name': pd.Series(sd_df['Measure Name'].unique()).sort_values(),
    **{f'Category{i+3}: {category}': pd.Series(sd_df[f'Category{i+3}: {category}'].unique()).sort_values() for i, category in enumerate(nzip.CATEGORIES)}
})
with pd.ExcelWriter(output_file, mode='a', if_sheet_exists='replace') as writer:
    measure_defs_df.to_excel(writer, index=False, sheet_name='Measure definitions')


### Baseline pathway

In [10]:
baseline_kwargs = [
    {
        "timeseries": "Baseline emissions (MtCO2e)",
        "variable_name": "Baseline emissions CO2",
        "variable_unit": "MtCO2",
        "weight_col": "% CARBON Emissions",
    },
    {
        "timeseries": "Baseline emissions (MtCO2e)",
        "variable_name": "Baseline emissions CH4",
        "variable_unit": "MtCO2e",
        "weight_col": "% CH4 Emissions",
    },
    {
        "timeseries": "Baseline emissions (MtCO2e)",
        "variable_name": "Baseline emissions N20",
        "variable_unit": "MtCO2e",
        "weight_col": "% N2O Emissions",
    },
    {
        "timeseries": "Baseline electricity use (GWh)",
        "variable_name": "Baseline demand electricity",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Baseline in natural gas use (GWh)",
        "variable_name": "Baseline demand gas",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Baseline in petroleum use (GWh)",
        "variable_name": "Baseline demand petroleum",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Baseline in solid fuel use (GWh)",
        "variable_name": "Baseline demand solid fuel",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Baseline in primary bioenergy use (GWh)",
        "variable_name": "Baseline demand final bioenergy",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Baseline in hydrogen use (GWh)",
        "variable_name": "Baseline demand hydrogen",
        "variable_unit": "TWh",
        "scale": 1e-3,
    },
    {
        "timeseries": "Counterfactual capex (£m)",
        "variable_name": "Baseline capital expenditure",
        "variable_unit": "£m",
    },
    {
        "timeseries": "Counterfactual opex (£m)",
        "variable_name": "Baseline operational expenditure",
        "variable_unit": "£m",
    },
]

In [11]:
bl_df = nzip.sd_measure_level(df, baseline_kwargs, baseline=True)
bl_df = nzip.baseline_from_measure_level(bl_df)

In [12]:
with pd.ExcelWriter(output_file, mode='a', if_sheet_exists='replace') as writer:
    bl_df.to_excel(writer, index=False, sheet_name='Baseline data')

### Aggregate results

In [13]:
agg_df = nzip.get_aggregate_df(df, measure_level_kwargs, baseline_kwargs, sector)
with pd.ExcelWriter(output_file, mode='a', if_sheet_exists='replace') as writer:
    agg_df.to_excel(writer, index=False, sheet_name='Aggregate data')

# tests

In [14]:
nzip.col_search(df, 'baseline in pet')

[('Baseline in petroleum use (GWh) 2016', 90),
 ('Baseline in petroleum use (GWh) 2017', 90),
 ('Baseline in petroleum use (GWh) 2018', 90),
 ('Baseline in petroleum use (GWh) 2019', 90),
 ('Baseline in petroleum use (GWh) 2020', 90)]

In [15]:
reee_df = add_reee(df, **add_reee_kwargs[0])

In [16]:
nzip.aggregate_timeseries(reee_df, 're abatement', 'Abatement Emissions CO2', 'MtCO2e', measure='Resource Efficiency')

TypeError: aggregate_timeseries() takes 1 positional argument but 4 were given

In [None]:
emissions_cols = [f're abatement {y}' for y in nzip.YEARS]
agg_emissions_df = df.groupby(['CCC Subsector', 'Measure Technology'] + nzip.CATEGORIES)[emissions_cols].sum()
agg_emissions_df.tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,re abatement 2021,re abatement 2022,re abatement 2023,re abatement 2024,re abatement 2025,re abatement 2026,re abatement 2027,re abatement 2028,re abatement 2029,re abatement 2030,...,re abatement 2041,re abatement 2042,re abatement 2043,re abatement 2044,re abatement 2045,re abatement 2046,re abatement 2047,re abatement 2048,re abatement 2049,re abatement 2050
CCC Subsector,Measure Technology,Dispersed or Cluster Site,Process,Selected Option,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Vehicles,Resource Efficiency,Dispersed,Boiler - Steam (Non BECCS allowed),,0.001735,0.001735,0.001675,0.001647,0.001611,0.001562,0.001542,0.001521,0.001512,0.001485,...,0.001597,0.001597,0.001597,0.001597,0.001597,0.001597,0.001597,0.001597,0.001597,0.001597
Vehicles,Resource Efficiency,Dispersed,Boiler - Steam (Non BECCS allowed),High T. Heat Pump,0.017999,0.026018,0.033203,0.041522,0.049943,0.058158,0.067822,0.078,0.08717,0.09541,...,0.124827,0.124827,0.124827,0.124827,0.124827,0.124827,0.124827,0.124827,0.124827,0.124827
Vehicles,Resource Efficiency,Dispersed,CHP (Non BECCS allowed),,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Vehicles,Resource Efficiency,Dispersed,Dryer - Vehicles,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Vehicles,Resource Efficiency,Dispersed,Furnace - Vehicles,,7.2e-05,7.2e-05,7e-05,6.9e-05,6.7e-05,6.5e-05,6.4e-05,6.3e-05,6.3e-05,6.2e-05,...,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05
Vehicles,Resource Efficiency,Dispersed,Furnace - Vehicles,Blue H2 Furnace,0.000714,0.001031,0.001316,0.001646,0.00198,0.002306,0.002689,0.003092,0.003456,0.003782,...,0.004949,0.004949,0.004949,0.004949,0.004949,0.004949,0.004949,0.004949,0.004949,0.004949
Vehicles,Resource Efficiency,Dispersed,Furnace - Vehicles,Green H2 Furnace,3.6e-05,5.2e-05,6.7e-05,8.4e-05,0.000101,0.000117,0.000137,0.000157,0.000176,0.000192,...,0.000252,0.000252,0.000252,0.000252,0.000252,0.000252,0.000252,0.000252,0.000252,0.000252
Vehicles,Resource Efficiency,Dispersed,Oven - Vehicles,,7.2e-05,7.2e-05,7e-05,6.9e-05,6.7e-05,6.5e-05,6.4e-05,6.3e-05,6.3e-05,6.2e-05,...,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05,6.7e-05
Vehicles,Resource Efficiency,Dispersed,Oven - Vehicles,Electric Infra-Red Heater,0.000648,0.000937,0.001196,0.001495,0.001798,0.002094,0.002442,0.002809,0.003139,0.003436,...,0.004495,0.004495,0.004495,0.004495,0.004495,0.004495,0.004495,0.004495,0.004495,0.004495
Vehicles,Resource Efficiency,Dispersed,Oven - Vehicles,Low T. Heat Pump,0.000102,0.000147,0.000188,0.000235,0.000282,0.000329,0.000383,0.000441,0.000492,0.000539,...,0.000705,0.000705,0.000705,0.000705,0.000705,0.000705,0.000705,0.000705,0.000705,0.000705


In [None]:

# add years as rows to a new dataframe, sum over the rows in the original dataframe
out_df = pd.DataFrame({'Year': nzip.YEARS})
out_df = out_df.set_index('Year')
for y in nzip.YEARS:
    out_df.loc[y, 're abatement (MtCO2e)'] = df[f're abatement {y}'].sum()
    out_df.loc[y, 'ee abatement (MtCO2e)'] = df[f'ee abatement {y}'].sum()
    out_df.loc[y, 'Baseline emissions (MtCO2e)'] = df[f'Baseline emissions (MtCO2e) {y}'].sum()
    out_df.loc[y, 'Post REEE baseline emissions (MtCO2e)'] = df[f'Post REEE baseline emissions (MtCO2e) {y}'].sum()
out_df.to_excel('reee_test_v3.xlsx', sheet_name='REEE data')

In [None]:
process = df['Process'] == 'Methane Leakage - FFP'
measure = df['Technology Type'] == 'Other'
option = df['Selected Option'] == 'Strong LDAR'
sector = df['Element_sector'] == 'Other Chemicals'
clus = df['Dispersed or Cluster Site'] == 'Cluster'
all_ = process & measure & option & sector & clus
mydf = df.loc[all_].copy()


In [None]:
for y in nzip.YEARS:
    mydf[f'my {y}'] = mydf[f'Cost Differential (£m) {y}'].copy() / (mydf[f'Total direct emissions abated (MtCO2e) {y}'].copy() + mydf[f'Total indirect emissions abated (MtCO2e) {y}'].copy())

    mydf[f'my 2 {y}'] = mydf[f'Cost Differential (£m) {y}'].copy().sum() / (mydf[f'Total direct emissions abated (MtCO2e) {y}'].copy() + mydf[f'Total indirect emissions abated (MtCO2e) {y}'].copy()).sum()

In [None]:
mydf['my 2 2021']

1447   -50.555879
1480   -50.555879
1600   -50.555879
1621   -50.555879
1660   -50.555879
1805   -50.555879
1877   -50.555879
1889   -50.555879
1998   -50.555879
2079   -50.555879
2093   -50.555879
2131   -50.555879
2181   -50.555879
2210   -50.555879
2221   -50.555879
2280   -50.555879
2322   -50.555879
2422   -50.555879
2543   -50.555879
2608   -50.555879
2717   -50.555879
2849   -50.555879
2859   -50.555879
2879   -50.555879
2893   -50.555879
2905   -50.555879
2973   -50.555879
3025   -50.555879
3089   -50.555879
3108   -50.555879
3118   -50.555879
3133   -50.555879
3233   -50.555879
3291   -50.555879
3330   -50.555879
3463   -50.555879
3514   -50.555879
3597   -50.555879
3631   -50.555879
3641   -50.555879
3651   -50.555879
3679   -50.555879
3707   -50.555879
3737   -50.555879
3747   -50.555879
3916   -50.555879
3968   -50.555879
3986   -50.555879
3996   -50.555879
4014   -50.555879
4067   -50.555879
4158   -50.555879
4187   -50.555879
4344   -50.555879
4709   -50.555879
4764   -50

In [None]:
'Technology Type' in df.columns

True

In [None]:
total_emissions_cols = [f'my {y}' for y in nzip.YEARS]
mydf.groupby(['CCC Subsector', 'Technology Type'] + nzip.CATEGORIES)[total_emissions_cols].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,my 2021,my 2022,my 2023,my 2024,my 2025,my 2026,my 2027,my 2028,my 2029,my 2030,...,my 2041,my 2042,my 2043,my 2044,my 2045,my 2046,my 2047,my 2048,my 2049,my 2050
CCC Subsector,Technology Type,Dispersed or Cluster Site,Process,Selected Option,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Chemicals,Other,Cluster,Methane Leakage - FFP,Strong LDAR,-2850.906567,-2971.894319,-3085.178059,-3080.624823,-3058.42038,-3035.155187,-3014.210011,-3073.525606,-2880.708628,-2797.822908,...,1695.789696,1704.777577,1713.768659,1722.762946,1731.760439,1739.401849,1747.045454,1754.691257,1762.339257,1769.989457


In [None]:
mydf[f'Cost Differential (£m) {y}'].sum() / mydf[f'Total direct emissions abated (MtCO2e) {y}'].sum()

31.387690354623174

In [None]:
for c in df.columns:
    print(c)
    

Site Process ID
Site ID
Site
Process
Sector Process ID
H2 Point
CO2 Point
Injection Site
Rank of Selected Option
Selected Option
Technology ID
Abatement Rate
Year of Implementation
Technology Type
Electricity Connection Cost? (in NPV)
Blank Column
NPV
Number of Sites
Easting
Northing
UK Government Office Region
Country
Element_sector
Sector ID
Site Closure
Distance to Defined Point (km)
Dispersed or Cluster Site
Hydrogen Pipeline/Grid/Trucking?
CO2 Pipeline/Trucking?
Final CO2 Terminal
Latitude
Longitude
Direct Abatement Cost (£/tCO2e)
Indirect Abatement Cost (£/tCO2e)
Baseline emissions (MtCO2e) 2016
Baseline emissions (MtCO2e) 2017
Baseline emissions (MtCO2e) 2018
Baseline emissions (MtCO2e) 2019
Baseline emissions (MtCO2e) 2020
Baseline emissions (MtCO2e) 2021
Baseline emissions (MtCO2e) 2022
Baseline emissions (MtCO2e) 2023
Baseline emissions (MtCO2e) 2024
Baseline emissions (MtCO2e) 2025
Baseline emissions (MtCO2e) 2026
Baseline emissions (MtCO2e) 2027
Baseline emissions (MtCO2e) 