In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import pandas as pd

In [3]:
import json
import itertools

from pathlib import Path

In [4]:
from mppsteel.config.model_scenarios import DEFAULT_SCENARIO
from mppsteel.config.model_config import USD_TO_EUR_CONVERSION_DEFAULT

from mppsteel.config.reference_lists import RESOURCE_CATEGORY_MAPPER

from mppsteel.data_preprocessing.variable_plant_cost_archetypes import (
    plant_variable_costs,
    generate_feedstock_dict,
    generate_variable_costs,
    vc_mapper,
)

from mppsteel.utility.file_handling_utility import (
    read_pickle_folder,
    get_scenario_pkl_path,
)

from mppsteel.config.model_config import (
    PKL_DATA_FORMATTED,
    PKL_DATA_IMPORTS,
    MODEL_YEAR_RANGE,
)

In [5]:
scenario_dict = DEFAULT_SCENARIO.copy()
scenario_dict["usd_to_eur"] = USD_TO_EUR_CONVERSION_DEFAULT
scenario_dict["eur_to_usd"] = 1.0 / scenario_dict["usd_to_eur"]

In [6]:
intermediate_path = get_scenario_pkl_path(
    scenario_dict["scenario_name"], "intermediate"
)
eur_to_usd_rate = scenario_dict["eur_to_usd"]

steel_plants = read_pickle_folder(PKL_DATA_FORMATTED, "steel_plants_processed", "df")
steel_plant_region_ng_dict = (
    steel_plants[["country_code", "cheap_natural_gas"]]
    .set_index("country_code")
    .to_dict()["cheap_natural_gas"]
)
power_grid_prices_ref = read_pickle_folder(
    intermediate_path, "power_grid_prices_ref", "df"
)
h2_prices_ref = read_pickle_folder(intermediate_path, "h2_prices_ref", "df")
bio_model_prices_ref = read_pickle_folder(
    intermediate_path, "bio_model_prices_ref", "df"
)
ccs_model_transport_ref = read_pickle_folder(
    intermediate_path, "ccs_model_transport_ref", "df"
)
ccs_model_storage_ref = read_pickle_folder(
    intermediate_path, "ccs_model_storage_ref", "df"
)
business_cases = read_pickle_folder(
    PKL_DATA_FORMATTED, "standardised_business_cases", "df"
).reset_index()
static_energy_prices = read_pickle_folder(
    PKL_DATA_IMPORTS, "static_energy_prices", "df"
)[["Metric", "Year", "Value"]]
static_energy_prices.set_index(["Metric", "Year"], inplace=True)
feedstock_dict = generate_feedstock_dict(eur_to_usd_rate)
steel_plant_country_codes = list(steel_plants["country_code"].unique())
product_range_year_country = list(
    itertools.product(MODEL_YEAR_RANGE, steel_plant_country_codes)
)

In [7]:
dyc = pd.DataFrame(product_range_year_country, columns=("year", "country_code"))

In [211]:
%%time
df = business_cases.merge(dyc, how="cross")
not_categorical = set(["year", "value"])
categorical_columns = [col for col in df.columns if col not in not_categorical]
for col in categorical_columns:
    df[col] = df[col].astype("category")
df["cost"] = 0.0
df = df.reset_index().set_index(["material_category", "index"])

CPU times: user 744 ms, sys: 111 ms, total: 855 ms
Wall time: 859 ms


In [212]:
df.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1192136 entries, ('Iron ore', 0) to ('Emissivity', 1192135)
Data columns (total 7 columns):
 #   Column        Non-Null Count    Dtype   
---  ------        --------------    -----   
 0   technology    1192136 non-null  category
 1   metric_type   1192136 non-null  category
 2   unit          1192136 non-null  category
 3   value         1192136 non-null  float64 
 4   year          1192136 non-null  int64   
 5   country_code  1192136 non-null  category
 6   cost          1192136 non-null  float64 
dtypes: category(4), float64(2), int64(1)
memory usage: 78.9 MB


In [90]:
df.loc["Electricity"].shape

(51832, 7)

In [91]:
%%time
pgp_ref_list = [
    (year, cc, price) for (year, cc), price in power_grid_prices_ref.items()
]
df_pgp = pd.DataFrame(pgp_ref_list, columns=("year", "country_code", "price"))
df_pgp["country_code"] = df_pgp["country_code"].astype("category")

CPU times: user 6.29 ms, sys: 1.55 ms, total: 7.84 ms
Wall time: 7.29 ms


In [24]:
df_pgp.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7719 entries, 0 to 7718
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   year          7719 non-null   int64   
 1   country_code  7719 non-null   category
 2   price         7719 non-null   float64 
dtypes: category(1), float64(1), int64(1)
memory usage: 158.5 KB


In [65]:
def calculate_price_for_electricity(df, df_pgp):
    de = df[df.material_category == "Electricity"]
    dm = de.reset_index().merge(df_pgp, on=("year", "country_code")).set_index("index")
    dm["cost"] = dm.value * dm.price
    df["cost"] = dm.cost
    return df

In [213]:
def calculate_price_for_electricity_new(df, df_pgp):
    de = df.loc["Electricity"]
    dm = de.reset_index().merge(df_pgp, on=("year", "country_code"))
    dm["material_category"] = "Electricity"
    dm["material_category"] = dm["material_category"].astype("category")
    dm = dm.set_index(["material_category", "index"])
    dm["cost"] = dm.value * dm.price
    df.loc[["Electricity"], ["cost"]] = dm.cost
    return df

In [210]:
%%time
df = calculate_price_for_electricity(df, df_pgp)

AttributeError: 'DataFrame' object has no attribute 'material_category'

In [214]:
%%time
df = calculate_price_for_electricity_new(df, df_pgp)

CPU times: user 123 ms, sys: 16.2 ms, total: 140 ms
Wall time: 139 ms


In [215]:
query_str = "material_category == 'Electricity' and technology == 'DRI-Melt-BOF+CCUS' and year == 2050 and country_code == 'USA'"
df.query(query_str)

Unnamed: 0_level_0,Unnamed: 1_level_0,technology,metric_type,unit,value,year,country_code,cost
material_category,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Electricity,1010723,DRI-Melt-BOF+CCUS,Purchased energy,GJ/t steel,3.50132,2050,USA,59.091243


In [208]:
query_str = "material_category == 'Electricity' and technology == 'DRI-Melt-BOF+CCUS' and year == 2050 and country_code == 'USA'"
df.query(query_str)

Unnamed: 0_level_0,Unnamed: 1_level_0,technology,metric_type,unit,value,year,country_code,cost
material_category,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Electricity,1010723,DRI-Melt-BOF+CCUS,Purchased energy,GJ/t steel,3.50132,2050,USA,59.091243


In [72]:
dg = df.set_index("material_category")

In [75]:
dg.loc["Electricity"].shape

(51832, 7)

In [78]:
df.reset_index().set_index(("index", "material_category"))

KeyError: "None of [('index', 'material_category')] are in the columns"

In [85]:
dg = df.reset_index().set_index(["material_category", "index"])

In [86]:
dg

Unnamed: 0_level_0,Unnamed: 1_level_0,technology,metric_type,unit,value,year,country_code,cost
material_category,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Iron ore,0,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,DZA,
Iron ore,1,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,AGO,
Iron ore,2,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,EGY,
Iron ore,3,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,LBY,
Iron ore,4,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,MAR,
...,...,...,...,...,...,...,...,...
Emissivity,1192131,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,SAU,
Emissivity,1192132,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,SYR,
Emissivity,1192133,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,CAN,
Emissivity,1192134,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,MEX,


In [87]:
dg.loc["Electricity"]

Unnamed: 0_level_0,technology,metric_type,unit,value,year,country_code,cost
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
32984,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,DZA,4.498158
32985,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,AGO,4.498158
32986,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,EGY,4.498158
32987,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,LBY,4.498158
32988,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,MAR,4.498158
...,...,...,...,...,...,...,...
1173283,BAT BF-BOF+BECCUS,Purchased energy,GJ/t steel,7.213562,2050,SAU,173.616410
1173284,BAT BF-BOF+BECCUS,Purchased energy,GJ/t steel,7.213562,2050,SYR,173.616410
1173285,BAT BF-BOF+BECCUS,Purchased energy,GJ/t steel,7.213562,2050,CAN,121.742207
1173286,BAT BF-BOF+BECCUS,Purchased energy,GJ/t steel,7.213562,2050,MEX,121.742207


In [98]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,technology,metric_type,unit,value,year,country_code,cost
material_category,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Iron ore,0,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,DZA,0.0
Iron ore,1,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,AGO,0.0
Iron ore,2,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,EGY,0.0
Iron ore,3,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,LBY,0.0
Iron ore,4,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,MAR,0.0
...,...,...,...,...,...,...,...,...
Emissivity,1192131,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,SAU,0.0
Emissivity,1192132,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,SYR,0.0
Emissivity,1192133,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,CAN,0.0
Emissivity,1192134,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,MEX,0.0


In [205]:
%%time
de = df.loc["Electricity" ]
dm = de.reset_index().merge(df_pgp, on=("year", "country_code"))
dm["material_category"] = "Electricity"
dm["material_category"] = dm["material_category"].astype("category")
dm = dm.set_index(["material_category", "index"])
dm["cost"] = dm.value * dm.price

CPU times: user 33 ms, sys: 5.23 ms, total: 38.2 ms
Wall time: 36.8 ms


In [206]:
query_str = (
    "technology == 'DRI-Melt-BOF+CCUS' and year == 2050 and country_code == 'USA'"
)
dm.query(query_str)

Unnamed: 0_level_0,Unnamed: 1_level_0,technology,metric_type,unit,value,year,country_code,cost,price
material_category,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Electricity,1010723,DRI-Melt-BOF+CCUS,Purchased energy,GJ/t steel,3.50132,2050,USA,59.091243,16.87685


In [207]:
df.loc[["Electricity"], ["cost"]] = dm.cost

In [200]:
dg = pd.DataFrame({"material_category": "Electricity", "index": [32984], "cost": 1.0})
dg["material_category"] = dg["material_category"].astype("category")
dg = dg.set_index(["material_category", "index"])

In [201]:
dg

Unnamed: 0_level_0,Unnamed: 1_level_0,cost
material_category,index,Unnamed: 2_level_1
Electricity,32984,1.0


In [187]:
dg.cost

index
32984    123.0
Name: cost, dtype: float64

In [202]:
%%time
df["cost"] = dg.cost

CPU times: user 1.31 s, sys: 70.4 ms, total: 1.38 s
Wall time: 1.38 s


In [203]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,technology,metric_type,unit,value,year,country_code,cost
material_category,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Iron ore,0,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,DZA,
Iron ore,1,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,AGO,
Iron ore,2,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,EGY,
Iron ore,3,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,LBY,
Iron ore,4,Avg BF-BOF,Feedstock,t/t steel,1.367188,2020,MAR,
...,...,...,...,...,...,...,...,...
Emissivity,1192131,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,SAU,
Emissivity,1192132,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,SYR,
Emissivity,1192133,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,CAN,
Emissivity,1192134,BAT BF-BOF+BECCUS,Other,tCO2/t steel,-0.404088,2050,MEX,


In [191]:
df.loc["Electricity", ["cost"]] = dg.cost

In [196]:
df.loc["Electricity", ["cost"]]

Unnamed: 0_level_0,cost
index,Unnamed: 1_level_1
32984,1.0
32985,
32986,
32987,
32988,
...,...
1173283,
1173284,
1173285,
1173286,


In [168]:
df.loc["Electricity"].cost

index
32984     NaN
32985     NaN
32986     NaN
32987     NaN
32988     NaN
           ..
1173283   NaN
1173284   NaN
1173285   NaN
1173286   NaN
1173287   NaN
Name: cost, Length: 51832, dtype: float64

In [153]:
df.loc["Electricity"] = dm.cost

ValueError: Must have equal len keys and value when setting with an iterable

In [151]:
df.loc["index"]

KeyError: 'index'

In [149]:
df.loc["Electricity"]

Unnamed: 0_level_0,technology,metric_type,unit,value,year,country_code,cost,index
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
32984,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,DZA,,
32985,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,AGO,,
32986,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,EGY,,
32987,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,LBY,,
32988,Avg BF-BOF,Purchased energy,GJ/t steel,0.197855,2020,MAR,,
...,...,...,...,...,...,...,...,...
1173283,BAT BF-BOF+BECCUS,Purchased energy,GJ/t steel,7.213562,2050,SAU,,
1173284,BAT BF-BOF+BECCUS,Purchased energy,GJ/t steel,7.213562,2050,SYR,,
1173285,BAT BF-BOF+BECCUS,Purchased energy,GJ/t steel,7.213562,2050,CAN,,
1173286,BAT BF-BOF+BECCUS,Purchased energy,GJ/t steel,7.213562,2050,MEX,,


In [154]:
df = pd.DataFrame(
    [[1, 2], [4, 5], [7, 8]],
    index=["cobra", "viper", "sidewinder"],
    columns=["max_speed", "shield"],
)

In [155]:
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,5
sidewinder,7,8


In [156]:
df.loc["viper"]

max_speed    4
shield       5
Name: viper, dtype: int64

In [157]:
df.loc[["viper", "sidewinder"]]

Unnamed: 0,max_speed,shield
viper,4,5
sidewinder,7,8


In [158]:
df.loc["cobra", "shield"]

2

In [159]:
df.loc[["viper", "sidewinder"], ["shield"]] = 50

In [160]:
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,50
sidewinder,7,50
