In [20]:
import pandas as pd
import os
import country_converter as coco

## read example cost data

In [4]:
exports = "/home/ivanra/documents/gams_learning/pypsa_export"
files = os.listdir(exports)
cost_files = [x for x in files if x.lower().find("cost")> -1]
cost_files

['p32_capCostwAdjCostScaled.csv', 'p32_capCostwAdjCost.csv']

In [None]:
def read_remind_csv(file_path, names: list = None, skiprows = 1)->pd.DataFrame:
    df = pd.read_csv(file_path, names = names, skiprows=skiprows)
    if "variable" in df.columns:
        df.drop(columns = ["variable"], inplace = True)
    return df

costs_p = os.path.join(exports,cost_files[1])
capex = read_remind_csv(costs_p, names = ["variable", "year", "region", "tech","value"], skiprows=1)

UNIT_CONVERSION = {
    "capex": 1e6, # TUSD/TW(h) to USD/MW(h)
    "VOM": 1e6/8760, # TUSD/TWa to USD/MWh
    "FOM": 100, # p.u to percent
}

def transform_capex(capex: pd.DataFrame) -> pd.DataFrame:
    capex["value"] *= UNIT_CONVERSION["capex"]
    capex["unit"] = "USD/MW"
    store_techs = ["h2stor", "btstor", "phs"]
    for stor in store_techs:
        capex.loc[capex["tech"] == stor, "unit"] = "USD/MWh"
    return capex

def transform_vom(vom: pd.DataFrame) -> pd.DataFrame:
    vom["value"] *= UNIT_CONVERSION["VOM"]
    vom["unit"] = "USD/MWh"
    return vom

capex = transform_capex(capex)
capex[(capex.year==2025)].drop(columns = ["region"])

tech_data_p = os.path.join(exports, "pm_data.csv")
tech_data = read_remind_csv(tech_data_p, names = ["variable", "year", "field", "tech","value"], skiprows=1)

lifetime = tech_data.query("field == 'lifetime'")
lifetime["unit"] = "years"

vom = tech_data.query("field == 'omv'")
vom = transform_vom(vom)

fom = tech_data.query("field == 'omf'")
fom["unit"] = "percent"
fom["value"] *= UNIT_CONVERSION["FOM"]





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lifetime["unit"] = "years"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vom["value"] *= UNIT_CONVERSION["VOM"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vom["unit"] = "USD/MWh"


Unnamed: 0,year,field,tech,value
490,CHA,lifetime,ngcc,35.0
491,CHA,lifetime,ngccc,35.0
492,CHA,lifetime,ngt,30.0
493,CHA,lifetime,gaschp,35.0
494,CHA,lifetime,dot,25.0
...,...,...,...,...
7589,USA,lifetime,idrcc,20.0
7590,USA,lifetime,bof,20.0
7591,USA,lifetime,vess_lng,99.0
7592,USA,lifetime,tdh2i,45.0


In [None]:
# TODO cost units in pypsa china
# TODO PHS brownfield in china
# TODO hydro brownfield in china

## read region mappings

In [6]:
regions = pd.read_csv(os.path.join(exports,"region_mappings.csv"))
region_isos = regions.groupby("all_regi2")["iso"].apply(lambda row: list(row))
regions.drop(columns="element_text", inplace = True)

In [23]:
def read_remind_regions(mapping_path: os.PathLike)->pd.DataFrame:
    """read the export from remind 

    Args:
        mapping_path (os.PathLike): the path to the remind mapping (csv export of regi2iso set via GamsConnect)

    Returns:
        pd.DataFrame: the region mapping
    """    

    regions = pd.read_csv(mapping_path)
    regions.drop(columns="element_text", inplace = True)
    regions["iso2"] = coco.convert(regions["iso"], to="ISO2")
    return regions

regions = read_remind_regions(os.path.join(exports,"region_mappings.csv"))
regions_isolists = regions.groupby("all_regi2")["iso"].apply(lambda row: list(row))
regions, regions_isolists



(    all_regi2  iso iso2
 0         CHA  CHN   CN
 1         CHA  HKG   HK
 2         CHA  MAC   MO
 3         CHA  TWN   TW
 4         LAM  ABW   AW
 ..        ...  ...  ...
 244       CAZ  NZL   NZ
 245       CAZ  SPM   PM
 246       IND  IND   IN
 247       JPN  JPN   JP
 248       USA  USA   US
 
 [249 rows x 3 columns],
 all_regi2
 CAZ                            [AUS, CAN, HMD, NZL, SPM]
 CHA                                 [CHN, HKG, MAC, TWN]
 EUR    [ALA, AUT, BEL, BGR, CYP, CZE, DEU, DNK, ESP, ...
 IND                                                [IND]
 JPN                                                [JPN]
 LAM    [ABW, AIA, ARG, ATA, ATG, BES, BHS, BLM, BLZ, ...
 MEA    [ARE, BHR, DZA, EGY, ESH, IRN, IRQ, ISR, JOR, ...
 NEU    [ALB, AND, BIH, CHE, GRL, ISL, LIE, MCO, MKD, ...
 OAS    [AFG, ASM, ATF, BGD, BRN, BTN, CCK, COK, CXR, ...
 REF    [ARM, AZE, BLR, GEO, KAZ, KGZ, MDA, RUS, TJK, ...
 SSA    [AGO, BDI, BEN, BFA, BWA, CAF, CIV, CMR, COD, ...
 USA                    

## convert costs

In [74]:

def read_remind_data(file_path, variable_name, rename_columns={}, error_on_empty=True):
    """
    Auxiliary function for standardised and cached reading of REMIND-EU data
    files to pandas.DataFrame.

    Here all values read are considered variable, i.e. use
    "variable_name" also for what is considered a "parameter" in the GDX
    file.
    """
   

    @functools.lru_cache
    def _read_and_cache_remind_file(fp):
        return Container(load_from=fp)

    data = _read_and_cache_remind_file(file_path)[variable_name]
    df = data.records
    descript = data.description
    

    if error_on_empty and (df is None or df.empty):
        raise ValueError(f"{variable_name} is empty. In: {file_path}")

    df = df.rename(columns=rename_columns, errors="raise")

    return df, descript