# Running CEDS Scenarios

In this notebook we document how to process and run data from the CEDS database.

In [1]:
%matplotlib inline

from os import listdir
from os.path import join, dirname
from pprint import pprint

import pandas as pd
import pyam
from pyam.utils import LONG_IDX
import pint
from pint.pandas_interface import PintArray
from pint.errors import DimensionalityError

import pymagicc
from pymagicc.io import MAGICCData

import matplotlib.pyplot as plt
plt.style.use('bmh') 

import expectexception

<IPython.core.display.Javascript object>

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
TEST_DATA_PATH = join("..", "tests", "test_data")

## Reading in a CEDS csv

To read in CEDS csv's, we make use of the `pyam` library which is specifically designed for this purpose.

In [4]:
def read_ceds_csv(file_to_read):
    return pyam.IamDataFrame(
        data=file_to_read,
        encoding="utf-8"
    )

ceds_pyam_df = read_ceds_csv(join(TEST_DATA_PATH, "ceds-format-example.csv"))
ceds_pyam_df  # this just shows the type of ceds_pyam_df
ceds_pyam_df.data  # this returns the underlying DataFrame

INFO:root:Reading `../tests/test_data/ceds-format-example.csv`


<pyam.core.IamDataFrame at 0x1050f8b38>

Unnamed: 0,model,scenario,region,variable,unit,year,value
0,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5ASIA,Emissions|BC,Mt BC/yr,2015,36.398915
91,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5LAM,Emissions|BC,Mt BC/yr,2015,26.494276
174,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5MAF,Emissions|BC,Mt BC/yr,2015,30.456913
257,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5OECD,Emissions|BC,Mt BC/yr,2015,32.513038
348,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5REF,Emissions|BC,Mt BC/yr,2015,18.561100
439,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,World,Emissions|BC,Mt BC/yr,2015,152.232300
2276,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5ASIA,Emissions|BC,Mt BC/yr,2020,26.645995
2367,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5LAM,Emissions|BC,Mt BC/yr,2020,4.195137
2450,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5MAF,Emissions|BC,Mt BC/yr,2020,24.990606
2533,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,R5OECD,Emissions|BC,Mt BC/yr,2020,20.422119


## Checking an `IamDataFrame`

It is very easy to check that the sum of a given variable's sub-categories is equal to its declared total and that the sum of regions gives the world total.

We show how in the next cell.

In [5]:
# show check_internal_consistency method here

## Reshaping an `IamDataFrame`

Here we show how to reshape an `IamDataFrame` to get it into the format expected by `openscm` so we can then write files with the data in it.

Note: we normally want to take this step last, after we have done all our aggregation etc., as it means that we no longer have an `IamDataFrame` and can't use all the helpful tools it provides any more.

In [6]:
def reshape_pyam_df_to_openscm_df(pyam_df):
    raw_df = pyam_df.data
    
    reindexed_df = raw_df.set_index(
        ["model", "scenario", "region", "variable", "unit", "year"]
    ).unstack().T
    
    years = reindexed_df.index.get_level_values("year")
    if (years % 1 == 0).all() :
        reindexed_df.index = years.astype(int)
    reindexed_df.index.name = "YEAR"
    
    models = reindexed_df.columns.get_level_values("model")
    scenarios = reindexed_df.columns.get_level_values("scenario")
    regions = reindexed_df.columns.get_level_values("region")
    variables = reindexed_df.columns.get_level_values("variable")
    units = reindexed_df.columns.get_level_values("unit")
    todos = ["SET"] * len(units)
    
    reindexed_df.columns = pd.MultiIndex.from_arrays(
        [models, scenarios, variables, todos, units, regions],
        names=("MODEL", "SCENARIO", "VARIABLE", "TODO", "UNITS", "REGION"),
    )
    
    return reindexed_df

In [7]:
reshape_pyam_df_to_openscm_df(ceds_pyam_df)

MODEL,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,...,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2
SCENARIO,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,...,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS
VARIABLE,Emissions|BC,Emissions|BC|Agricultural Waste Burning,Emissions|BC|Energy Sector,Emissions|BC|Forest Burning,Emissions|BC|Grassland Burning,Emissions|BC|Industrial Sector,Emissions|BC|Peat Burning,Emissions|BC|Residential Commercial Other,Emissions|BC|Transportation Sector,Emissions|BC|Waste,...,Emissions|VOC|Energy Sector,Emissions|VOC|Forest Burning,Emissions|VOC|Grassland Burning,Emissions|VOC|Industrial Sector,Emissions|VOC|International Shipping,Emissions|VOC|Peat Burning,Emissions|VOC|Residential Commercial Other,Emissions|VOC|Solvents Production and Application,Emissions|VOC|Transportation Sector,Emissions|VOC|Waste
TODO,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET,...,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET
UNITS,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt BC/yr,...,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr
REGION,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,...,World,World,World,World,World,World,World,World,World,World
YEAR,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6
2015,36.398915,3.97409,1.310619,2.652261,6.632639,6.185669,6.638564,4.419253,0.03132,4.554501,...,22.435404,16.880072,22.60382,16.315349,3.667445,13.411263,5.034825,17.445969,26.225336,14.437468
2020,26.645995,2.737676,8.896164,3.540554,-1.279472,-1.20352,4.261367,2.938159,8.220322,-1.465254,...,24.250725,22.231554,17.233288,28.035002,1.35484,6.119121,36.869889,25.563118,7.713587,19.336159
2030,36.275098,9.717128,3.933159,-1.479242,5.246591,1.119544,9.210203,3.632588,-0.132603,5.02773,...,27.913358,31.092027,20.665074,28.665685,6.227858,1.114096,20.36238,29.293317,15.953845,19.979822
2040,32.112875,8.026567,6.339154,1.199015,4.793242,-0.491045,-0.586533,6.729051,2.319591,3.783833,...,27.293127,11.304503,24.332199,28.192201,1.665738,12.144726,9.950273,23.291705,28.238521,34.11867
2050,32.827797,1.968237,8.799231,0.975675,4.584317,7.386991,5.537183,5.498379,-1.744733,-0.177482,...,3.585028,13.929736,15.990923,25.657469,5.665411,19.182947,11.887134,25.978864,23.315974,17.515016
2060,47.318307,3.889015,1.607069,4.169055,7.077807,6.433131,8.549187,2.862984,3.995876,8.734183,...,22.974871,14.683648,23.173426,7.474016,9.607214,10.064491,15.923026,16.306707,25.180956,20.820999
2070,30.100152,2.213773,4.971571,4.518712,7.762036,1.085432,-0.97887,6.789561,-1.689148,5.427085,...,27.851217,21.792182,17.925362,20.895192,7.15011,10.857792,5.868277,24.623041,7.85957,30.71067
2080,27.344638,7.07489,-1.760939,8.024338,0.722233,4.163659,0.498857,7.492641,-1.224361,2.353321,...,4.588107,32.430956,7.012352,17.250064,9.149079,14.921297,34.437474,20.116122,21.698729,23.599304
2090,36.352506,7.606149,-1.632714,2.7408,1.58108,5.67577,7.586958,3.997953,4.610668,4.185841,...,10.812095,19.494913,12.77369,12.267719,-0.111299,17.83985,18.633501,19.931002,30.717494,33.750923
2100,31.18729,4.797244,2.411164,1.578012,4.84812,1.165334,2.736112,1.181979,5.63282,6.836504,...,18.622264,27.045651,18.760661,14.540381,2.43613,4.356084,28.422837,24.957032,24.37807,22.278002


## Super brief intro to pyam

The `pyam` library provides some very natural ways of filtering their DataFrames. These are detailed in [their tutorial](https://github.com/IAMconsortium/pyam/blob/master/tutorial/pyam_first_steps.ipynb). Here we use them to help convert IAM data into the emissions variables, regions and units used by openscm and MAGICC.

In [8]:
tdf = ceds_pyam_df.filter(
    level=1,
    model="MODEL-NAME-HYPHENS",
    scenario="SCENARIO-A-B-CDE-2",
    region="World",
)
tdf.variables()
tdf.regions()
tdf.data.head()

0            Emissions|BC
1          Emissions|C2F6
2          Emissions|CCl4
3           Emissions|CF4
4        Emissions|CFC-11
5       Emissions|CFC-113
6       Emissions|CFC-114
7       Emissions|CFC-115
8        Emissions|CFC-12
9           Emissions|CH3
10      Emissions|CH3CCl3
11        Emissions|CH3Cl
12          Emissions|CH4
13           Emissions|CO
14          Emissions|CO2
15    Emissions|HCFC-141b
16    Emissions|HCFC-142b
17      Emissions|HCFC-22
18          Emissions|HFC
19    Emissions|Halon1202
20    Emissions|Halon1211
21    Emissions|Halon1301
22    Emissions|Halon2402
23          Emissions|N2O
24          Emissions|NH3
25          Emissions|NOx
26           Emissions|OC
27          Emissions|SF6
28       Emissions|Sulfur
29          Emissions|VOC
Name: variable, dtype: object

0    World
Name: region, dtype: object

Unnamed: 0,model,scenario,region,variable,unit,year,value
439,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,World,Emissions|BC,Mt BC/yr,2015,152.2323
2715,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,World,Emissions|BC,Mt BC/yr,2020,127.122154
4991,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,World,Emissions|BC,Mt BC/yr,2030,177.813834
7267,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,World,Emissions|BC,Mt BC/yr,2040,218.627907
9543,MODEL-NAME-HYPHENS,SCENARIO-A-B-CDE-2,World,Emissions|BC,Mt BC/yr,2050,219.680082


## Converting CEDS data to openscm data

In [9]:
# ceds openscm mapping
ceds_openscm_var_mapping = {
    "Aircraft": ["Aircraft"],
    "International Shipping": ["International Shipping"],
    "AFOLULUC": ["Agricultural Waste Burning", "Agriculture", 
                 "Forest Burning", "Grassland Burning", 
                 "Peat Burning", "Aggregate - Agriculture and LUC"],
    "Fossil": ["Energy Sector", "Industrial Sector", 
               "Residential Commercial Other", 
               "Solvents Production and Application", 
               "Transportation Sector", "Waste"]
}
# need better name for this
TMP_INDEX = ['model', 'scenario', 'region', 'year', 'unit']

In [10]:
def convert_ceds_to_openscm_variable(ceds_var):
    raw_ceds_var = ceds_var.replace("Emissions|", "")
    
    special_cases = {
        "Sulfur": "SOX",
    }
    
    if raw_ceds_var in special_cases:
        raw_var = special_cases[raw_ceds_var]
    else:
        raw_var = raw_ceds_var.replace("-", "").upper()
        
    return "Emissions|" + raw_var

def convert_ceds_df_to_openscm_df(pyam_df):
    openscm_df = pyam.IamDataFrame(data=pyam_df.data.copy())
    openscm_df.data.region = openscm_df.data.region.str.upper()
    
    output_df = openscm_df.filter(level=1,).data
    output_df.variable = output_df.variable.apply(convert_ceds_to_openscm_variable)
    
    openscm_df = openscm_df.filter(
        level='1-', 
        keep=False
    )
    
    handled_vars = []
    metadata = {}
    for variable in openscm_df.variables():
        base_var = "|".join(variable.split("|")[:2])
        
        for category, suffixes in ceds_openscm_var_mapping.items():
            openscm_var = "{}|{}".format(
                convert_ceds_to_openscm_variable(variable.split("|")[1]),
                category,
            )
            if openscm_var in handled_vars:
                continue
            handled_vars.append(openscm_var)
            
            contrib_vars = ["{}|{}".format(base_var, s) for s in suffixes]
            
            var_cat_df = openscm_df.data[openscm_df.data.variable.isin(contrib_vars)]
            var_cat_df = pd.DataFrame(var_cat_df.groupby(TMP_INDEX).sum()['value'])
            var_cat_df = pd.concat([var_cat_df], keys=[openscm_var], names=['variable'])

            output_df = pd.concat([output_df, var_cat_df.reset_index()], sort=False)

            metadata[openscm_var] = "Sum of {}".format(", ".join(contrib_vars))
    
    return pyam.IamDataFrame(data=output_df), metadata

In [11]:
openscm_df, metadata = convert_ceds_df_to_openscm_df(ceds_pyam_df)

pprint([v for v in openscm_df.variables()])
pprint([r for r in openscm_df.regions()])
reshape_pyam_df_to_openscm_df(openscm_df)

['Emissions|BC',
 'Emissions|BC|AFOLULUC',
 'Emissions|BC|Aircraft',
 'Emissions|BC|Fossil',
 'Emissions|BC|International Shipping',
 'Emissions|C2F6',
 'Emissions|CCL4',
 'Emissions|CF4',
 'Emissions|CFC11',
 'Emissions|CFC113',
 'Emissions|CFC114',
 'Emissions|CFC115',
 'Emissions|CFC12',
 'Emissions|CH3',
 'Emissions|CH3CCL3',
 'Emissions|CH3CL',
 'Emissions|CH4',
 'Emissions|CH4|AFOLULUC',
 'Emissions|CH4|Fossil',
 'Emissions|CH4|International Shipping',
 'Emissions|CO',
 'Emissions|CO2',
 'Emissions|CO2|AFOLULUC',
 'Emissions|CO2|Aircraft',
 'Emissions|CO2|Fossil',
 'Emissions|CO2|International Shipping',
 'Emissions|CO|AFOLULUC',
 'Emissions|CO|Aircraft',
 'Emissions|CO|Fossil',
 'Emissions|CO|International Shipping',
 'Emissions|HALON1202',
 'Emissions|HALON1211',
 'Emissions|HALON1301',
 'Emissions|HALON2402',
 'Emissions|HCFC141B',
 'Emissions|HCFC142B',
 'Emissions|HCFC22',
 'Emissions|HFC',
 'Emissions|N2O',
 'Emissions|NH3',
 'Emissions|NH3|AFOLULUC',
 'Emissions|NH3|Aircra

MODEL,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,...,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2
SCENARIO,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,...,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS
VARIABLE,Emissions|BC,Emissions|BC|AFOLULUC,Emissions|BC|Fossil,Emissions|CH4,Emissions|CH4|AFOLULUC,Emissions|CH4|Fossil,Emissions|CO,Emissions|CO2,Emissions|CO2|Fossil,Emissions|CO|AFOLULUC,...,Emissions|SOX,Emissions|SOX|AFOLULUC,Emissions|SOX|Aircraft,Emissions|SOX|Fossil,Emissions|SOX|International Shipping,Emissions|VOC,Emissions|VOC|AFOLULUC,Emissions|VOC|Aircraft,Emissions|VOC|Fossil,Emissions|VOC|International Shipping
TODO,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET,...,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET
UNITS,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt CH4/yr,Mt CH4/yr,Mt CH4/yr,Mt CO/yr,Mt CO2/yr,Mt CO2/yr,Mt CO/yr,...,Mt SO2/yr,Mt SO2/yr,Mt SO2/yr,Mt SO2/yr,Mt SO2/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr,Mt VOC/yr
REGION,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,...,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD
YEAR,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6
2015,36.398915,19.897554,16.501361,33.186689,11.676655,21.510034,27.647071,36.300442,36.300442,3.371103,...,171.664815,68.889755,-0.030527,98.684978,4.120609,173.700881,68.916762,-0.777677,101.894351,3.667445
2020,26.645995,9.260125,17.38587,49.676666,34.772684,14.903982,37.980337,18.820329,18.820329,23.556976,...,165.431129,92.593707,6.41343,67.131894,-0.707902,211.11627,58.357976,9.634973,141.768481,1.35484
2030,36.275098,22.694679,13.580419,46.979559,31.844023,15.135537,56.035339,19.323918,19.323918,20.931029,...,190.561959,67.346041,5.226461,109.822993,8.166465,215.210141,65.510687,1.30319,142.168406,6.227858
2040,32.112875,13.432291,18.680585,44.691875,11.953285,32.73859,28.72978,29.048607,29.048607,7.942382,...,211.294588,94.528544,9.506759,106.129906,1.129379,221.624679,70.872486,-1.998041,151.084496,1.665738
2050,32.827797,13.065412,19.762386,30.754564,18.804239,11.950325,27.664517,22.267156,22.267156,14.235618,...,168.798813,66.132288,9.662886,92.322743,0.680895,194.019653,70.58646,9.828296,107.939485,5.665411
2060,47.318307,23.685064,23.633243,45.037278,10.391316,34.645962,29.938876,21.045728,21.045728,15.888153,...,230.182385,100.929007,6.596293,113.638208,9.018877,178.612548,55.844966,4.479794,108.680574,9.607214
2070,30.100152,13.515651,16.584501,40.64809,13.925268,26.722822,26.393159,32.946365,32.946365,6.795837,...,177.302515,63.299536,7.417189,98.471268,8.114521,215.60707,90.416292,0.232701,117.807967,7.15011
2080,27.344638,16.320317,11.02432,47.31943,31.660236,15.659194,44.438492,22.693831,22.693831,21.066322,...,179.045043,68.753088,8.690199,101.162282,0.439474,205.846515,68.895227,6.112409,121.6898,9.149079
2090,36.352506,19.514986,16.837519,35.504855,13.973206,21.531649,55.683231,16.846655,16.846655,24.298416,...,140.829716,65.898728,7.746038,63.361738,3.823212,188.036133,63.649517,-1.614818,126.112733,-0.111299
2100,31.18729,13.959489,17.227801,38.099801,11.695808,26.403994,38.403976,15.680066,15.680066,16.972965,...,188.785666,76.634306,1.999117,108.418239,1.734004,204.910681,70.553602,-1.277638,133.198587,2.43613


## Converting openscm data to MAGICC data

Here we show how to then convert an openscm data table to MAGICC data.

In [12]:
def convert_openscm_to_magicc_variable(ceds_variable):
    category_codes = {
        "Aircraft": "AIR",
        "International Shipping": "SHIP",
        "AFOLULUC": "B",
        "Fossil": "I",
    }
    
    special_cases = {
        "VOC": "NMVOC",
    }
    
    # Improvement: do this with regexp
    species = ceds_variable.split("|")[1]
    if species in special_cases:
        species = special_cases[species]
        
    try:
        category = ceds_variable.split("|")[2]
        category_code = category_codes[category]
    except IndexError:
        category_code = ""
    
    return "{}{}_EMIS".format(species, category_code)
    
def convert_openscm_to_magicc_df(openscm_df):
    magicc_df = pyam.IamDataFrame(data=openscm_df.data.copy())
    magicc_df.data.variable = magicc_df.data.variable.apply(convert_openscm_to_magicc_variable)
    # unit conversions
    return magicc_df

In [13]:
magicc_df = convert_openscm_to_magicc_df(openscm_df)
reshape_pyam_df_to_openscm_df(magicc_df)

MODEL,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,...,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2
SCENARIO,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,...,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS
VARIABLE,BCB_EMIS,BCI_EMIS,BC_EMIS,CH4B_EMIS,CH4I_EMIS,CH4_EMIS,CO2I_EMIS,CO2_EMIS,COB_EMIS,COI_EMIS,...,OCB_EMIS,OCI_EMIS,OCSHIP_EMIS,OC_EMIS,SF6_EMIS,SOXAIR_EMIS,SOXB_EMIS,SOXI_EMIS,SOXSHIP_EMIS,SOX_EMIS
TODO,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET,...,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET
UNITS,Mt BC/yr,Mt BC/yr,Mt BC/yr,Mt CH4/yr,Mt CH4/yr,Mt CH4/yr,Mt CO2/yr,Mt CO2/yr,Mt CO/yr,Mt CO/yr,...,Mt OC/yr,Mt OC/yr,Mt OC/yr,Mt OC/yr,kt SF6/yr,Mt SO2/yr,Mt SO2/yr,Mt SO2/yr,Mt SO2/yr,Mt SO2/yr
REGION,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,R5ASIA,...,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD
YEAR,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6
2015,19.897554,16.501361,36.398915,11.676655,21.510034,33.186689,36.300442,36.300442,3.371103,24.275968,...,57.318664,74.312308,3.802944,142.979269,1.800539,-0.030527,68.889755,98.684978,4.120609,171.664815
2020,9.260125,17.38587,26.645995,34.772684,14.903982,49.676666,18.820329,18.820329,23.556976,14.423361,...,73.126747,99.627257,2.74818,180.471025,-1.99745,6.41343,92.593707,67.131894,-0.707902,165.431129
2030,22.694679,13.580419,36.275098,31.844023,15.135537,46.979559,19.323918,19.323918,20.931029,35.104309,...,33.132244,76.885968,5.380791,123.248619,8.141167,5.226461,67.346041,109.822993,8.166465,190.561959
2040,13.432291,18.680585,32.112875,11.953285,32.73859,44.691875,29.048607,29.048607,7.942382,20.787398,...,94.125676,93.139837,0.911188,186.454927,3.737162,9.506759,94.528544,106.129906,1.129379,211.294588
2050,13.065412,19.762386,32.827797,18.804239,11.950325,30.754564,22.267156,22.267156,14.235618,13.4289,...,97.124686,104.350798,2.171461,211.132081,1.850793,9.662886,66.132288,92.322743,0.680895,168.798813
2060,23.685064,23.633243,47.318307,10.391316,34.645962,45.037278,21.045728,21.045728,15.888153,14.050723,...,58.928189,81.952714,-0.394555,145.625701,1.536665,6.596293,100.929007,113.638208,9.018877,230.182385
2070,13.515651,16.584501,30.100152,13.925268,26.722822,40.64809,32.946365,32.946365,6.795837,19.597322,...,60.117122,103.015167,4.660071,174.346,6.502613,7.417189,63.299536,98.471268,8.114521,177.302515
2080,16.320317,11.02432,27.344638,31.660236,15.659194,47.31943,22.693831,22.693831,21.066322,23.372171,...,68.602552,111.52768,7.894283,191.76694,-1.059791,8.690199,68.753088,101.162282,0.439474,179.045043
2090,19.514986,16.837519,36.352506,13.973206,21.531649,35.504855,16.846655,16.846655,24.298416,31.384815,...,64.963427,88.351171,-1.801544,154.05165,0.180659,7.746038,65.898728,63.361738,3.823212,140.829716
2100,13.959489,17.227801,31.18729,11.695808,26.403994,38.099801,15.680066,15.680066,16.972965,21.431011,...,79.258432,127.666884,1.040398,213.115027,2.432472,1.999117,76.634306,108.418239,1.734004,188.785666


### Converting to SCEN7 format

In [14]:
def convert_bunkers_to_magicc6_variable(bunker_variable):
    return bunker_variable.replace("SHIP", "I").replace("AIR", "I")

def get_bunkers_df_from_magicc_df(magicc_df):
    ship_df = magicc_df.filter(variable="*SHIP*").data
    ship_df.variable = ship_df.variable.apply(convert_bunkers_to_magicc6_variable)
    ship_df.region = "BUNKERS"
    ship_df.set_index(LONG_IDX, inplace=True)
    
    air_df = magicc_df.filter(variable="*AIR*").data
    air_df.variable = air_df.variable.apply(convert_bunkers_to_magicc6_variable)
    air_df.region = "BUNKERS"
    air_df.set_index(LONG_IDX, inplace=True)

    bunkers_df = ship_df + air_df
    bunkers_df.reset_index(inplace=True)
    
    return bunkers_df

def magicc_df_to_scen7_df(magicc_df):
    scen7_df = magicc_df.filter(
        variable=["*SHIP*", "*AIR*"], 
        keep=False
    )
    # strip out all the variables with breakdown
    # data available
    for variable in scen7_df.variables():
        if variable.endswith(("I_EMIS", "B_EMIS")):
            continue

        if variable.replace("_EMIS", "I_EMIS") in scen7_df.variables().tolist():
            scen7_df = scen7_df.filter(
                variable=variable, 
                keep=False
            )
    
    # to dicuss with Malte, should we do this 
    # given I don't think it matters:
    # - add in N2O breakdown
    # - add in CO2B breakdown
    
    scen7_df = scen7_df.data
    bunkers_df = get_bunkers_df_from_magicc_df(magicc_df)
    
    scen7_df = pd.concat([scen7_df, bunkers_df])
    
    return pyam.IamDataFrame(data=scen7_df)

In [15]:
scen7_df = reshape_pyam_df_to_openscm_df(
    magicc_df_to_scen7_df(magicc_df)
)

#### Write SCEN7 files

In [16]:
scen7_df

MODEL,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,MODEL-NAME-HYPHENS,...,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2,NAME-MODEL-2
SCENARIO,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,SCENARIO-A-B-CDE-2,...,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS,SCENARIO-NAME-HYPHENS
VARIABLE,BCI_EMIS,CO2I_EMIS,COI_EMIS,NH3I_EMIS,NMVOCI_EMIS,NOXI_EMIS,OCI_EMIS,SOXI_EMIS,BCB_EMIS,BCI_EMIS,...,NH3I_EMIS,NMVOCB_EMIS,NMVOCI_EMIS,NOXB_EMIS,NOXI_EMIS,OCB_EMIS,OCI_EMIS,SF6_EMIS,SOXB_EMIS,SOXI_EMIS
TODO,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET,...,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET
UNITS,Mt BC/yr,Mt CO2/yr,Mt CO/yr,Mt NH3/yr,Mt VOC/yr,Mt NOx/yr,Mt OC/yr,Mt SO2/yr,Mt BC/yr,Mt BC/yr,...,Mt NH3/yr,Mt VOC/yr,Mt VOC/yr,Mt NOx/yr,Mt NOx/yr,Mt OC/yr,Mt OC/yr,kt SF6/yr,Mt SO2/yr,Mt SO2/yr
REGION,BUNKERS,BUNKERS,BUNKERS,BUNKERS,BUNKERS,BUNKERS,BUNKERS,BUNKERS,R5ASIA,R5ASIA,...,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD
YEAR,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6,Unnamed: 18_level_6,Unnamed: 19_level_6,Unnamed: 20_level_6,Unnamed: 21_level_6
2015,7.808057,8.855288,7.899835,-0.44989,12.153599,4.195142,6.782917,7.912581,19.897554,16.501361,...,84.934645,68.916762,101.894351,76.69307,84.853501,57.318664,74.312308,1.800539,68.889755,98.684978
2020,15.127783,8.139339,8.753331,11.475173,11.208207,11.464866,9.448994,2.53059,9.260125,17.38587,...,97.813641,58.357976,141.768481,84.262013,105.142337,73.126747,99.627257,-1.99745,92.593707,67.131894
2030,13.879839,8.051441,10.2336,10.392513,10.835994,11.615368,7.95518,9.769511,22.694679,13.580419,...,83.509742,65.510687,142.168406,107.920254,107.833818,33.132244,76.885968,8.141167,67.346041,109.822993
2040,3.586545,12.545369,15.570558,11.223494,4.095413,14.403432,1.174812,14.115141,13.432291,18.680585,...,134.193599,70.872486,151.084496,84.982498,78.789306,94.125676,93.139837,3.737162,94.528544,106.129906
2050,2.919318,10.074329,8.36317,7.984235,8.320671,4.295391,8.593765,5.580531,13.065412,19.762386,...,105.194942,70.58646,107.939485,72.949639,96.502733,97.124686,104.350798,1.850793,66.132288,92.322743
2060,5.06774,2.421339,2.760328,11.695897,10.646372,10.441905,15.279038,17.651168,23.685064,23.633243,...,101.349886,55.844966,108.680574,96.519916,67.661884,58.928189,81.952714,1.536665,100.929007,113.638208
2070,2.749164,17.814434,6.712114,9.780849,4.99737,6.375483,-0.443323,18.2188,13.515651,16.584501,...,51.805945,90.416292,117.807967,105.582604,101.224303,60.117122,103.015167,6.502613,63.299536,98.471268
2080,1.171113,8.984201,6.792964,4.856507,0.024543,7.796152,0.542668,5.841155,16.320317,11.02432,...,110.301355,68.895227,121.6898,122.93499,75.062906,68.602552,111.52768,-1.059791,68.753088,101.162282
2090,7.432437,8.771807,0.790272,11.61219,5.530516,4.741502,4.976972,17.317741,19.514986,16.837519,...,102.735132,63.649517,126.112733,102.768453,122.183012,64.963427,88.351171,0.180659,65.898728,63.361738
2100,0.486817,-0.499016,4.170258,6.492259,11.521443,13.085589,4.71121,6.38736,13.959489,17.227801,...,77.860083,70.553602,133.198587,100.02746,107.915531,79.258432,127.666884,2.432472,76.634306,108.418239


In [17]:
for label, df in scen7_df.groupby(level=["MODEL", "SCENARIO"], axis=1):
    fn = "{}_{}.SCEN7".format(*label)
    df.columns = df.columns.droplevel("MODEL").droplevel("SCENARIO")
    df.index = df.index.astype(int)
    writer = MAGICCData()
    writer.df = df
    writer.metadata = {
        "header": "required for some reason\n\n"
    }
    writer.write(fn)

### Converting to SCEN format

In [18]:
def magicc_df_to_scen_df(magicc_df, world_only=True):
    if not world_only:
        raise NotImplementedError("Neccesary checks not yet included")
        
    scen_emis = [
        v + "_EMIS" 
        for v in pymagicc.definitions.scen_emms_code_1]
    scen_df = magicc_df.filter(
        variable=["*SHIP*", "*AIR*"],
        keep=False
    )
    
    scen_df = scen_df.data
    bunkers_df = get_bunkers_df_from_magicc_df(magicc_df)
    
    scen_df = pd.concat([scen_df, bunkers_df])
    
    scen_df = pyam.IamDataFrame(data=scen_df)
    if world_only:
        scen_df = scen_df.filter(
            region="WORLD"
        )
    
    return scen_df.filter(variable=scen_emis)

In [19]:
scen_iam_df = magicc_df_to_scen_df(magicc_df)
scen_df = reshape_pyam_df_to_openscm_df(scen_iam_df)
scen_iam_df.variables()

0        BC_EMIS
1      C2F6_EMIS
2       CF4_EMIS
3       CH4_EMIS
4      CO2B_EMIS
5      CO2I_EMIS
6        CO_EMIS
7       N2O_EMIS
8       NH3_EMIS
9     NMVOC_EMIS
10      NOX_EMIS
11       OC_EMIS
12      SF6_EMIS
13      SOX_EMIS
Name: variable, dtype: object

In [20]:
for label, df in scen_df.groupby(level=["MODEL", "SCENARIO"], axis=1):
    fn = "{}_{}.SCEN".format(*label)

    df.columns = df.columns.droplevel("MODEL").droplevel("SCENARIO")
    df.index = df.index.astype(int)
    
    fn
    df
    
    writer = MAGICCData()
    writer.df = df
    writer.metadata = {
        "header": "required for some reason\n\n"
    }
    writer.write(fn)

'MODEL-NAME-HYPHENS_SCENARIO-A-B-CDE-2.SCEN'

VARIABLE,BC_EMIS,C2F6_EMIS,CF4_EMIS,CH4_EMIS,CO2B_EMIS,CO2I_EMIS,CO_EMIS,N2O_EMIS,NH3_EMIS,NMVOC_EMIS,NOX_EMIS,OC_EMIS,SF6_EMIS,SOX_EMIS
TODO,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET,SET
UNITS,Mt BC/yr,kt C2F6/yr,kt CF4/yr,Mt CH4/yr,Mt CO2/yr,Mt CO2/yr,Mt CO/yr,kt N2O/yr,Mt NH3/yr,Mt VOC/yr,Mt NOx/yr,Mt OC/yr,kt SF6/yr,Mt SO2/yr
REGION,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD,WORLD
YEAR,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4
2015,152.2323,-0.041672,9.171883,174.086943,5.784132,98.881333,164.542176,8.227947,201.585237,223.585251,188.452365,168.480964,4.430015,166.242029
2020,127.122154,5.356546,3.920455,190.914613,-1.819205,116.665714,207.880179,5.923396,222.346973,185.123285,207.703245,175.882816,3.886447,161.796137
2030,177.813834,1.744312,8.971443,173.510923,9.340767,94.997135,210.127141,-1.075648,189.346711,171.168715,206.074835,216.652539,5.770118,179.307805
2040,218.627907,4.660074,8.913939,183.605843,8.08574,157.824079,207.023793,-1.46465,229.721234,176.405342,242.040656,195.657868,2.465104,161.608984
2050,219.680082,4.051723,8.804623,172.782951,9.358825,130.577524,198.292512,9.521761,228.048844,195.762669,206.381987,158.507932,6.301458,180.567689
2060,190.530529,3.082902,7.59332,191.102728,6.95468,113.307226,179.222284,7.530809,184.317859,216.128281,187.982573,156.424602,0.385465,194.958677
2070,176.280741,8.485087,6.035585,198.140247,4.902896,118.794185,162.05994,4.113349,167.73382,223.791591,166.457481,151.673665,4.40249,147.160252
2080,181.90239,0.682135,5.629336,173.182064,4.379161,104.99345,216.447762,8.149596,224.395672,183.695327,196.699307,182.379668,4.104587,207.151965
2090,176.245625,4.861361,7.653445,193.150706,2.998139,142.538934,187.808881,4.406094,164.047091,199.11038,235.379625,194.324809,3.898192,175.915291
2100,188.251512,5.791907,7.094923,177.012208,6.009816,108.167256,199.006218,-0.658941,177.286369,224.269233,177.675473,204.954645,1.848799,206.289278


ValueError: Could not determine scen special code for emissions ['BC', 'C2F6', 'CF4', 'CH4', 'CO2B', 'CO2I', 'CO', 'N2O', 'NH3', 'NMVOC', 'NOX', 'OC', 'SF6', 'SOX']

## Converting units

### An aside on how units work with Pint

We load units with Pint like so.

In [None]:
ureg = pint.UnitRegistry()  # start a unit repository using the default variables
ureg.load_definitions('emissions_units.txt')  # load emissions units too
ureg._contexts  # show us which contexts we have available

In [None]:
# define some variables
a = 1*ureg.C
b = 1*ureg.CO2
c = 3*ureg.N2O

In [None]:
# they carry units with them
a
b
c

In [None]:
# we can convert them to base units or to each other
b.to_base_units()
b.to('C')
c.to('N')

In [None]:
%%expect_exception DimensionalityError
# if we try to do an invalid conversion, an error will be thrown
b.to('N2O')

In [None]:
# however with a context, we can use metric conversions to 
# do our conversions
# AR4GWP12 is a made up metric where 1C = 20N
# hence 1 CO2 = 12/44 C = 12/44*20 N = 12/44*20*14/44 N2O

with ureg.context('AR4GWP12'):
    b
    b.to('N2O')
    12/44*20*14/44

### Converting pyam/openscm dataframes

This is a bit more fiddly so we wrap it in a function.

In [None]:
ceds_pyam_df.head()

In [None]:
def convert_variable_units(pyam_df, variable, target_units):
    output_df = pyam.IamDataFrame(pyam_df.data.copy())
    output_df.data.unit = output_df.data.unit.str.replace("-", "").replace("Mt CO2equiv/yr", "Mt CO2/yr")
    
    var_df = output_df.filter(variable=variable).data.copy()
    rest_df = output_df.filter(variable=variable, keep=False).data.copy()
    
    var_df = var_df.set_index(LONG_IDX).unstack(["variable", "unit"])
    var_df = var_df.pint.quantify(ureg, level=-1)
    
    for col in var_df:
        var_df[col] = var_df[col].pint.to("Gt C/yr")
    # annoying that pint dequantify destroys index
    old_index = var_df.index
    old_columns = var_df.columns
    var_df = var_df.pint.dequantify()
    var_df.index = old_index
    var_df.columns.names = old_columns.names + ['unit']

    var_df = var_df.stack().stack().reset_index()

    return pyam.IamDataFrame(pd.concat([var_df, rest_df]))

In [None]:
ceds_pyam_df.filter(variable="Emissions*").head()

In [None]:
ceds_pyam_df.filter(
    variable="Emissions|CO2*"
).head()
convert_variable_units(ceds_pyam_df, "Emissions|CO2","GtC/yr").filter(
    variable="Emissions|CO2*"
).head()