In [1]:
%matplotlib widget
import matplotlib
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import tqdm
import pickle
import scmdata
import xarray as xr
import pint_xarray
from itertools import product
from cftime import DatetimeGregorian

<IPython.core.display.Javascript object>

pyam - INFO: Running in a notebook, setting `pyam` logging level to `logging.INFO` and adding stderr handler


In [2]:
def scmrun_from_pandas(df, region, variable, scenario, model):
    """Build a ``scmdata.ScmRun`` from a pint-backed pandas object.

    Parameters
    ----------
    df
        pandas object exposing the ``.pint`` accessor (``.pint.magnitude`` and
        ``.pint.units`` are read) whose index contains a level named ``'Date'``.
    region, variable, scenario, model
        How to fill the ScmRun metadata column of the same name. Each may be

        * a ``str`` -- name of an index level whose values are used,
        * a callable -- called with the unstacked frame; its result is used,
        * anything else -- passed through unchanged (e.g. a one-element list).

    Returns
    -------
    scmdata.ScmRun
        Run whose time axis is taken from the ``'Date'`` level and whose unit
        is ``str(df.pint.units)``.

    Raises
    ------
    ValueError
        If the index has no level named ``'Date'``.
    """
    # Move 'Date' to the innermost position so that the bare ``unstack()``
    # below pivots the dates into the columns.
    names = list(df.index.names)
    date_pos = names.index('Date')
    level_order = names[:date_pos] + names[date_pos + 1:] + ['Date']

    magnitudes = df.pint.magnitude
    dfs = magnitudes.reorder_levels(level_order).sort_index().unstack()

    def resolve(spec):
        """Turn a metadata specification into the values handed to ScmRun."""
        if isinstance(spec, str):
            return dfs.index.get_level_values(spec)
        if callable(spec):
            return spec(dfs)
        return spec

    metadata = {
        'region': resolve(region),
        'variable': resolve(variable),
        'unit': [str(df.pint.units)],
        'scenario': resolve(scenario),
        'model': resolve(model),
    }

    # ``dfs`` has one row per timeseries; the data is passed transposed with
    # the dates (``dfs.columns``) as the index.
    return scmdata.ScmRun(dfs.values.T, columns=metadata, index=dfs.columns)

In [3]:
# Load the pickled `fao_emi_total` object from the local `mem/` directory.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open('mem/fao_emi_total.pck', mode='rb') as fd:
    fao_emi_total_pd = pickle.load(fd)

def variable(dfs):
    """Return ``'<Element> - <Item>'`` labels, one per row of ``dfs``.

    ``dfs`` must have index levels named ``'Element'`` and ``'Item'``; both are
    converted to pandas' string dtype before being concatenated.
    """
    element_labels, item_labels = (
        pd.Series(dfs.index.get_level_values(level)).astype(pd.StringDtype())
        for level in ('Element', 'Item')
    )
    return element_labels + ' - ' + item_labels

# Regions are read from the 'Area' index level, variable names are built by
# the `variable` helper; scenario and model are constant labels.
fao_emi_total = scmrun_from_pandas(
    fao_emi_total_pd,
    region='Area',
    variable=variable,
    scenario=['history'],
    model=['FAO'],
)

# Release the reference to the pandas source object.
del fao_emi_total_pd

In [4]:
# Load the pickled `lak_emi_energy` object from the local `mem/` directory.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open('mem/lak_emi_energy.pck', mode='rb') as fd:
    lak_emi_energy_pd = pickle.load(fd)

# Regions come from the 'Area' index level and variable names from the
# 'Category' level; scenario and model are constant labels.
lak_emi_energy = scmrun_from_pandas(
    lak_emi_energy_pd,
    region='Area',
    variable='Category',
    scenario=['history'],
    model=['LAK'],
)

# Release the reference to the pandas source object.
del lak_emi_energy_pd

In [5]:
# Load the pickled `cmip_conc_co2` object from the local `mem/` directory.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open('mem/cmip_conc_co2.pck', 'rb') as fd:
    cmip_conc_co2_pd = pickle.load(fd)

# Replace the values of the 'Date' index level with 'YYYY-MM-01' strings,
# generated for year -1 followed by years 1..2014, each with months 1..12.
# `MultiIndex.set_levels(..., inplace=True)` was deprecated in pandas 1.2 and
# removed in pandas 2.0, so the returned index is assigned back instead; this
# works on old and new pandas alike.
cmip_conc_co2_pd.index = cmip_conc_co2_pd.index.set_levels(
    [f'{year:04}-{month:02}-01'
     for year, month in product([-1] + list(range(1, 2015)), range(1, 13))],
    level='Date',
)

# Regions come from the 'Area' index level; variable, scenario and model are
# constant labels.
cmip_conc_co2 = scmrun_from_pandas(cmip_conc_co2_pd,
                                   region='Area',
                                   variable=['CO2 concentration'],
                                   scenario=['history'],
                                   model=['CMIP'])

# Release the reference to the pandas source object.
del cmip_conc_co2_pd

In [6]:
# Load the pickled `prm_emi` object from the local `mem/` directory.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open('mem/prm_emi.pck', mode='rb') as fd:
    prm_emi_pd = pickle.load(fd)

def variable(dfs):
    """Return ``'<Entity> - <Category>'`` labels, one per row of ``dfs``.

    ``dfs`` must have index levels named ``'Entity'`` and ``'Category'``; both
    are converted to pandas' string dtype before being concatenated.
    """
    entity_labels, category_labels = (
        pd.Series(dfs.index.get_level_values(level)).astype(pd.StringDtype())
        for level in ('Entity', 'Category')
    )
    return entity_labels + ' - ' + category_labels
    
# Regions and scenarios are read from the 'Area' and 'Scenario' index levels,
# variable names are built by the `variable` helper; the model is a constant
# label.
prm_emi = scmrun_from_pandas(
    prm_emi_pd,
    region='Area',
    variable=variable,
    scenario='Scenario',
    model=['PRIMAP-hist'],
)

# Release the reference to the pandas source object.
del prm_emi_pd

In [7]:
# Load the pickled `primap_sources` object from the local `xr/mem/` directory.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open('xr/mem/primap_sources.pck', mode='rb') as fd:
    primap_xr = pickle.load(fd)

In [8]:
def scmrun_from_xarray(da, model):
    """Build a ``scmdata.ScmRun`` from an xarray object carrying pint units.

    Parameters
    ----------
    da
        xarray object supporting ``.pint.quantify()`` and ``.to_array()``.
        After conversion it must provide the dimensions ``'Area'``,
        ``'Category'``, ``'Class'``, ``'Scenario'`` and ``'Type'`` as well as a
        ``'Date'`` coordinate.
    model
        Label stored as the (single) ``model`` of the resulting run.

    Returns
    -------
    scmdata.ScmRun
        Run whose ``variable`` is
        ``'<Type> - <Class> - <Element> - <Category>'``.
    """
    # Collect the data variables along a new 'Element' dimension, then flatten
    # every non-date dimension into a single 'index' dimension.
    stacked_dims = ['Area', 'Category', 'Class', 'Scenario', 'Type', 'Element']
    quantified = da.pint.quantify()
    das = quantified.to_array('Element').stack({'index': stacked_dims})

    # Build the variable label one part at a time, left to right.
    variable_labels = das['Type']
    for part in ('Class', 'Element', 'Category'):
        variable_labels = variable_labels + ' - ' + das[part]

    metadata = {
        'region': das['Area'],
        'variable': variable_labels,
        'unit': [str(das.pint.units)],
        'scenario': das['Scenario'],
        'model': [model],
    }
    return scmdata.ScmRun(das.pint.magnitude,
                          columns=metadata,
                          index=das['Date'].values)

In [9]:
# Convert every entry of `primap_xr` into an ScmRun, using the entry's key as
# the model name, and collect the results under the same keys.
# NOTE(review): recent tqdm releases deprecate `tqdm.tqdm_notebook` in favour
# of `tqdm.notebook.tqdm` -- confirm against the installed version.
primap = dict()
for key in tqdm.tqdm_notebook(primap_xr):
    print(key)
    da = primap_xr[key]
    primap[key] = scmrun_from_xarray(da, model=key)

# Release the reference to the source mapping.
del primap_xr

HBox(children=(IntProgress(value=0, max=17), HTML(value='')))

CRF2020
BUR2IPCC2006I
BUR3IPCC2006I
ANDREW2018V4I
UNFCCC2019BI
CDIAC2017I
UN2017P
UN2019P
EDGAR42COMPI
EDGAR50I
CRF2019
MPD2018P
BUR1IPCC2006I
PMHSOCIOECO12
PRIMAPHIST20
EDGAR432I
UNFCCC2020AI



## Measure memory use

In [None]:
!mkdir -p sd/mem/

import pickle

for fpath, obj in (
    ('fao_emi_total', fao_emi_total),
    ('lak_emi_energy', lak_emi_energy),
    ('cmip_conc_co2', cmip_conc_co2),
    ('prm_emi', prm_emi), 
    ('primap', primap),
):
    with open(f'sd/mem/{fpath}.pck', 'wb') as fd:
        pickle.dump(obj, fd, -1)

!ls -lah sd/mem/
!ls -lah mem/

In [2]:
# Out of memory (OOM) at more than 15 GB of memory -> cannot test the whole primap data set