In [40]:
# This is the generic code for health impact assessment (HIA) and burden of disease (BoD) calculations
# Forked originally from http://en.opasnet.org/w/HIA
# Original R code described in more detail at https://github.com/jtuomist/ghg-notebooks/wiki/Health-impact-assessment

import pandas as pd
import numpy as np
import json
import urllib.request
import math
import copy #import deepcopy
from typing import Any, Dict, Optional, Union

# Names of the two data columns every ovariable dataframe may carry.
# They must be literal strings: assigning the names to themselves raises
# NameError on a fresh kernel. The values match the column headers shown in
# the rendered outputs of this notebook ('Value' and 'Forecast').
FORECAST_COLUMN = 'Forecast'
VALUE_COLUMN = 'Value'

# pandas appends '_x' / '_y' to clashing column names of the left / right
# operand in DataFrame.merge(); these are the resulting names.
FORECAST_x = FORECAST_COLUMN + '_x'
FORECAST_y = FORECAST_COLUMN + '_y'
VALUE_x = VALUE_COLUMN + '_x'
VALUE_y = VALUE_COLUMN + '_y'

In [41]:
class Ovariable:
    """A named variable whose estimates live in an indexed pandas DataFrame.

    Arithmetic and comparison operators combine two ovariables (or an
    ovariable and a scalar) by joining their dataframes on the index levels
    they share and applying the operator to the value columns.

    Note: because ``__eq__`` is overridden to work elementwise (and no
    ``__hash__`` is defined), instances are not hashable.
    """

    # content is the dataframe with the estimates
    content: Optional[pd.DataFrame]

    # quantity: what the ovariable measures, e.g. exposure, exposure_response, disease_burden.
    # The class-level default of None keeps `node.quantity` readable on instances
    # created without a quantity; subclasses override it with their own label.
    quantity: Optional[str] = None

    def __init__(self, quantity: Optional[str] = None, content: Optional[pd.DataFrame] = None,
                 name: Optional[str] = None, input_nodes: Optional[list] = None,
                 meta: Optional[list] = None, unit: Optional[str] = None):
        """Store the given attributes.

        When `quantity` is None the class-level `quantity` stays in effect.
        """
        self.name = name
        if quantity is not None:
            self.quantity = quantity
        self.content = content
        self.meta = meta
        self.unit = unit
        self.input_nodes = input_nodes

    def merge(self, other):
        """Join this ovariable's content with `other` on their index.

        `other` is either an Ovariable or a plain value, which is wrapped in a
        one-row dataframe holding it in the value column.

        Returns a new Ovariable (quantity '') whose content is the merged
        dataframe; column names present in both operands get the pandas
        '_x' / '_y' suffixes.
        """

        def add_temporary_index(frame):
            # Append a constant index level to the frame so that both operands
            # always have at least one index level name in common.
            levels = frame.index.to_frame().assign(temporary=1)
            return frame.set_index(pd.MultiIndex.from_frame(levels))

        if isinstance(other, Ovariable):
            df2 = other.content
        else:
            df2 = pd.DataFrame([other], columns=[VALUE_COLUMN])

        df1 = add_temporary_index(self.content)
        df2 = add_temporary_index(df2)

        out = df1.merge(df2, left_index=True, right_index=True)
        out.index = out.index.droplevel(['temporary'])

        return Ovariable(quantity='', content=out)

    def clean(self):
        """Drop the merge helper columns and rebuild the index, in place.

        Every column other than the value and forecast columns becomes an
        index level. When both operands carried a forecast flag, the combined
        flag is the logical OR of the two. Columns keep the order in which
        they appear in the dataframe, so the result does not depend on set
        iteration order. Returns self.
        """
        df = self.content.reset_index()
        if FORECAST_x in df.columns:
            df[FORECAST_COLUMN] = df[FORECAST_x] | df[FORECAST_y]
        # Column 0 is what reset_index() yields for the unnamed index level of a wrapped scalar.
        dropped = {0, VALUE_x, VALUE_y, FORECAST_x, FORECAST_y}
        data_columns = {VALUE_COLUMN, FORECAST_COLUMN}
        keep = [col for col in df.columns if col not in dropped]
        index_columns = [col for col in keep if col not in data_columns]
        self.content = df[keep].set_index(index_columns)
        return self

    def _binary_op(self, other, op):
        # Shared body of all operators: merge, apply `op` to the two value columns, clean.
        out = self.merge(other)
        out.content[VALUE_COLUMN] = op(out.content[VALUE_x], out.content[VALUE_y])
        return out.clean()

    def __add__(self, other):
        return self._binary_op(other, lambda left, right: left + right)

    def __sub__(self, other):
        return self._binary_op(other, lambda left, right: left - right)

    def __mul__(self, other):
        return self._binary_op(other, lambda left, right: left * right)

    def __truediv__(self, other):
        return self._binary_op(other, lambda left, right: left / right)

    def __mod__(self, other):
        return self._binary_op(other, lambda left, right: left % right)

    def __pow__(self, other):
        return self._binary_op(other, lambda left, right: left ** right)

    def __floordiv__(self, other):
        return self._binary_op(other, lambda left, right: left // right)

    def __lt__(self, other):
        return self._binary_op(other, lambda left, right: left < right)

    def __le__(self, other):
        return self._binary_op(other, lambda left, right: left <= right)

    def __gt__(self, other):
        return self._binary_op(other, lambda left, right: left > right)

    def __ge__(self, other):
        return self._binary_op(other, lambda left, right: left >= right)

    def __eq__(self, other):
        return self._binary_op(other, lambda left, right: left == right)

    def __ne__(self, other):
        return self._binary_op(other, lambda left, right: left != right)

    # The three transforms below replace self.content and return self,
    # i.e. they modify the ovariable they are called on.

    def log(self):
        """Replace content with its natural logarithm; returns self."""
        self.content = np.log(self.content)
        return self

    def log10(self):
        """Replace content with its base-10 logarithm; returns self."""
        self.content = np.log10(self.content)
        return self

    def exp(self):
        """Replace content with its exponential; returns self."""
        self.content = np.exp(self.content)
        return self

In [42]:
# Two small exposure series indexed by Year; o2 has no row for year 4.
o1 = Ovariable(
    quantity='exposure',
    content=pd.DataFrame({
        'Year': [1, 2, 3, 4],
        VALUE_COLUMN: [2, 5, 6, 3],
        FORECAST_COLUMN: [False, False, True, True],
    }).set_index(['Year']),
)
o2 = Ovariable(
    quantity='exposure',
    content=pd.DataFrame({
        'Year': [1, 2, 3],
        VALUE_COLUMN: [2, 5, 6],
        FORECAST_COLUMN: [False, True, True],
    }).set_index(['Year']),
)

# Raw merge: value and forecast columns of both operands, suffixed _x / _y.
o3 = o1.merge(o2)
if FORECAST_x in o3.content.columns:
    o3.content[FORECAST_COLUMN] = o3.content[FORECAST_x] | o3.content[FORECAST_y]

# NOTE(review): the merge result above is discarded here; only the sum is displayed.
o3 = o2 + o1
o3.content


Unnamed: 0_level_0,Value,Forecast
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,False
2,10,True
3,12,True


In [43]:
# Exposure is the intensity of contact with the environment by the target population.

class Exposure(Ovariable):
    """Exposure computed as consumption times concentration.

    `scaled` records whether scale_exposure() has already been applied to
    this instance.
    """

    quantity = 'exposure'
    scaled = False

    def compute(self):
        """Multiply the 'consumption' and 'concentration' input nodes.

        Returns a new Exposure carrying the product as content and this
        instance's input_nodes, name, meta and unit.

        Raises:
            ValueError: if either input node is missing.
        """
        # If several nodes share a quantity, the last one wins (as in a plain loop).
        nodes = {node.quantity: node for node in (self.input_nodes or [])}
        missing = [q for q in ('consumption', 'concentration') if q not in nodes]
        if missing:
            raise ValueError(
                "Exposure.compute: missing input node(s) with quantity: " + ", ".join(missing))

        exposure = nodes['consumption'] * nodes['concentration']

        return Exposure(content=exposure.content, input_nodes=self.input_nodes,
                        name=self.name, meta=self.meta, unit=self.unit)

    # scale_exposure() scales the exposure by logarithmic function or body weight.
    # The information about how to scale comes from exposure-response function.
    # Thus, er-function and body weight must be provided.

    def scale_exposure(self, erf, bw):
        """Scale the exposure as requested by the 'scaling' index of `erf`.

        Rows with scaling 'BW' are divided by the body weight `bw`; rows with
        scaling 'Log10' are replaced by their base-10 logarithm; all other
        rows keep their value. The unscaled content is kept in
        `self.content_orig`, `self.content` is replaced and `self.scaled` is
        set. Calling the method again on a scaled instance is a no-op.

        Returns self.
        """
        if self.scaled:
            return self

        # Adding erf * 0 attaches the index levels of the ERF table to the
        # exposure rows without changing the exposure values.
        out = self + erf * 0
        out.content = out.content.query("observation == 'ERF'").droplevel('observation')

        out = out.merge(bw).content.reset_index()
        out[VALUE_COLUMN] = np.where(
            out['scaling'] == 'BW',
            out[VALUE_x] / out[VALUE_y],
            out[VALUE_x])

        # The logarithm is evaluated for every row but only used for the
        # 'Log10' rows, so warnings about non-positive values in the other
        # rows are suppressed.
        with np.errstate(divide='ignore', invalid='ignore'):
            log_scaled = np.log10(out[VALUE_COLUMN])
        out[VALUE_COLUMN] = np.where(
            out['scaling'] == 'Log10',
            log_scaled,
            out[VALUE_COLUMN])

        # Keep the columns in dataframe order so that the index level order
        # does not depend on set iteration order.
        dropped = {0, VALUE_x, VALUE_y}
        keep = [col for col in out.columns if col not in dropped]
        out = out[keep].set_index([col for col in keep if col != VALUE_COLUMN])

        self.content_orig = self.content
        self.content = out
        self.scaled = True

        return self

In [44]:
# Consumption is the amount of medium (food, water, air) consumed per time unit.

consumption = Ovariable(
    quantity='consumption',
    content=pd.DataFrame({
        'exposure_agent': ['TEQ', 'Fluoride', 'PM2.5', 'campylobacter',
                           'norovirus', 'giardia', 'Omega3'],
        'Age': ['child', 'adult', 'adult', 'child', 'adult', 'child', 'adult'],
        VALUE_COLUMN: [20, 4, 3, 5, 6, 7, 8],
    }).set_index(['exposure_agent', 'Age']),
)

# Concentration is the concentration of the exposure agent in the medium.

concentration = Ovariable(
    quantity='concentration',
    content=pd.DataFrame({
        'Age': ['adult', 'child'],
        VALUE_COLUMN: [2.5, 1.5],
    }).set_index(['Age']),
)

# Exposure = consumption * concentration, joined on the shared Age index.
expo = Exposure(name='pm_exposure', unit='ug/m3',
                input_nodes=[consumption, concentration]).compute()


In [45]:
# bw is the body weight

bw = Ovariable(
    'body_weight',
    content=pd.DataFrame(
        [['child', 15], ['adult', 75]],
        columns=['Age', VALUE_COLUMN],
    ).set_index('Age'),
)
bw.content

Unnamed: 0_level_0,Value
Age,Unnamed: 1_level_1
child,15
adult,75


In [48]:
# Exposure-response function (ERF) is a variable that typically comes from data.
# Data comes from Opasnet [[ERFs of environmental pollutants]]
# http://en.opasnet.org/w/Special:Opasnet_Base?id=op_en5827
# The ovariables are converted to exposure_response_functions.csv by using code
# https://github.com/jtuomist/watch_network/create_erf_csv.R

df = pd.read_csv('exposure_response_functions.csv').drop(['source','hepatitis','age','exposure'], axis=1)

# Index columns: er_function must be the first level and observation the second,
# because Rr.compute and Paf.compute select rows with .loc[(func, observation)].
tmp = ['er_function','observation','scaling','exposure_agent','response']
# Any remaining non-value column becomes an extra index level. They are taken
# in CSV column order so that the level order is the same on every run
# (set iteration order of strings changes between kernels).
tmp = tmp + [col for col in df.columns if col not in tmp and col != VALUE_COLUMN]
print(tmp)
df = df[tmp + [VALUE_COLUMN]].replace({
    'ERS':'UR', # Just one er_function name per equation
    'CSF':'UR',
    'OR':'RR', # treat odds ratio as risk ratio although it is close only at small risk levels
    'TWI':'Step',
    'TDI':'Step',
    'ADI':'Step',
    'RDI':'Step',
    'NOAEL':'Step'
})
erf = df.sort_values(tmp).set_index(tmp)
erf = Ovariable(quantity = 'ERF', content=erf, name='pm_erf')

erf.content

['er_function', 'observation', 'scaling', 'exposure_agent', 'response', 'exposure_unit']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Value
er_function,observation,scaling,exposure_agent,response,exposure_unit,Unnamed: 6_level_1
RR,ERF,,ALA,CHD2 mortality,mg /day,0.999949
RR,ERF,,Arsenic,Bladder cancer morbidity,µg /l,1.002000
RR,ERF,,Chlorination byproducts,Bladder cancer morbidity,netrev /l,1.000029
RR,ERF,,Chlorination byproducts,Bladder cancer morbidity,µg /l,1.003900
RR,ERF,,Dampness damage,Asthma morbidity,%,1.370000
...,...,...,...,...,...,...
exact beta poisson,Threshold,,norovirus,norovirus infection,?,0.055000
exact beta poisson,Threshold,,rotavirus,rotavirus infection,?,0.191000
exact beta poisson,Threshold,,sapovirus,sapovirus infection,?,0.055000
exponential,ERF,,giardia,giardia infection,?,0.019900


In [49]:
# scale_exposure() replaces expo.content in place and marks expo as scaled,
# so later calls on the same object return it unchanged.
expo.scale_exposure(erf=erf, bw=bw).content

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Value
Age,response,exposure_unit,er_function,scaling,exposure_agent,Unnamed: 6_level_1
adult,Fluorosis,,UR,,Fluoride,10.0
adult,Fluorosis,,UR,,Fluoride,10.0
adult,Breast cancer,mg /d,RR,,Omega3,20.0
adult,CHD2 mortality,mg /day,RR,,Omega3,20.0
adult,CHD3 mortality,mg /day,RR,,Omega3,20.0
adult,Coronary heart disease mortality,mg /day,RR,,Omega3,20.0
adult,Stroke mortality,mg /day,RR,,Omega3,20.0
adult,CHD arrythmia mortality,mg /day,Relative Hill,,Omega3,20.0
adult,CHD2 mortality,mg /day,Relative Hill,,Omega3,20.0
adult,Stroke mortality,mg /day,Relative Hill,,Omega3,20.0


In [50]:
# Frexposed is the fraction of exposed individuals within the target population. Defaults to 1
# but may be indexed by population subgroups.

frexposed = Ovariable(
    name='pm_frexposed',
    quantity='frexposed',
    content=pd.DataFrame(
        [['child', 1], ['adult', 1]],
        columns=['Age', VALUE_COLUMN],
    ).set_index('Age'),
)

frexposed.content

Unnamed: 0_level_0,Value
Age,Unnamed: 1_level_1
child,1
adult,1


In [51]:
# P_illness is the probability of illness. Relevant for microbial infection endpoints.
# Typically a microbe-specific constant.

p_illness = Ovariable(
    name='p_illness_microbe',
    quantity='p_illness',
    content=pd.DataFrame(
        [['campylobacter infection', 1],
         ['giardia infection', 1],
         ['norovirus infection', 1]],
        columns=['response', VALUE_COLUMN],
    ).set_index('response'),
)
p_illness.content

Unnamed: 0_level_0,Value
response,Unnamed: 1_level_1
campylobacter infection,1
giardia infection,1
norovirus infection,1


In [52]:
# Relative risk (RR) is the risk of an exposed individual compared with a counterfactual
# unexposed individual using the modelled exposures. 

class Rr(Ovariable):
    """Relative risk computed from the scaled exposure and the ERF table.

    Handles the er_functions 'RR' and 'Relative Hill'.
    """
    quantity = 'RR'

    @staticmethod
    def _erf_param(erf, func, observation):
        # Shallow copy of `erf` whose content holds only the rows of the given
        # er_function and observation. The er_function must be the first and
        # observation the second index level. A shallow copy is enough because
        # only the `content` attribute of the copy is rebound.
        param = copy.copy(erf)
        param.content = erf.content.loc[(func, observation)]
        return param

    def compute(self):
        """Fill self.content with the relative risks and return self.

        Input nodes are looked up by quantity: 'ERF' and 'exposure' are
        required; 'body_weight' is required unless the exposure has already
        been scaled.

        Raises:
            ValueError: if a required input node is missing.
        """
        # If several nodes share a quantity, the last one wins (as in a plain loop).
        nodes = {node.quantity: node for node in (self.input_nodes or [])}
        missing = [q for q in ('ERF', 'exposure') if q not in nodes]
        if missing:
            raise ValueError(
                "Rr.compute: missing input node(s) with quantity: " + ", ".join(missing))
        erf = nodes['ERF']
        exposure = nodes['exposure']

        if getattr(exposure, 'scaled', False):
            dose = exposure
        else:
            if 'body_weight' not in nodes:
                raise ValueError(
                    "Rr.compute: a 'body_weight' input node is needed to scale the exposure")
            dose = exposure.scale_exposure(erf, nodes['body_weight'])

        # Collect the per-function results and concatenate once at the end
        # (DataFrame.append is not available in pandas >= 2.0).
        frames = []

        for func in ('RR', 'Relative Hill'):
            param1 = self._erf_param(erf, func, 'ERF')
            param2 = self._erf_param(erf, func, 'Threshold')

            if func == 'RR':
                rr = param1
                threshold = param2

                dose2 = dose - threshold
                dose2.content = np.clip(dose2.content, 0, None) # Smallest allowed value is 0

                # rr ** dose2, written as exp(log(rr) * dose2)
                out1 = (rr.log() * dose2).exp()
                frames.append(out1.content.reset_index())
            else:
                Imax = param1
                ed50 = param2

                out2 = (dose * Imax) / (dose + ed50) + 1
                frames.append(out2.content.reset_index())

        out = pd.concat(frames)

        # Column 0 is a leftover of scalar operands. Keep dataframe column
        # order so the index level order is the same on every run.
        keep = [col for col in out.columns if col != 0]
        out = out[keep].set_index([col for col in keep if col != VALUE_COLUMN])

        self.content = out

        return self
    

In [53]:
# Relative risks for the scaled exposure; the nodes are matched by their quantity.
rr = Rr(input_nodes=[expo, erf, bw], name='rr').compute()
rr.content

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Value
Age,response,exposure_unit,er_function,scaling,exposure_agent,Unnamed: 6_level_1
adult,Breast cancer,mg /d,RR,,Omega3,0.989794
adult,CHD2 mortality,mg /day,RR,,Omega3,0.998549
adult,CHD2 mortality,mg /day,Relative Hill,,Omega3,0.998549
adult,CHD3 mortality,mg /day,RR,,Omega3,0.990245
adult,Coronary heart disease mortality,mg /day,RR,,Omega3,0.960751
adult,Stroke mortality,mg /day,RR,,Omega3,0.996008
adult,Stroke mortality,mg /day,Relative Hill,,Omega3,0.996008
adult,Cardiopulmonary mortality,µg /m3,RR,,PM2.5,1.069507
adult,Lung cancer mortality,µg /m3,RR,,PM2.5,1.109902
adult,Total mortality,µg /m3,RR,,PM2.5,1.047448


In [54]:
# Incidence by response.
# NOTE(review): this table is built but never used -- the name is rebound
# below to an ovariable that carries frexposed.content (1 for each Age group).
# Confirm whether the by-response table was meant to be the content.
incidence = pd.DataFrame(
    [['Liver cancer', 1],
     ['Fluorosis', 1],
     ['MeHg TWI', 1],
     ['campylobacter infection', 1],
     ['norovirus infection', 1],
     ['giardia infection', 1],
     ["Loss in child's IQ points", 1]],
    columns=['response', VALUE_COLUMN],
).set_index(['response'])
incidence = Ovariable(quantity='incidence', content=frexposed.content)
incidence.content

Unnamed: 0_level_0,Value
Age,Unnamed: 1_level_1
child,1
adult,1


In [55]:
## Population attributable fraction PAF

#def paf(

class Paf(Ovariable):
    """Population attributable fraction (PAF).

    The formula depends on the er_function of each exposure-response row:
    'UR', 'Step', 'RR' / 'Relative Hill', 'beta poisson approximation',
    'exact beta poisson' and 'exponential' are handled; rows with any other
    er_function contribute nothing.
    """
    quantity = 'PAF'

    @staticmethod
    def _erf_param(erf, func, observation):
        # Shallow copy of `erf` whose content holds only the rows of the given
        # er_function and observation. The er_function must be the first and
        # observation the second index level. A shallow copy is enough because
        # only the `content` attribute of the copy is rebound.
        param = copy.copy(erf)
        param.content = erf.content.loc[(func, observation)]
        return param

    def compute(self):
        """Compute the PAF and return it as a new Paf.

        Input nodes are looked up by quantity. 'ERF', 'exposure' and
        'frexposed' are always required. 'incidence', 'RR' and 'p_illness'
        are required only by the er_functions that use them. 'body_weight'
        is required only if the exposure has not been scaled yet.

        Raises:
            ValueError: if a required input node is missing.
        """
        # If several nodes share a quantity, the last one wins (as in a plain loop).
        nodes = {node.quantity: node for node in (self.input_nodes or [])}

        def require(quantity, purpose):
            # Return the input node of the given quantity or fail with a clear message.
            if quantity not in nodes:
                raise ValueError(
                    "Paf.compute: an input node with quantity '{}' is required {}".format(
                        quantity, purpose))
            return nodes[quantity]

        erf = require('ERF', 'for every er_function')
        exposure = require('exposure', 'for every er_function')
        frexposed = require('frexposed', 'for every er_function')

        if getattr(exposure, 'scaled', False):
            dose = exposure
        else:
            dose = exposure.scale_exposure(
                erf, require('body_weight', 'to scale an unscaled exposure'))

        # Distinct er_functions in order of first appearance, so the row order
        # of the result is the same on every run.
        er_function_list = list(dict.fromkeys(dose.content.reset_index()['er_function']))

        # Collect the per-function results and concatenate once at the end
        # (DataFrame.append is not available in pandas >= 2.0).
        frames = []

        for func in er_function_list:
            param1 = self._erf_param(erf, func, 'ERF')
            param2 = self._erf_param(erf, func, 'Threshold')
            purpose = "by er_function '{}'".format(func)

            if func == 'UR':
                k = param1
                threshold = param2
                dose2 = dose - threshold
                dose2.content = np.clip(dose2.content, 0, None) # Smallest allowed value is 0
                result = k * dose2 * frexposed / require('incidence', purpose)

            elif func == 'Step':
                upper = param1
                lower = param2
                # 1 when the dose is outside [lower, upper], 0 when inside
                result = (dose >= lower) * (dose <= upper) * -1 + 1
                result = result * frexposed / require('incidence', purpose)

            elif func == 'RR' or func == 'Relative Hill':
                r = frexposed * (require('RR', purpose) - 1)
                result = (r > 0) * (r/(r + 1)) + (r <= 0) * r

            elif func == 'beta poisson approximation':
                result = ((dose/param2 + 1)**(param1 * -1) * -1 + 1) * frexposed
                result = result / require('incidence', purpose) * require('p_illness', purpose)

            elif func == 'exact beta poisson':
                result = ((param1/(param1 + param2) * dose * -1).exp() * -1 + 1) * frexposed
                result = result / require('incidence', purpose) * require('p_illness', purpose)

            elif func == 'exponential':
                k = param1
                result = ((k * dose * -1).exp() * -1 + 1) * frexposed
                result = result / require('incidence', purpose) * require('p_illness', purpose)

            else:
                continue

            frames.append(result.content.reset_index())

        out = pd.concat(frames) if frames else pd.DataFrame()

        # Drop ERF bookkeeping columns; keep dataframe column order so the
        # index level order is the same on every run.
        dropped = {'scaling','matrix','exposure','exposure_unit','er_function',0}
        keep = [col for col in out.columns if col not in dropped]
        out = out[keep].set_index([col for col in keep if col != VALUE_COLUMN])

        return Paf(content = out, name = self.name, meta = self.meta, unit = self.unit)

In [56]:
# The node is created and evaluated in one step; compute() returns a new Paf.
paf = Paf(
    input_nodes=[erf, expo, incidence, frexposed, rr, p_illness],
    name='paf',
).compute()
paf.content

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Value
Age,exposure_agent,response,Unnamed: 3_level_1
child,campylobacter,campylobacter infection,0.144979
adult,Omega3,Breast cancer,-0.010206
adult,Omega3,CHD arrythmia mortality,-0.027273
adult,Omega3,CHD2 mortality,-0.001451
adult,Omega3,CHD2 mortality,-0.050746
...,...,...,...
child,TEQ,Cancer morbidity,0.001000
child,TEQ,Yes or no developmental dental defects incl. agenesis,0.384052
child,TEQ,Yes or no tooth defect,0.088627
child,TEQ,Sperm concentration,0.001800


In [57]:
# Population is typically indexed by subgroups.

population = Ovariable(
    'population',
    content=pd.DataFrame(
        [['adult', 'male', 100000],
         ['adult', 'female', 100000],
         ['child', 'male', 100000],
         ['child', 'female', 100000]],
        columns=['Age', 'sex', VALUE_COLUMN],
    ).set_index(['Age', 'sex']),
)

population.content

Unnamed: 0_level_0,Unnamed: 1_level_0,Value
Age,sex,Unnamed: 2_level_1
adult,male,100000
adult,female,100000
child,male,100000
child,female,100000


In [58]:
# Case_burden is the disease burden of a single case of disease. This may be indexed by population subgroup e.g. age.

case_burden = Ovariable(
    quantity='case_burden',
    content=pd.DataFrame({
        'response': [
            'Fluorosis',
            'Liver cancer',
            "Loss in child's IQ points",
            'MeHg TWI',
            'Breast cancer',
            'CHD2 mortality',
            'CHD3 mortality',
            'Coronary heart disease mortality',
            'Stroke mortality',
            'Cardiopulmonary mortality',
            'Lung cancer mortality',
            'Total mortality',
            'CHD arrythmia mortality',
            'campylobacter infection',
            'norovirus infection',
            'giardia infection',
        ],
        # the same burden for each of the 16 responses
        VALUE_COLUMN: [3] * 16,
    }).set_index(['response']),
)

case_burden.content

Unnamed: 0_level_0,Value
response,Unnamed: 1_level_1
Fluorosis,3
Liver cancer,3
Loss in child's IQ points,3
MeHg TWI,3
Breast cancer,3
CHD2 mortality,3
CHD3 mortality,3
Coronary heart disease mortality,3
Stroke mortality,3
Cardiopulmonary mortality,3


In [59]:
# BoD is the current (observed) burden of disease (measured in disability-adjusted life years or DALYs).

class Bod(Ovariable):
    """Burden of disease: incidence * population * case_burden."""
    quantity = 'disease_burden'

    def compute(self):
        """Multiply the three input nodes and return the product as a new Bod.

        The input nodes are looked up by quantity ('incidence', 'population',
        'case_burden') from self.input_nodes, not from notebook globals.

        Raises:
            ValueError: if any of the three input nodes is missing.
        """
        # If several nodes share a quantity, the last one wins (as in a plain loop).
        nodes = {node.quantity: node for node in (self.input_nodes or [])}
        missing = [q for q in ('incidence', 'population', 'case_burden') if q not in nodes]
        if missing:
            raise ValueError(
                "Bod.compute: missing input node(s) with quantity: " + ", ".join(missing))

        out = nodes['incidence'] * nodes['population'] * nodes['case_burden']

        return Bod(content = out.content, name = self.name, meta = self.meta, unit = self.unit)

# Evaluate the burden of disease from its three input nodes.
bod = Bod(input_nodes=[incidence, population, case_burden]).compute()
bod.content

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Value
Age,response,sex,Unnamed: 3_level_1
adult,Fluorosis,male,300000
adult,Liver cancer,male,300000
adult,Loss in child's IQ points,male,300000
adult,MeHg TWI,male,300000
adult,Breast cancer,male,300000
...,...,...,...
child,Total mortality,female,300000
child,CHD arrythmia mortality,female,300000
child,campylobacter infection,female,300000
child,norovirus infection,female,300000


In [60]:
# bod_attr is the burden of disease that can be attributed to the exposure of interest.

class Bod_attr(Ovariable):
    """Attributable burden of disease: disease_burden * PAF."""
    quantity = 'bod_attr'

    def compute(self):
        """Multiply the 'disease_burden' and 'PAF' input nodes.

        Returns the product as a new Bod_attr.

        Raises:
            ValueError: if either input node is missing.
        """
        # If several nodes share a quantity, the last one wins (as in a plain loop).
        nodes = {node.quantity: node for node in (self.input_nodes or [])}
        missing = [q for q in ('disease_burden', 'PAF') if q not in nodes]
        if missing:
            raise ValueError(
                "Bod_attr.compute: missing input node(s) with quantity: " + ", ".join(missing))

        out = nodes['disease_burden'] * nodes['PAF']

        return Bod_attr(content = out.content, name = self.name, meta = self.meta, unit = self.unit)
    
# Attributable burden = burden of disease * population attributable fraction.
bod_attr = Bod_attr(name='bod_attr', input_nodes=[bod, paf]).compute()
bod_attr.content

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Value
Age,exposure_agent,response,sex,Unnamed: 4_level_1
adult,Omega3,Breast cancer,male,-3061.857079
adult,Omega3,Breast cancer,male,-3061.857079
adult,Omega3,Breast cancer,female,-3061.857079
adult,Omega3,Breast cancer,female,-3061.857079
adult,Omega3,CHD arrythmia mortality,male,-8181.818182
adult,...,...,...,...
adult,norovirus,norovirus infection,female,299457.738542
child,campylobacter,campylobacter infection,male,43493.761025
child,campylobacter,campylobacter infection,female,43493.761025
child,giardia,giardia infection,male,56569.258074


In [61]:
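# sum_exposcen gives the impact of the exposure as the difference between two
# exposure scenarios: rows with Exposcen 'BAU' are weighted by +1 and rows with
# Exposcen 'No exposure' by -1, and the weighted values are summed over Exposcen.
# In the original R code the actual subtracting is done after the ovariable
# is evaluated by using CollapseMarginal (a standard function to manipulate ovariables in OpasnetUtils).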

def sum_exposcen(out):
    """Collapse the 'Exposcen' index level into a BAU minus No exposure difference.

    `out` is a dataframe. If its index has no level named 'Exposcen' it is
    returned unchanged. Otherwise it is multiplied by +1 ('BAU') / -1
    ('No exposure') and summed over the remaining index levels.
    """
    if 'Exposcen' not in out.index.names:
        return out

    signs = pd.DataFrame(
        {'Exposcen': ['BAU', 'No exposure'], VALUE_COLUMN: [1, -1]}
    ).set_index('Exposcen')
    signed = out * signs
    remaining_levels = list(set(signed.index.names) - {'Exposcen'})
    return signed.groupby(remaining_levels).sum()

# fillna fills the NaN values of a column with all values available.
# object is pandas dataframe
# cols is a list of column names to fill

def fillna(object, cols):
    """Expand rows whose value in a column is NaN over all values of that column.

    For each column in `cols`, every row with NaN in that column is replaced
    by one copy per distinct non-NaN value found in the column; rows that
    already have a value are kept as they are.

    Parameters:
        object: pandas dataframe (the parameter name shadows the builtin but
            is kept for backward compatibility).
        cols: list of column names to fill; [None] means nothing to fill.
            Index levels can be named too, because the index is reset first.

    Returns:
        A dataframe with the original named index levels restored. For an
        unnamed index the reset index is left as an ordinary column.
    """
    ind = list(object.index.names)
    out = object.reset_index()
    if cols != [None]:
        for i in list(cols):
            present = out[out[i].notna()]
            # Constant helper column 'tmp' turns the merge into a cross join.
            absent = out[out[i].isna()].drop(i, axis=1).assign(tmp=1)
            values = pd.DataFrame({i: pd.unique(present[i]), 'tmp': 1})
            expanded = absent.merge(values, on='tmp').drop('tmp', axis=1)
            # pd.concat replaces DataFrame.append, which pandas >= 2.0 no longer has.
            out = pd.concat([present, expanded])
    if ind != [None]:
        out = out.set_index(ind)
    return out

In [62]:
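# mc2d has not been ported to Python yet: it is not needed until we start using Monte Carlo
# simulation with an Iter level in the multi-index. The string below keeps the original R
# implementation for reference only.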

"""
    mc2d is a function that samples the ovariable that describes individuals and then aggregates (typically averages over) the samples to reflect the situation of a defined population or population subgroups. This is done because in disease burden assessments, we are typically interested in population-level uncertainties rather than individual uncertainties.

    mc2d
    function (ova, mc2dpar = NULL)
    {
    if (is.null(mc2dpar))
    if (exists("mc2dparam"))
    mc2dpar <- mc2dparam
    else stop("Parameter list mc2dparam missing!\n")
    if (mc2dpar$run2d) {
    ova <- ova * mc2dpar$info
    require(reshape2)
    marg <- setdiff(c(colnames(ova@output)[ova@marginal],
    mc2dpar$newmarginals), "Iter")
    out <- aggregate(Value(ova), by = ova@output[colnames(ova@output) %in%
    marg], FUN = function(x) {
    strength <- if (is.null(mc2dpar$strength))
    length(x)
    else mc2dpar$strength
    apply(array(as.numeric(sample(as.character(x), strength *
    mc2dpar$N2, replace = TRUE)), dim = c(strength,
    mc2dpar$N)), MARGIN = 2, FUN = mc2dpar$fun)
    })
    temp <- melt(out[[length(out)]])
    out[[length(out)]] <- 1:nrow(out)
    colnames(temp) <- c("Nrow", "Iter", "Value")
    out <- merge(out, temp, by.x = "x", by.y = "Nrow")
    out$x <- NULL
    out <- Ovariable(output = out, marginal = colnames(out) %in%
    c(marg, "Iter"))
    }
    else {
    out <- ova
    }
    return(out)
    }

"""


'\n    mc2d is a function that samples the ovariable that describes individuals and then aggregates (typically averages over) the samples to reflect the situation of a defined population or population subgroups. This is done because in disease burden assessments, we are typically interested in population-level uncertainties rather than individual uncertainties.\n\n    mc2d\n    function (ova, mc2dpar = NULL)\n    {\n    if (is.null(mc2dpar))\n    if (exists("mc2dparam"))\n    mc2dpar <- mc2dparam\n    else stop("Parameter list mc2dparam missing!\n")\n    if (mc2dpar$run2d) {\n    ova <- ova * mc2dpar$info\n    require(reshape2)\n    marg <- setdiff(c(colnames(ova@output)[ova@marginal],\n    mc2dpar$newmarginals), "Iter")\n    out <- aggregate(Value(ova), by = ova@output[colnames(ova@output) %in%\n    marg], FUN = function(x) {\n    strength <- if (is.null(mc2dpar$strength))\n    length(x)\n    else mc2dpar$strength\n    apply(array(as.numeric(sample(as.character(x), strength *\n    mc2