In [None]:
import pandas as pd
import numpy as np

# Display floats with 15 decimal places so that small deviations stay visible
pd.set_option("display.precision", 15)

# Identifiers of the electrification options
GRID, MG, SHS = 'grid', 'mg', 'shs'
ELECTRIFICATION_OPTIONS = [GRID, MG, SHS]

# Identifiers of the scenarios (the "SENARIO" spelling is kept: other cells use these names)
BAU_SENARIO, SE4ALL_SHIFT_SENARIO, PROG_SENARIO = 'bau', 'se4all_shift', 'prog'
SCENARIOS = [BAU_SENARIO, SE4ALL_SHIFT_SENARIO, PROG_SENARIO]

MIN_TIER_LEVEL = 3
# Both dicts are keyed by TIER level
MIN_RATED_CAPACITY = {3: 200, 4: 800, 5: 2000}  # [W]
MIN_ANNUAL_CONSUMPTION = {3: 365, 4: 1250, 5: 3000}  # [kWh/a]

# drivers for the socio-economic model: one row per driver, one column per off-grid option
MENTI = pd.DataFrame(
    {
        MG: [3, 13. / 6, 19. / 6, 3.25, 11. / 3],
        SHS: [23. / 12, 4.5, 37. / 12, 17. / 6, 41. / 12],
    },
    index=pd.Index(
        ['high_gdp', 'high_mobile_money', 'high_ease_doing_business', 'low_corruption', 'high_grid_weakness'],
        name='labels',
    ),
)


def _per_option(template):
    """Fill `template` (which contains one %s) with each electrification option."""
    return [template % opt for opt in ELECTRIFICATION_OPTIONS]


RISE_SHIFTS = _per_option('shift_%s_share')

# Column names of the exogenous results, one entry per electrification option
POP_GET = _per_option('pop_get_%s_2030')
HH_GET = _per_option('hh_get_%s_2030')
HH_CAP = _per_option('hh_%s_capacity')
HH_SCN2 = _per_option('hh_cap_scn2_%s_capacity')
EXO_RESULTS = POP_GET + HH_GET + HH_CAP + HH_SCN2

CAP = ['cap_sn2_grid_tier_up', 'cap_sn2_mg_tier_up', 'cap_sn2_shs_tier_up']

In [None]:
# Display label for each scenario identifier
SCENARIOS_DICT = {
    BAU_SENARIO: 'BaU',
    SE4ALL_SHIFT_SENARIO: 'SE4All',
    PROG_SENARIO: 'prOG',
    'indiv':'Individual'
}
from data_preparation import compute_ndc_results_from_raw_data

# JSON-serialised results of every scenario, keyed by scenario identifier.
# `to_json` has to be *called*: without the parentheses the dict would hold
# bound methods instead of the serialised data.
a={
    sce: compute_ndc_results_from_raw_data(sce).to_json() for sce in SCENARIOS
}

In [None]:
# Reference results of the three scenarios, parsed with the high-precision float converter
_REFERENCE_CSV_OPTIONS = dict(float_precision='high')
xls_bau = pd.read_csv('data/xls_bau.csv', **_REFERENCE_CSV_OPTIONS)
xls_se = pd.read_csv('data/xls_se.csv', **_REFERENCE_CSV_OPTIONS)
xls_prog = pd.read_csv('data/xls_prog.csv', **_REFERENCE_CSV_OPTIONS)

## Raw data loading

In [None]:
# Load the raw input table (high-precision float parsing) and preview its first rows
df = pd.read_csv('data/raw_data.csv', float_precision='high')
df.head()

# BaU scenario

In [None]:
from data_preparation import prepare_endogenous_variables, prepare_bau_data, extract_results_scenario

# BaU pipeline: raw data -> endogenous variables -> BaU-specific data
df = prepare_bau_data(
    prepare_endogenous_variables(
        pd.read_csv('data/raw_data.csv', float_precision='high')
    )
)

# results of the BaU scenario only
bau_df = extract_results_scenario(df, BAU_SENARIO)
#bau_df = bau_df.set_index('country_iso')

In [None]:
from data_preparation import compute_ndc_results_from_raw_data
# Recompute the BaU results with a single call; this overwrites the `bau_df`
# that the previous cell built step by step.
bau_df = compute_ndc_results_from_raw_data(BAU_SENARIO)

In [None]:
# Deviation between the reference values and the computed BaU results,
# indexed by the ISO code of the country
df_diff = (
    (xls_bau[EXO_RESULTS] - bau_df[EXO_RESULTS])
    .assign(iso=bau_df.country_iso)
    .set_index('iso')
)

def highlight_mismatch(col, eps=0.1, diff=None):
    """Return the rows whose absolute deviation in column `col` exceeds `eps`.

    :param col: name of the column to check
    :param eps: absolute tolerance; only deviations strictly larger are reported
    :param diff: DataFrame of deviations; defaults to the notebook-level `df_diff`
    :return: the rows of `diff` for which abs(diff[col]) > eps
    """
    if diff is None:
        # resolved at call time so that the most recently computed df_diff is used
        diff = df_diff
    return diff.loc[np.abs(diff[col]) > eps]

# Gather, column by column, the countries whose deviation exceeds the tolerance
mismatched = []
for col in EXO_RESULTS:
    offenders = highlight_mismatch(col).index.to_list()
    if offenders:
        print('problems with ', col, offenders)
    mismatched = mismatched + offenders

# unique countries with at least one mismatch, and their deviations
l = list(set(mismatched))
print(l)
df_diff.loc[l]

# Se4All+SHIFT scenario

In [None]:
# Import from `data_preparation`, the module path used by every other cell of this notebook
from data_preparation import prepare_endogenous_variables, prepare_se4all_data, extract_results_scenario

df = pd.read_csv('data/raw_data.csv', float_precision='high')

# SE4All+SHIFT pipeline: raw data -> endogenous variables -> SE4All-specific data
df = prepare_endogenous_variables(df)
df = prepare_se4all_data(df)
se_df = extract_results_scenario(df, SE4ALL_SHIFT_SENARIO)


In [None]:
# Import from `data_preparation`, the module path used by every other cell of this notebook
from data_preparation import prepare_endogenous_variables, prepare_se4all_data, extract_results_scenario

# Names of the RISE score columns, one per electrification option
RISE_INDICES = ['rise_%s' % opt for opt in ELECTRIFICATION_OPTIONS]

df = pd.read_csv('data/raw_data.csv', float_precision='high')

# use the notebook-level constant instead of repeating the magic number 3
df = prepare_endogenous_variables(df, min_tier_level=MIN_TIER_LEVEL)
df = prepare_se4all_data(df)

# sum of the RISE scores of each row, shown next to the scores and the resulting shares
df['norm'] = df.loc[:, RISE_INDICES].sum(axis=1)
df[RISE_INDICES + ['norm'] + RISE_SHIFTS]

In [None]:
# `nz_idxs` is otherwise only a local name inside prepare_se4all_data, so it is
# defined here: mask of the rows whose RISE scores do not sum up to 0
nz_idxs = df.loc[:, RISE_INDICES].sum(axis=1) != 0
norm = df.loc[nz_idxs, RISE_INDICES].sum(axis=1)

# rows without a grid score but with at least one other score: the mg and shs
# shares are set to the score divided by the sum of the scores
cond = (df['rise_grid'] == 0) & (df[RISE_INDICES].sum(axis=1) != 0)
df.loc[cond, 'shift_mg_share'] = df.loc[cond, 'rise_mg'].div(df.loc[cond, RISE_INDICES].sum(axis=1))
df.loc[cond, 'shift_shs_share'] = df.loc[cond, 'rise_shs'].div(df.loc[cond, RISE_INDICES].sum(axis=1))
df.loc[cond, RISE_INDICES + ['country_iso', 'norm'] + RISE_SHIFTS]

In [None]:
# NOTE(review): scratch experiment that overwrites the grid score of the row
# labelled 5 in place; re-running earlier cells is needed to restore `df`.
df.loc[5, 'rise_grid'] = 1
df.loc[5, RISE_INDICES]

In [None]:
# Toy RISE scores (grid, mg, shs) used to try out prepare_se4all_data;
# several rows contain one or two scores equal to 0
df = pd.DataFrame(
    data=[
        [100, 1, 45],
        [100, 0, 45],
        [100, 1, 2],
        [100, 0, 2],
        [100, 0, 0],
        [50, 40, 45],
        [60, 70, 30],
    ],
    columns=['rise_grid', 'rise_mg', 'rise_shs'],
)

def prepare_se4all_data(
        input_df,
):
    """Compute the se4all+SHIFT share columns from the RISE scores.

    Works on a copy of `input_df`, which has to provide the columns
    'rise_grid', 'rise_mg' and 'rise_shs' as well as every column listed in the
    notebook-level RISE_INDICES (their row sum is the normalisation factor).

    For each option X, 'shift_X_share' is
    (2 * rise_X - the two other scores) / (sum over RISE_INDICES);
    rows whose sum is 0 get a share of 0.
    Afterwards, for every option whose score is 0 while the sum is not, the
    shares of the two *other* options are overwritten with score / sum. The
    options are handled in the order grid, mg, shs, so a later rule overwrites
    an earlier one when two scores of a row are 0.

    :param input_df: DataFrame holding the RISE scores (left unmodified)
    :return: copy of `input_df` with the three 'shift_*_share' columns
    """
    df = input_df.copy()

    # RISE score column -> share column, in the order in which they are processed
    share_columns = {
        'rise_grid': 'shift_grid_share',
        'rise_mg': 'shift_mg_share',
        'rise_shs': 'shift_shs_share',
    }

    score_sum = df.loc[:, RISE_INDICES].sum(axis=1)
    # rows for which the RISE scores do not sum up to 0
    has_score = score_sum != 0
    # normalisation factor of those rows
    norm = df.loc[has_score, RISE_INDICES].sum(axis=1)

    # rows without any score get a share of 0 in every share column
    for share in share_columns.values():
        df.loc[score_sum == 0, share] = 0

    # weight of each option: twice its own score minus the two other scores,
    # normalised by the sum of the scores
    for rise, share in share_columns.items():
        weight = 2 * df.loc[has_score, rise]
        for other in share_columns:
            if other != rise:
                weight = weight - df.loc[has_score, other]
        df.loc[has_score, share] = weight
        df.loc[has_score, share] = df.loc[has_score, share].div(norm)

    # if one of the RISE scores is 0, the share is reallocated between the two others
    for zero_rise in share_columns:
        cond = (df[zero_rise] == 0) & (df[RISE_INDICES].sum(axis=1) != 0)
        for rise, share in share_columns.items():
            if rise == zero_rise:
                continue
            df.loc[cond, share] = df.loc[cond, rise].div(
                df.loc[cond, RISE_INDICES].sum(axis=1))
    return df
df = prepare_se4all_data(df)

# show the rows in which a single option carries the whole RISE score
rise_total = df[RISE_INDICES].sum(axis=1)
for opt in ELECTRIFICATION_OPTIONS:
    cond = df['rise_%s' % opt] == rise_total
    if cond.values.any():
        print(df.loc[cond])
df

In [None]:
# Toy RISE scores (grid, mg, shs) used to try out prepare_se4all_data;
# several rows contain one or two scores equal to 0
df = pd.DataFrame(
    data=[
        [100, 1, 45],
        [100, 0, 45],
        [100, 1, 2],
        [100, 0, 2],
        [100, 0, 0],
        [50, 40, 45],
        [60, 70, 30],
    ],
    columns=['rise_grid', 'rise_mg', 'rise_shs'],
)

def prepare_se4all_data(
        input_df,
):
    """Compute the se4all+SHIFT share columns from the RISE scores.

    Variant without any reallocation for scores equal to 0. Works on a copy of
    `input_df`, which has to provide the columns 'rise_grid', 'rise_mg' and
    'rise_shs' as well as every column listed in the notebook-level
    RISE_INDICES (their row sum is the normalisation factor).

    For each option X, 'shift_X_share' is
    (2 * rise_X - the two other scores) / (sum over RISE_INDICES);
    rows whose sum is 0 get a share of 0.

    :param input_df: DataFrame holding the RISE scores (left unmodified)
    :return: copy of `input_df` with the three 'shift_*_share' columns
    """
    df = input_df.copy()

    # RISE score column -> share column, in the order in which they are processed
    share_columns = {
        'rise_grid': 'shift_grid_share',
        'rise_mg': 'shift_mg_share',
        'rise_shs': 'shift_shs_share',
    }

    score_sum = df.loc[:, RISE_INDICES].sum(axis=1)
    # rows for which the RISE scores do not sum up to 0
    has_score = score_sum != 0
    # normalisation factor of those rows
    norm = df.loc[has_score, RISE_INDICES].sum(axis=1)

    # rows without any score get a share of 0 in every share column
    for share in share_columns.values():
        df.loc[score_sum == 0, share] = 0

    # weight of each option: twice its own score minus the two other scores,
    # normalised by the sum of the scores
    for rise, share in share_columns.items():
        weight = 2 * df.loc[has_score, rise]
        for other in share_columns:
            if other != rise:
                weight = weight - df.loc[has_score, other]
        df.loc[has_score, share] = weight
        df.loc[has_score, share] = df.loc[has_score, share].div(norm)

    return df
df = prepare_se4all_data(df)

# show the rows in which a single option carries the whole RISE score
rise_total = df[RISE_INDICES].sum(axis=1)
for opt in ELECTRIFICATION_OPTIONS:
    cond = df['rise_%s' % opt] == rise_total
    if cond.values.any():
        print(df.loc[cond])
df

In [None]:
# `non_zero_indices` and `norm` are not defined by this cell; define them here
# so that it does not depend on leftover kernel state.
# mask of the rows whose RISE scores do not sum up to 0, and the sum for those rows
non_zero_indices = df.loc[:, RISE_INDICES].sum(axis=1) != 0
norm = df.loc[non_zero_indices, RISE_INDICES].sum(axis=1)

# share of each option: twice its own score minus the two other scores, normalised by the sum
df.loc[non_zero_indices, 'shift_grid_share'] = \
    2 * df.loc[non_zero_indices, 'rise_grid'] \
    - df.loc[non_zero_indices, 'rise_mg'] \
    - df.loc[non_zero_indices, 'rise_shs']
df.loc[non_zero_indices, 'shift_grid_share'] = df.loc[non_zero_indices, 'shift_grid_share'].div(norm)

df.loc[non_zero_indices, 'shift_mg_share'] = \
    2 * df.loc[non_zero_indices, 'rise_mg'] \
    - df.loc[non_zero_indices, 'rise_grid'] \
    - df.loc[non_zero_indices, 'rise_shs']
df.loc[non_zero_indices, 'shift_mg_share'] = df.loc[non_zero_indices, 'shift_mg_share'].div(norm)

df.loc[non_zero_indices, 'shift_shs_share'] = \
    2 * df.loc[non_zero_indices, 'rise_shs'] \
    - df.loc[non_zero_indices, 'rise_grid'] \
    - df.loc[non_zero_indices, 'rise_mg']
df.loc[non_zero_indices, 'shift_shs_share'] = df.loc[non_zero_indices, 'shift_shs_share'].div(norm)

# scale the endogenous population of each option by (1 + its share)
# NOTE(review): requires the 'endo_pop_get_*_2030' columns in `df` - confirm which frame is meant
for opt in ELECTRIFICATION_OPTIONS:
    df['pop_get_%s_2030' % opt] = \
        df['endo_pop_get_%s_2030' % opt] * (1 + df['shift_%s_share' % opt])



In [None]:

# NOTE(review): this cell uses names that no visible cell assigns
# (non_zero_indices, weight_grid, weight_mentis, weighted_norm) and columns that
# no visible code creates (shift_menti_mg, shift_menti_shs,
# shift_grid_to_mg_share, shift_grid_to_shs_share). It presumably stems from an
# earlier version of the model - confirm before running it.

# share of population which will be on the grid in the se4all+SHIFT scenario
# (the right-hand side is computed for all rows; only the masked rows are assigned)
df.loc[non_zero_indices, 'shift_grid_share'] = 2 * df.rise_grid - df.rise_mg - df.rise_shs
# TODO: divide by the sum here
df.loc[non_zero_indices, 'shift_grid_share']

# share of population which will have changed from grid to mg in the se4all+SHIFT scenario
df.loc[non_zero_indices, 'shift_mg_share'] = \
    (df.rise_mg * weight_grid + df.shift_menti_mg * weight_mentis) / weighted_norm

# share of population which will have changed from grid to shs in the se4all+SHIFT scenario
df.loc[non_zero_indices, 'shift_shs_share'] = \
    (df.rise_shs * weight_grid + df.shift_menti_shs * weight_mentis) / weighted_norm

# SHARED WITH prOG
# if the predicted mg share is larger than or equal to the predicted grid share, the number
# of people predicted to use mg in the se4all+SHIFT scenario is returned, otherwise it is set to 0
df.loc[df.shift_grid_to_mg_share >= df.shift_grid_share, 'shift_pop_grid_to_mg'] = \
    df.shift_grid_to_mg_share * df.endo_pop_get_grid_2030
df.loc[df.shift_grid_to_mg_share < df.shift_grid_share, 'shift_pop_grid_to_mg'] = 0

# if the predicted shs share is larger than or equal to the predicted grid share, the number
# of people predicted to use shs in the se4all+SHIFT scenario is returned, otherwise it is set to 0
df.loc[df.shift_grid_to_shs_share >= df.shift_grid_share, 'shift_pop_grid_to_shs'] = \
    df.shift_grid_to_shs_share * df.endo_pop_get_grid_2030
df.loc[df.shift_grid_to_shs_share < df.shift_grid_share, 'shift_pop_grid_to_shs'] = 0


In [None]:
# Deviation between the reference values and the computed se4all+SHIFT results,
# indexed by the ISO code of the country
df_diff = (
    (xls_se[EXO_RESULTS] - se_df[EXO_RESULTS])
    .assign(iso=se_df.country_iso)
    .set_index('iso')
)

def highlight_mismatch(col, eps=0.02, diff=None):
    """Return the rows whose absolute deviation in column `col` exceeds `eps`.

    :param col: name of the column to check
    :param eps: absolute tolerance; only deviations strictly larger are reported
    :param diff: DataFrame of deviations; defaults to the notebook-level `df_diff`
    :return: the rows of `diff` for which abs(diff[col]) > eps
    """
    if diff is None:
        # resolved at call time so that the most recently computed df_diff is used
        diff = df_diff
    return diff.loc[np.abs(diff[col]) > eps]

# Gather, column by column, the countries whose deviation exceeds the tolerance
mismatched = []
for col in EXO_RESULTS:
    offenders = highlight_mismatch(col).index.to_list()
    if offenders:
        print('problems with ', col, offenders)
    mismatched = mismatched + offenders

# unique countries with at least one mismatch, and their deviations
l = list(set(mismatched))
print(l)
df_diff.loc[l]


In [None]:
# NOTE(review): extends the list `l` left over from the previous cell, so
# re-running this cell keeps appending the same entries (the set below hides that).
for col in POP_GET:
    l = l + highlight_mismatch(col).index.to_list()
# bare expression in the middle of a cell: evaluated but not displayed
len(set(l))
# displayed: unique countries collected so far
set(l)

In [None]:
# Columns to inspect; exactly one selection is active, the others are kept for quick switching
#col = 'pop_dark_2017'
#col = ['shift_pop_grid_to_mg', 'shift_pop_grid_to_shs']
#col = ['endo_pop_get_%s_2030' % opt for opt in ELECTRIFICATION_OPTIONS]
col = ['shift_grid_share', 'shift_grid_to_mg_share', 'shift_grid_to_shs_share']
#col = ['shift_menti_mg', 'shift_menti_shs']
#col = HH_CAP
#col = ['gdp_class', 'mobile_money_class', 'ease_doing_business_class', 'corruption_class', 'weak_grid_class']
print(col)

# selected columns for the rows whose country_iso is 'BWA'
se_df.loc[se_df.country_iso.isin(['BWA']), col]
#se_df.columns

# prOG scenario

In [None]:
%%time
from data_preparation import prepare_endogenous_variables, prepare_prog_data, extract_results_scenario

# prOG pipeline: raw data -> endogenous variables -> prOG-specific data
df = prepare_prog_data(
    prepare_endogenous_variables(
        pd.read_csv('data/raw_data.csv', float_precision='high')
    )
)

# results of the prOG scenario only
prog_df = extract_results_scenario(df, PROG_SENARIO)

In [None]:
# Deviation between the reference values and the computed prOG results
df_diff = xls_prog[EXO_RESULTS] - prog_df[EXO_RESULTS]
# label the rows with the ISO codes of the frame that is actually compared
# (prog_df, not se_df)
df_diff['iso'] = prog_df.country_iso
df_diff = df_diff.set_index('iso')

def highlight_mismatch(col, eps=0.1, diff=None):
    """Return the rows whose absolute deviation in column `col` exceeds `eps`.

    :param col: name of the column to check
    :param eps: absolute tolerance; only deviations strictly larger are reported
    :param diff: DataFrame of deviations; defaults to the notebook-level `df_diff`
    :return: the rows of `diff` for which abs(diff[col]) > eps
    """
    if diff is None:
        # resolved at call time so that the most recently computed df_diff is used
        diff = df_diff
    return diff.loc[np.abs(diff[col]) > eps]

# Gather, column by column, the countries whose deviation exceeds the tolerance
mismatched = []
for col in EXO_RESULTS:
    offenders = highlight_mismatch(col).index.to_list()
    if offenders:
        print('problems with ', col, offenders)
    mismatched = mismatched + offenders

# unique countries with at least one mismatch, and their deviations
l = list(set(mismatched))
print(l)
df_diff.loc[l]


## variables that are not shared between prOG and se4all+SHIFT

## BaU scenario data

## se4all+SHIFT scenario data

## Exogenous results for all scenarios

In [None]:
# the imported helper is named `extract_results_scenario`
se_df = extract_results_scenario(df, SE4ALL_SHIFT_SENARIO)

In [None]:
# the imported helper is named `extract_results_scenario`
bau_df = extract_results_scenario(df, BAU_SENARIO)
# display only: the re-indexed frame is not assigned back to bau_df
bau_df.set_index('country_iso')

In [None]:
# the imported helper is named `extract_results_scenario`
prog_df = extract_results_scenario(df, PROG_SENARIO)

In [None]:
# Header of the exogenous result columns (same names and order as EXO_RESULTS).
# Kept as a comment: as code, the bare comma-separated names are undefined
# variables and raise a NameError.
# pop_get_grid_2030,pop_get_mg_2030,pop_get_shs_2030,hh_get_grid_2030,hh_get_mg_2030,hh_get_shs_2030,hh_grid_capacity,hh_mg_capacity,hh_shs_capacity,hh_cap_scn2_grid_capacity,hh_cap_scn2_mg_capacity,hh_cap_scn2_shs_capacity

In [None]:
# Reference results; read from 'data/', the directory used by the other cells
# of this notebook (this cell pointed to 'daten/')
xls_bau = pd.read_csv('data/xls_bau.csv', float_precision='high')
xls_se = pd.read_csv('data/xls_se.csv', float_precision='high')

In [None]:
# Reference results; read from 'data/', the directory used by the other cells
# of this notebook (this cell pointed to 'daten/')
xls_bau = pd.read_csv('data/xls_bau.csv', float_precision='high')
xls_se = pd.read_csv('data/xls_se.csv', float_precision='high')

# Deviation between the reference values and the computed BaU results,
# indexed by the ISO code of the country
df_diff = xls_bau[EXO_RESULTS] - bau_df[EXO_RESULTS]
df_diff['iso'] = bau_df.country_iso
df_diff = df_diff.set_index('iso')

def highlight_mismatch(col, eps=0.02, diff=None):
    """Return the rows whose rounded absolute deviation in `col` exceeds `eps`.

    The deviation is rounded to 0 decimals before the comparison, so with the
    default tolerance only deviations that round to a non-zero integer are reported.

    :param col: name of the column to check
    :param eps: tolerance applied to the rounded absolute deviation
    :param diff: DataFrame of deviations; defaults to the notebook-level `df_diff`
    :return: the rows of `diff` for which abs(round(diff[col])) > eps
    """
    if diff is None:
        # resolved at call time so that the most recently computed df_diff is used
        diff = df_diff
    return diff.loc[np.abs(diff[col].round(0)) > eps]

# Count the countries with at least one mismatching result column. The count
# is printed: a bare expression in the middle of a cell is never displayed.
l = []

for col in EXO_RESULTS:
    l = l + highlight_mismatch(col).index.to_list()
print(len(set(l)))

# detail for a single column
col = 'hh_mg_capacity'
l = highlight_mismatch(col).index.to_list()
print(l)
df_diff.loc[l]

In [None]:
# Countries whose deviation in 'hh_mg_capacity' exceeds the tolerance of the
# currently defined highlight_mismatch, followed by their deviations
col = 'hh_mg_capacity'
l = highlight_mismatch(col).index.to_list()
print(l)
df_diff.loc[l]

In [None]:
# rows of the current working frame whose country_iso is 'IND'
df.loc[df.country_iso == 'IND']

In [None]:
# first rows of the reference BaU values, restricted to the exogenous result columns
xls_bau[EXO_RESULTS].head()

In [None]:
# first rows of the computed BaU results, restricted to the exogenous result columns
bau_df[EXO_RESULTS].head()

In [None]:
# first rows of the computed prOG results, restricted to the exogenous result columns
prog_df[EXO_RESULTS].head()

In [None]:
# first rows of the computed se4all+SHIFT results, restricted to the exogenous result columns
se_df[EXO_RESULTS].head()

In [None]:
# Deviation between the reference values and the computed se4all+SHIFT results.
# 'iso' stays a regular column here (the index is not replaced) because the
# code further down in this cell reads it as a column.
df_diff = (xls_se[EXO_RESULTS] - se_df[EXO_RESULTS]).assign(iso=se_df.country_iso)

def highlight_mismatch(col, eps=0.1, diff=None):
    """Return the rows whose absolute deviation in column `col` exceeds `eps`.

    :param col: name of the column to check
    :param eps: absolute tolerance; only deviations strictly larger are reported
    :param diff: DataFrame of deviations; defaults to the notebook-level `df_diff`
    :return: the rows of `diff` for which abs(diff[col]) > eps
    """
    if diff is None:
        # resolved at call time so that the most recently computed df_diff is used
        diff = df_diff
    return diff.loc[np.abs(diff[col]) > eps]

# ISO codes of all mismatching rows over every result column (with repetitions)
l = []
for col in EXO_RESULTS:
    l.extend(highlight_mismatch(col).iso.to_list())

# number of distinct countries with at least one mismatch
len(set(l))

In [None]:
#=WENN(Q22<4,5;0;WENN(Q22<=9;0,5;WENN(Q22>9;1;"NA")))

def map_weak_grid_class(weak_grid_idx):
    """Assign an index value to differentiate weak grid

    Follows the spreadsheet rule IF(x < 4.5; 0; IF(x <= 9; 0.5; 1)):

    :param weak_grid_idx: weak grid index of a country
    :return: 0 below 4.5, 0.5 from 4.5 up to and including 9, 1 above 9
    """
    # the lowest class is tested first so that the middle class stays reachable
    if weak_grid_idx < 4.5:
        return 0
    if weak_grid_idx <= 9:
        return 0.5
    return 1

#=WENN(O25<26;0;WENN(O25<=33;0,5;WENN(O25>33;1;"NA")))

def map_corruption_class(corruption_idx):
    """Assign an index value to differentiate corruption

    Follows the spreadsheet rule IF(x < 26; 0; IF(x <= 33; 0.5; 1)):

    :param corruption_idx: corruption index of a country
    :return: 0 below 26, 0.5 from 26 up to and including 33, 1 above 33
    """
    # strict comparison: a value of exactly 26 belongs to the middle class
    if corruption_idx < 26:
        return 0
    if corruption_idx <= 33:
        return 0.5
    return 1

#=WENN(M22<131;0;WENN(M22<=164;0,5;WENN(M22>164;1;"NA")))

def map_ease_doing_business_class(business_ease):
    """Assign an index value to differentiate ease of doing business

    Follows the spreadsheet rule IF(x < 131; 0; IF(x <= 164; 0.5; 1)):

    :param business_ease: ease of doing business value of a country
    :return: 0 below 131, 0.5 from 131 up to and including 164, 1 above 164
    """
    # strict comparison: a value of exactly 131 belongs to the middle class
    if business_ease < 131:
        return 0
    if business_ease <= 164:
        return 0.5
    return 1

#Se4All

def map_gdp_class(gdp_per_capita):
    """Assign an index value to differentiate gdp per capita

    :param gdp_per_capita: gdp per capita of a country
    :return: 0 below 700, 0.5 from 700 to below 1500, 1 otherwise
    """
    if gdp_per_capita < 700:
        return 0
    if gdp_per_capita < 1500:
        return 0.5
    return 1
    
#=WENN(K5<=0,12;0;WENN(K5<=0,21;0,5;1))

def map_mobile_money_class(mobile_money):
    """Assign an index value to differentiate mobile_money

    :param mobile_money: mobile money value of a country
    :return: 0 up to and including 0.12, 0.5 up to and including 0.21, 1 above
    """
    if mobile_money <= 0.12:
        return 0
    if mobile_money <= 0.21:
        return 0.5
    return 1


In [None]:
# Toy frame to try out the conditional creation of the share columns
df = pd.DataFrame({
    'A': [0, 1, 2, 3, 4],
    'B': [5, -1, 7, -3, 9],
    'C': [4, 7, 3, 9, 1],
    'D': [1, 5, 3, 0, 1],
    'E': [7, 7, 2, 9, 7],
    'Z': ['a', 'a', 'b', 'c', 'c'],
})
WEIGHT_GRID = 0.8  # $RT_shift_factors.$O$2
WEIGHT = 0.2  # $RT_shift_factors.$P$2

# NOTE: this overwrites the notebook-level RISE_INDICES with the toy column names
RISE_INDICES = ['A', 'B']
SHIFT_MENTI = []
# norm = df.loc[:,RISE_INDICES].sum(axis=1) * WEIGHT_GRID + df.loc[:,SHIFT_MENTI].sum(axis=1) * WEIGHT
# df['E'] * WEIGHT_GRID / norm

entries = ['shift_grid_share','shift_grid_to_mg_share', 'shift_grid_to_shs_share']
# df.loc[df.loc[:,RISE_INDICES + SHIFT_MENTI].sum(axis=1) == 0, 'shift_grid_share' ] = '0'

# rows with a positive B get the string '0' in every share column; the other rows stay empty
positive_b = df.B > 0
for entry in entries:
    df.loc[positive_b, entry] = '0'

df