In [None]:
import numpy as np
import pandas as pd

In [None]:
# Project folders, given relative to the notebook's working directory.
analysis = '../analysis/'
data = '../data/'
data_out = '../results/data_out/'
# Derived from data_out so that relocating the results folder needs a single edit.
output_folder = data_out + 'results/'
fig_folder = '../results/figs/'
# Tag embedded in the names of the employment-effect CSV files read further down.
version = '3_oct_2022'
# NOTE(review): presumably y-axis limits for figures; not used in the visible cells.
ylim = (-250000, 600000)

In [None]:
# Occupation lookup table: keep only the code, the title and the total
# employment column; the other columns of the CSV are not needed here.
occ_keep_cols = ['OCC_CODE', 'OCC_TITLE', 'TOT_EMP']
df_occ = pd.read_csv(
    data_out + 'occ_names_bls_minor_major.csv',
    index_col=0,
)[occ_keep_cols]

In [None]:
# Columns of the yearly employment-effect files that are summed per year.
# NOTE(review): capex uses 'T&D' while opex uses 'Transmission and distribution',
# and there is no 'Nuclear-capex' entry -- confirm this matches the CSV headers.
capex_cols = [
    'Wind-capex',
    'Solar-capex',
    'Natural gas-capex',
    'Coal-capex',
    'Biomass-capex',
    'Geothermal-capex',
    'Hydro-capex',
    'Battery storage-capex',
    'T&D-capex',
]
opex_cols = [
    'Biomass-opex',
    'Coal-opex',
    'Solar-opex',
    'Natural gas-opex',
    'Geothermal-opex',
    'Hydro-opex',
    'Nuclear-opex',
    'Wind-opex',
    'Transmission and distribution-opex',
    'Battery storage-opex',
]
cols = capex_cols + opex_cols

# Read one employment-effect file per yearly transition (2020-2021 ... 2048-2049)
# and tag each with the year the transition ends in. The frames are collected in
# a list and concatenated once: growing a DataFrame with pd.concat inside the loop
# re-copies all previous rows on every iteration, and concatenating the initial
# empty DataFrame triggers a FutureWarning in recent pandas.
yearly_frames = []
for yr in range(2021, 2050, 1):
    df = pd.read_csv(
        output_folder + f'diff_baseline_employment_effects_{yr - 1}-{yr}__{version}.csv',
        index_col=0,
    )
    df['year'] = yr
    yearly_frames.append(df)
df_full = pd.concat(yearly_frames)

# Index by (row label of the CSV, year). The row labels are later merged against
# OCC_CODE, so they are presumably occupation codes.
df_full = df_full.reset_index().set_index(['index', 'year'])

# Build a year x row-label table: unstack moves 'year' into the columns, the
# transpose gives an index of (column name, year), and dropping the column-name
# level followed by the groupby sums all entries of `cols` within each year.
df_try = df_full.loc[:, cols].unstack().T.copy()
df_try.index = df_try.index.droplevel(0)
df_try = df_try.groupby(df_try.index).sum()

# Add an all-zero row for 2020 (the first file covers the 2020-2021 transition)
# and put the years in ascending order.
df_try.loc[2020, :] = 0
df_try = df_try.sort_index()

# Per column of df_try: total of all positive yearly entries, and the magnitude
# of the total of all negative yearly entries.
cumul_plus = df_try.where(df_try > 0).sum()
cumul_min = df_try.where(df_try < 0).sum().mul(-1)

# Decomposition used below:
#   cumul_plus = temp + perm
#   cumul_min  = lost + temp
# i.e. the net change is split into a lasting gain (perm) or lasting loss (lost),
# and whatever part of the gains is offset by losses counts as temp.
dff = cumul_plus.sub(cumul_min)

perm = dff.clip(lower=0.0)
lost = dff.mul(-1).clip(lower=0.0)
temp = cumul_plus.sub(perm)

ind_perm, ind_lost, ind_temp = perm, lost, temp
# shock 2035 split: the first window runs from 2020 up to and including
# split_year, the second from split_year + 1 up to and including end_year.
split_year = 2034
end_year = 2038

year_level = df_full.index.get_level_values('year')
until_mask = year_level.isin(range(2020, split_year + 1))
after_mask = year_level.isin(range(split_year + 1, end_year + 1))

# Sum 'emp_tot' over the years of each window, per value of the 'index' level.
df_until2035 = df_full.loc[until_mask, 'emp_tot'].groupby(level='index').sum()
df_after2035 = df_full.loc[after_mask, 'emp_tot'].groupby(level='index').sum()

# rel employment 2035 split
def _attach_occ_info(emp_tot_by_occ):
    """Right-join the occupation table onto a summed 'emp_tot' series, keyed by OCC_CODE."""
    joined = df_occ.merge(emp_tot_by_occ, left_on='OCC_CODE', right_index=True, how='right')
    return joined.set_index('OCC_CODE')


# Window sums of emp_tot expressed as a fraction of each occupation's TOT_EMP.
merged_2035 = _attach_occ_info(df_until2035)
df_until2035_rel_emp = merged_2035['emp_tot'] / merged_2035['TOT_EMP']

merged_2035 = _attach_occ_info(df_after2035)
df_after2035_rel_emp = merged_2035['emp_tot'] / merged_2035['TOT_EMP']
# Side-by-side table of the three components, one column each.
ind_profs = pd.concat(
    [ind_temp, ind_lost, ind_perm],
    axis=1,
    keys=['temp', 'lost', 'perm'],
)

# Outer join with the occupation table: the index of ind_profs is matched
# against OCC_CODE, and rows present on only one side are kept.
ind_profs = ind_profs.merge(df_occ, how='outer', left_index=True, right_on='OCC_CODE')
shock_cols = ['temp', 'lost', 'perm']

# rel shock: each component as a share of its own column total.
ind_profs_rel_shock = ind_profs.copy()
ind_profs_rel_shock[shock_cols] = (
    ind_profs_rel_shock[shock_cols] / ind_profs_rel_shock[shock_cols].sum()
)

# rel employment: each component divided by the row's TOT_EMP.
ind_profs_rel_emp = ind_profs.copy()
ind_profs_rel_emp[shock_cols] = ind_profs_rel_emp[shock_cols].div(
    ind_profs_rel_emp['TOT_EMP'], axis=0
)

# RCA: share of the column total divided by the row's share of total TOT_EMP.
ind_profs_rca = ind_profs.copy()
ind_profs_rca['TOT_EMP'] = ind_profs_rca['TOT_EMP'] / ind_profs_rca['TOT_EMP'].sum()
shock_share = ind_profs_rca[shock_cols] / ind_profs_rca[shock_cols].sum()
ind_profs_rca[shock_cols] = shock_share.div(ind_profs_rca['TOT_EMP'], axis=0)
def _key_by_occ_code(frame):
    """Drop the occupation title and employment columns and index the frame by OCC_CODE."""
    return frame.drop(columns=['OCC_TITLE', 'TOT_EMP']).set_index('OCC_CODE')


# Leave only the temp/lost/perm columns in each table, keyed by OCC_CODE.
ind_profs = _key_by_occ_code(ind_profs)
ind_profs_rel_emp = _key_by_occ_code(ind_profs_rel_emp)
ind_profs_rel_shock = _key_by_occ_code(ind_profs_rel_shock)
ind_profs_rca = _key_by_occ_code(ind_profs_rca)

In [None]:
# Preview the first rows of the raw table and its three normalised variants.
display(
    ind_profs.head(),
    ind_profs_rel_emp.head(),
    ind_profs_rel_shock.head(),
    ind_profs_rca.head(),
)

In [None]:
# Minimum employment-relative share for a component to count as notable.
thres = 0.01

# Number of occupations above the threshold, per component.
above_thres = ind_profs_rel_emp > thres
display(above_thres.sum())

# Keep the occupations where at least one component exceeds the threshold and
# record which component is the largest. The filter is applied BEFORE idxmax:
# the outer merge with the occupation table leaves all-NaN rows, and calling
# idxmax on such rows is deprecated in recent pandas (slated to raise). Every
# row that survives the filter has at least one non-NaN value, so the result
# is the same as filtering afterwards.
# NOTE(review): the '_25' suffix does not match thres = 0.01; the name is kept
# because later cells may refer to it.
ind_profs_rel_emp_25 = ind_profs_rel_emp.loc[above_thres.any(axis=1)].copy()
ind_profs_rel_emp_25['idmax'] = ind_profs_rel_emp_25.idxmax(axis=1)

In [None]:
# Save the employment-relative temp/lost/perm table. The path is built from the
# shared data_out folder of the configuration cell instead of repeating the
# literal, so the output location is controlled in one place.
ind_profs_rel_emp.to_csv(data_out + 'occs_archetypes_dynamic.csv')