In [None]:
import numpy as np
import pandas as pd
from IPython.display import display, HTML

# --- Input / output locations ---------------------------------------------
path_data = "../data/omn/"
path_fig = "../results/figs/"
# Node table: holds the shocks before and after the mark year
file_arch = 'occ_archetypes_thresholds_relbase_2034_2038.csv'
file_edgelist = 'edgelist_cc_mobility_merge.csv'

# --- Network choice ---------------------------------------------------------
# One of 'cc' (the baseline), 'baselinemerge', 'tasksim' or 'uniform'
network_type = 'cc'
use_special_alpha = False
use_separate_in_degree_normalisation = False
cc_if_less_than_3 = False

# --- Periods ----------------------------------------------------------------
# mark_year splits the horizon in two: it is excluded from the first period
# and included in the second one.
start_year, mark_year, end_year = 2020, 2034, 2038
period_before = mark_year - start_year
period_after = end_year - mark_year

# Edge-list column holding the edge weight of each network type; an unknown
# network type leaves the weight name empty.
edge_weight_by_network = {
    'baselinemerge': 'trans_merge_alphaweight',
    'cc': 'trans_prob_cc',
    'tasksim': 'Task_sim',
    'uniform': 'trans_unif',
}
edge_weight = edge_weight_by_network.get(network_type, '')

file_export = ''.join(
    ['occ_shock_network', network_type, '_assortativity_flows.csv'])

# Load the edge list (one row per source/target occupation pair) and the
# node table (one row per occupation).
df_edgelist = pd.read_csv(path_data + file_edgelist)
df_nodes = pd.read_csv(path_data + file_arch)

# Names of the shock columns; they depend on the configured mark year.
shock_after_name = 'shock_after_' + str(mark_year)
shock_before_name = 'shock_before_' + str(mark_year)

# Add normalized shock columns (shock divided by total employment)
df_nodes[shock_after_name + '_norm'] = (
    df_nodes[shock_after_name] / df_nodes['TOT_EMP'])
df_nodes[shock_before_name + '_norm'] = (
    df_nodes[shock_before_name] / df_nodes['TOT_EMP'])
# Add log employment column
df_nodes['TOT_EMP_log'] = np.log(df_nodes['TOT_EMP'])

# Reduced occupation table that receives the neighbour-shock columns later.
# The shock columns are selected through the configured names (not a
# hard-coded year) so that changing mark_year keeps working, and .copy()
# makes df_occ an independent frame so adding columns to it does not raise
# SettingWithCopyWarning.
df_occ = df_nodes[[
    'O*NET-SOC Code', 'OCC_TITLE', 'TOT_EMP', 'A_MEAN',
    'Phase_out_r0.01', 'Permanent_boost_r0.01', 'Temporary_boost_r0.01',
    'Late_boost_r0.01', shock_after_name, shock_before_name,
    shock_after_name + '_norm', shock_before_name + '_norm', 'TOT_EMP_log',
]].copy()

# Occupation code -> shock lookups for the two periods
dict_occ_shock_bf = dict(zip(df_nodes['O*NET-SOC Code'],
                             df_nodes[shock_before_name]))

dict_occ_shock_af = dict(zip(df_nodes['O*NET-SOC Code'],
                             df_nodes[shock_after_name]))

def make_dict_shock(shock=shock_after_name):
    '''Returns a dictionary mapping every O*NET-SOC code of df_nodes to the
    value found in the df_nodes column named by shock.
    '''
    occ_codes = df_nodes['O*NET-SOC Code']
    column_values = df_nodes[shock]
    return {code: value for code, value in zip(occ_codes, column_values)}

def make_column_shock(df, shock=shock_after_name, direction='source'):
    '''Adds, in place, a column named shock + '_' + direction to the edge
    list df. It holds the shock value of the occupation found in the
    'OCC_' + direction column of each edge. Nothing is returned.
    '''
    occ_to_value = make_dict_shock(shock)
    endpoint_codes = df['OCC_' + direction]
    new_column = shock + '_' + direction
    df[new_column] = endpoint_codes.map(occ_to_value)
        
# Attach the shocks of both edge endpoints to the edge list: raw shocks
# first, then the employment-normalised ones; within each, the source
# endpoint before the target, and the 'after' shock before the 'before' one.
for norm_suffix in ('', '_norm'):
    for endpoint in ('source', 'target'):
        for shock_name in (shock_after_name, shock_before_name):
            make_column_shock(df_edgelist, shock=shock_name + norm_suffix,
                              direction=endpoint)




### Commands to compute the neighbour average
def make_df_shock_avneigh_shock(df, shock=shock_after_name, direction='in'):
    ''' Computes, for every node, the edge-weighted average shock of its
    neighbors (in- or out-neighbors depending on direction).

    Parameters
    ----------
    df : edge list with the columns 'OCC_source', 'OCC_target', the neighbor
        shock column (shock + '_source' for direction 'in', shock + '_target'
        for direction 'out') and the weight column.
    shock : name of the shock column.
    direction : 'in' averages over the sources of edges pointing to the
        node, 'out' averages over the targets of edges leaving the node.

    The weight column is edge_weight followed by the suffixes selected by the
    configuration flags: '_in' (only for direction 'in', when
    use_separate_in_degree_normalisation is set), '_spec' (use_special_alpha)
    and '_plus3' (cc_if_less_than_3).

    Returns
    -------
    DataFrame with the columns 'OCC_' + node, 'neighbors_shock' and
    'node_shock' (the node's own value of shock).

    Raises
    ------
    ValueError if direction is neither 'in' nor 'out'.
    ZeroDivisionError (from np.average) if the weights of a node's neighbors
    sum to zero.
    '''
    if direction == 'in':
        node, neighbors = 'target', 'source'
    elif direction == 'out':
        node, neighbors = 'source', 'target'
    else:
        raise ValueError(
            "direction must be 'in' or 'out', got %r" % (direction,))

    # Build the name of the weight column from the configuration flags
    weight_col = edge_weight
    if direction == 'in' and use_separate_in_degree_normalisation:
        weight_col += '_in'
    if use_special_alpha:
        weight_col += '_spec'
    if cc_if_less_than_3:
        weight_col += '_plus3'

    shock_col = shock + '_' + neighbors

    # Remove self-loops so they aren't counted in the average
    df_temp = df[df['OCC_source'] != df['OCC_target']]

    # Group by node and average over neighbors, weighted by the edges. Only
    # the two needed columns are handed to apply, so the grouping column is
    # not part of the frames passed to the lambda.
    df_output = (
        df_temp.groupby(['OCC_' + node])[[shock_col, weight_col]]
        .apply(lambda g: np.average(g[shock_col], weights=g[weight_col]))
        .reset_index()
    )

    df_output['node_shock'] = df_output['OCC_' + node].map(
        make_dict_shock(shock))

    # The apply result arrives in a column labelled 0
    df_output = df_output.rename(columns={0: 'neighbors_shock'})

    return df_output

def make_dict_avneigh_shock(df, node):
    '''Returns a dictionary mapping the occupations in the 'OCC_' + node
    column of df to the matching values of its 'neighbors_shock' column.
    '''
    occupations = df['OCC_' + node]
    neighbor_averages = df['neighbors_shock']
    return {occ: value for occ, value in zip(occupations, neighbor_averages)}

# Expected first-order flows. Transition probabilities are yearly, so by
# default the flow is scaled by the number of years of the period the shock
# belongs to; pass scale_by_period=False for the unscaled flow.
def make_df_shock_flow(df, shock=shock_after_name, direction='in',
                       scale_by_period=True):
    ''' Computes the expected (first order) flow given a shock
    to occupations.

    For every node the flow is the sum, over its neighbors, of the
    neighbor's shock times the edge weight (column edge_weight), optionally
    multiplied by the length of the period in years.

    Parameters
    ----------
    df : edge list with the columns 'OCC_source', 'OCC_target', the neighbor
        shock column (shock + '_source' for direction 'in', shock + '_target'
        for direction 'out') and the edge_weight column.
    shock : name of the shock column. When scale_by_period is set it must
        start with 'shock_before' (scaled by period_before) or 'shock_after'
        (scaled by period_after).
    direction : 'in' sums over the sources of edges pointing to the node,
        'out' sums over the targets of edges leaving the node.
    scale_by_period : multiply the flow by the period length when True.

    Returns
    -------
    DataFrame with the columns 'OCC_' + node, 'flow_shock' and 'node_shock'
    (the node's own value of shock).

    Raises
    ------
    ValueError if direction is neither 'in' nor 'out', or if the period
    cannot be derived from shock while scale_by_period is set.
    '''
    if direction == 'in':
        node, neighbors = 'target', 'source'
    elif direction == 'out':
        node, neighbors = 'source', 'target'
    else:
        raise ValueError(
            "direction must be 'in' or 'out', got %r" % (direction,))

    # Pick the period matching the shock, so that 'after' shocks are scaled
    # by period_after and 'before' shocks by period_before.
    if not scale_by_period:
        period = 1
    elif shock.startswith('shock_before'):
        period = period_before
    elif shock.startswith('shock_after'):
        period = period_after
    else:
        raise ValueError(
            "cannot derive the period from shock name %r" % (shock,))

    shock_col = shock + '_' + neighbors

    # Remove self-loops so they aren't counted in the flow
    df_temp = df[df['OCC_source'] != df['OCC_target']]

    # Group by node and sum the weighted neighbor shocks. Only the two
    # needed columns are handed to apply.
    df_output = (
        df_temp.groupby(['OCC_' + node])[[shock_col, edge_weight]]
        .apply(lambda g: np.sum(g[shock_col] * g[edge_weight] * period))
        .reset_index()
    )

    df_output['node_shock'] = df_output['OCC_' + node].map(
        make_dict_shock(shock))

    # The apply result arrives in a column labelled 0
    df_output = df_output.rename(columns={0: 'flow_shock'})

    return df_output

 

def make_dict_flow_shock(df, node):
    '''Returns a dictionary mapping the occupations in the 'OCC_' + node
    column of df to the matching values of its 'flow_shock' column.
    '''
    occupations = df['OCC_' + node]
    flows = df['flow_shock']
    return {occ: value for occ, value in zip(occupations, flows)}

# Pooled shock of the neighbors relative to their pooled employment
def make_df_shock_pool(df, shock=shock_after_name, direction='in'):
    ''' Computes the pool shock of every node: the sum of its neighbors'
    shocks divided by the sum of its neighbors' total employment.

    Only edges with a strictly positive edge_weight count, and self-loops
    are ignored. The edge weights themselves do not enter the ratio.

    Parameters
    ----------
    df : edge list with the columns 'OCC_source', 'OCC_target', the neighbor
        shock column (shock + '_source' for direction 'in', shock + '_target'
        for direction 'out') and the edge_weight column. It is not modified.
    shock : name of the shock column.
    direction : 'in' pools the sources of edges pointing to the node, 'out'
        pools the targets of edges leaving the node.

    Returns
    -------
    DataFrame with the columns 'OCC_' + node, 'pool_shock' and 'node_shock'
    (the node's own value of shock).

    Raises
    ------
    ValueError if direction is neither 'in' nor 'out'.
    '''
    if direction == 'in':
        node, neighbors = 'target', 'source'
    elif direction == 'out':
        node, neighbors = 'source', 'target'
    else:
        raise ValueError(
            "direction must be 'in' or 'out', got %r" % (direction,))

    shock_col = shock + '_' + neighbors
    emp_col = 'tot_emp_' + neighbors

    # Drop self-loops and keep only edges with edge_weight > 0. The explicit
    # copy lets the employment column be added without touching df and
    # without a SettingWithCopyWarning.
    keep = (df['OCC_source'] != df['OCC_target']) & (df[edge_weight] > 0)
    df_temp = df.loc[keep].copy()

    # Total employment of the neighbor at the other end of each edge
    df_temp[emp_col] = df_temp['OCC_' + neighbors].map(
        make_dict_shock('TOT_EMP'))

    # NOTE(review): for a '_norm' shock the numerator is already divided by
    # employment - confirm that dividing by pooled employment is intended.
    grouped = df_temp.groupby('OCC_' + node)
    pooled = grouped[shock_col].sum() / grouped[emp_col].sum()
    df_output = pooled.rename('pool_shock').reset_index()

    df_output['node_shock'] = df_output['OCC_' + node].map(
        make_dict_shock(shock))

    return df_output

 

def make_dict_pool_shock(df, node):
    '''Returns a dictionary mapping the occupations in the 'OCC_' + node
    column of df to the matching values of its 'pool_shock' column.
    '''
    occupations = df['OCC_' + node]
    pooled = df['pool_shock']
    return {occ: value for occ, value in zip(occupations, pooled)}

#### Dimensions over which the neighbor shocks are computed

# Edge direction, seen from the node
directions = ["in", "out"]
# Period relative to the mark year
shock_time = ["before", "after"]
# Suffix of the shock column: raw shock, then employment-normalised shock
norm_not_norm = ["", "_norm"]

In [None]:
# Net fraction of neighbors that are in an "up" versus a "down" archetype
def make_df_shock_updown(df, shock=shock_after_name, direction='in'):
    ''' Computes, for every node, the net fraction of its neighbors that are
    in a growing rather than a declining archetype.

    Neighbors flagged 'Permanent_boost_r0.01' in df_occ count +1 and those
    flagged 'Phase_out_r0.01' count -1. Neighbors flagged
    'Temporary_boost_r0.01' count +1 for a "before" shock and -1 otherwise.
    The total is divided by the node's number of neighbors. Only edges with
    a strictly positive edge_weight count, and self-loops are ignored.

    Parameters
    ----------
    df : edge list with the columns 'OCC_source', 'OCC_target' and the
        edge_weight column.
    shock : name of the shock column. Any name starting with
        shock_before_name (for example its '_norm' variant) selects the
        "before" formula; it is also used to fill 'node_shock'.
    direction : 'in' looks at the sources of edges pointing to the node,
        'out' looks at the targets of edges leaving the node.

    Returns
    -------
    DataFrame with the columns 'OCC_' + node, 'updown_shock' and
    'node_shock' (the node's own value of shock).

    Raises
    ------
    ValueError if direction is neither 'in' nor 'out'.
    '''
    if direction == 'in':
        node, neighbors = 'target', 'source'
    elif direction == 'out':
        node, neighbors = 'source', 'target'
    else:
        raise ValueError(
            "direction must be 'in' or 'out', got %r" % (direction,))

    node_col = 'OCC_' + node
    neigh_col = 'OCC_' + neighbors

    # Remove self-loops and keep only edges with edge_weight > 0
    df_temp = df[df['OCC_source'] != df['OCC_target']]
    df_temp = df_temp[df_temp[edge_weight] > 0]

    # Per node, count the neighbors belonging to each archetype
    archetype_columns = [
        ('n_phaseout', 'Phase_out_r0.01'),
        ('n_temp', 'Temporary_boost_r0.01'),
        ('n_perm', 'Permanent_boost_r0.01'),
    ]
    counts = []
    for count_name, archetype in archetype_columns:
        occ_in_archetype = df_occ.loc[df_occ[archetype] == 1, 'O*NET-SOC Code']
        in_archetype = df_temp.loc[df_temp[neigh_col].isin(occ_in_archetype)]
        n_archetype = in_archetype.groupby(node_col)[neigh_col].count()
        n_archetype.name = count_name
        counts.append(n_archetype)

    # Total number of neighbors per node
    neighs = df_temp.groupby(node_col)[neigh_col].count()
    neighs.name = 'n_neighs'
    counts.append(neighs)

    # Nodes without neighbors in an archetype get a count of 0
    o = pd.concat(counts, axis=1).fillna(0)

    # A prefix test is used so that the normalised variant of the "before"
    # shock (shock_before_name + '_norm') also takes the "before" formula.
    if shock.startswith(shock_before_name):
        o['updown_shock'] = (o.n_perm + o.n_temp - o.n_phaseout) / o.n_neighs
    else:
        o['updown_shock'] = (o.n_perm - o.n_temp - o.n_phaseout) / o.n_neighs

    df_output = o[['updown_shock']].reset_index()

    df_output['node_shock'] = df_output[node_col].map(make_dict_shock(shock))

    return df_output

 

def make_dict_updown_shock(df, node):
    '''Returns a dictionary mapping the occupations in the 'OCC_' + node
    column of df to the matching values of its 'updown_shock' column.
    '''
    occupations = df['OCC_' + node]
    net_fractions = df['updown_shock']
    return {occ: value for occ, value in zip(occupations, net_fractions)}

In [None]:
# Inspect the edges that carry a strictly positive weight
df_edgelist.loc[df_edgelist[edge_weight] > 0]

In [None]:

# Add the neighbor shocks to df_occ for every combination of normalisation
# (raw / '_norm'), direction (in / out) and period (before / after).
node_by_direction = {'in': 'target', 'out': 'source'}

for n in norm_not_norm:
    for d in directions:
        # column of the edge list that holds the node itself
        node = node_by_direction[d]
        for s in shock_time:
            s_name = (shock_before_name if s == 'before'
                      else shock_after_name) + n
            column_suffix = d + s + n

            # pool shock of the neighbors
            df_temp = make_df_shock_pool(
                df_edgelist, shock=s_name, direction=d)
            dict_occ_neigh = make_dict_pool_shock(df_temp, node)
            df_occ['pool_shock_' + column_suffix] = (
                df_occ['O*NET-SOC Code'].map(dict_occ_neigh))

            # net fraction of neighbors in the up or down regime
            df_temp = make_df_shock_updown(
                df_edgelist, shock=s_name, direction=d)
            dict_occ_neigh = make_dict_updown_shock(df_temp, node)
            df_occ['frac_updown_' + column_suffix] = (
                df_occ['O*NET-SOC Code'].map(dict_occ_neigh))

In [None]:

#####
# Permanent boost analysis
#####

# Write the occupation table to CSV, without the index column.
# NOTE(review): this cell only exports df_occ; no permanent-boost specific
# computation is visible here - confirm the section title is still accurate.
export_path = path_data + file_export
df_occ.to_csv(export_path, index=False)