In [None]:
import numpy as np
import pandas as pd
from IPython.display import display, HTML


# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
path_data = "../data/omn/"
path_data_maria = "../data/omn/"  # same location as path_data at the moment
path_data_scenarios = "../results/Data_out/results/"
path_fig = "../results/figs/"

# ---------------------------------------------------------------------------
# Input files
# ---------------------------------------------------------------------------
# Career changers / related occupations, taken from
# https://www.onetcenter.org/dictionary/27.0/excel/related_occupations.html
# To the original author's understanding this is the updated version of the
# "Career Changers" matrix, but it uses a different classification system.
file_career_changers = "Related Occupations.xlsx"
file_xwalk = "soc_2010_to_2018_crosswalk.csv"
# census files
file_edgelist = "jb/edgelist_qualitycontrol_2011_2019_bls-9feb.csv"

# ---------------------------------------------------------------------------
# Model parameters
# ---------------------------------------------------------------------------
# Self-loop value according to calculations following Lehm et al.
occ_mobility = 0.06415
self_loop = 1 - occ_mobility
adjustment = 0.55

start_year = 2021
# mark_year splits the horizon into two periods: it is excluded from the first
# period and is the first year of the second period.
mark_year = 2034
end_year = 2038  # alternatives considered: mark_year + time_until_mark, 2048
print(mark_year, end_year)

shock_type = "relbase"
file_arch = f"occ_archetypes_thresholds_{shock_type}_{mark_year}_{end_year}.csv"

archaetypes = [
    'Phase_out_r0.01',
    'Permanent_boost_r0.01',
    'Temporary_boost_r0.01',
    'Late_boost_r0.01',
]

# ---------------------------------------------------------------------------
# Import archetypes (shock scenarios per occupation)
# ---------------------------------------------------------------------------
df_shocks = pd.read_csv(path_data_maria + file_arch)
shocks_soc = set(df_shocks["O*NET-SOC Code"])

# Number of related occupations considered per source occupation
n_top = 20

# ---------------------------------------------------------------------------
# Career Changers / Related Occupations file
# ---------------------------------------------------------------------------
df_cc_raw = pd.read_excel(path_data + file_career_changers)
# Keep only the first 7 characters of each code ("XX-XXXX", i.e. the 6-digit
# SOC code), dropping any O*NET suffix.
for soc_col in ("O*NET-SOC Code", "Related O*NET-SOC Code"):
    df_cc_raw[soc_col] = df_cc_raw[soc_col].str.slice(stop=7)

# Crosswalk used to translate SOC 2018 codes to SOC 2010
df_xwalk = pd.read_csv(path_data + file_xwalk, sep=";")
dict_soc18_soc10 = dict(
    zip(df_xwalk["2018 SOC Code"], df_xwalk["2010 SOC Code"])
)


In [None]:
# Attach the crosswalk columns to every row, matching on the *source*
# occupation code (left join: rows without a crosswalk match are kept).
df_cc_raw = pd.merge(
    df_cc_raw,
    df_xwalk,
    how="left",
    left_on="O*NET-SOC Code",
    right_on="2018 SOC Code",
)

In [None]:
# Replace the source code by its SOC 2010 equivalent, then discard the
# crosswalk helper columns brought in by the merge.
df_cc_raw['O*NET-SOC Code'] = df_cc_raw['2010 SOC Code']
df_cc_raw = df_cc_raw.drop(columns=['2018 SOC Code', '2010 SOC Code'])

In [None]:
# Same crosswalk step for the *related* (target) occupation code.
df_cc_raw = pd.merge(
    df_cc_raw,
    df_xwalk,
    how="left",
    left_on="Related O*NET-SOC Code",
    right_on="2018 SOC Code",
)
# Replace the related code by its SOC 2010 equivalent
df_cc_raw['Related O*NET-SOC Code'] = df_cc_raw['2010 SOC Code']
# Discard the crosswalk helper columns
df_cc_raw = df_cc_raw.drop(columns=['2018 SOC Code', '2010 SOC Code'])

In [None]:
# Aggregating codes to match
def map_socs_to_shock(x, valid_socs=None):
    """Map a SOC code onto the (possibly aggregated) code used in the shock data.

    The code is returned unchanged when it is a known code. Otherwise its
    last one, two and then three characters are replaced by zeros (e.g.
    ``"13-2011" -> "13-2010" -> "13-2000" -> "13-0000"``) and the first
    candidate found among the known codes is returned.

    Parameters
    ----------
    x : str or any
        SOC code to map. Non-string values (e.g. NaN produced by a failed
        crosswalk match) cannot be aggregated and map to NaN.
    valid_socs : collection of str, optional
        Codes to match against. Defaults to the module-level ``shocks_soc``.

    Returns
    -------
    str or float
        The matching code, or ``np.nan`` when no candidate matches.
    """
    if valid_socs is None:
        valid_socs = shocks_soc
    if x in valid_socs:
        return x
    # Only strings can be aggregated by zero-padding; this replaces the
    # former bare ``except`` that silently swallowed every error.
    if not isinstance(x, str):
        return np.nan
    for n_zeroed in (1, 2, 3):
        candidate = x[:-n_zeroed] + "0" * n_zeroed
        if candidate in valid_socs:
            return candidate
    return np.nan
        
# Bring both ends of every edge onto the coding used by the shock data.
for soc_col in ('O*NET-SOC Code', 'Related O*NET-SOC Code'):
    df_cc_raw[soc_col] = df_cc_raw[soc_col].apply(map_socs_to_shock)

In [None]:

df_cc_raw['O*NET-SOC Code'].nunique(dropna=False)

# Group edges: after aggregation several raw rows can describe the same
# (source, related) pair, so average their relatedness rank ('Index').
edge_keys = ['O*NET-SOC Code', 'Related O*NET-SOC Code']
df_grouped = df_cc_raw.groupby(edge_keys)['Index'].mean().reset_index()

# Row labels of the n_top lowest-'Index' related occupations per source.
top_ranked = df_grouped.groupby(['O*NET-SOC Code'])['Index']\
    .nsmallest(n_top, keep='first')
idx = top_ranked.index.get_level_values(1)

# Start the dataframe that holds all edges and later receives node info
# such as shocks; rows are ordered by source occupation.
source_order = df_grouped['O*NET-SOC Code'].drop_duplicates()
df_cc = df_grouped.loc[idx].set_index('O*NET-SOC Code')\
    .loc[source_order].reset_index()

# Remove related occupations that are self loops (these are added later).
is_self_loop = df_cc['O*NET-SOC Code'] == df_cc['Related O*NET-SOC Code']
df_cc = df_cc[~is_self_loop]

# Out degree: number of remaining edges per source occupation.
out_counts = df_cc['O*NET-SOC Code'].value_counts(dropna=False)
values = out_counts.keys().tolist()
counts = out_counts.tolist()
value_dict = dict(zip(values, counts))
df_cc['out_degree'] = df_cc['O*NET-SOC Code'].map(value_dict)
# The mobile share of workers is spread uniformly over the out-edges.
df_cc["trans_prob_cc"] = occ_mobility * (1./df_cc["out_degree"])

# Every occupation that appears at either end of an edge.
cc_soc = set(df_cc["O*NET-SOC Code"]) | set(df_cc["Related O*NET-SOC Code"])
len(cc_soc)


# add self loops
# One self-loop edge per occupation, carrying the probability of staying
# (self_loop). The new rows get labels strictly after the last existing label:
# starting at last_col + 0 would overwrite the final real edge of df_cc.
last_col = df_cc.index[-1]
# Sorted so that the row order (and the exported CSV) is reproducible;
# iterating a set directly gives no order guarantee.
self_loop_socs = sorted(cc_soc)
df_self_loops = pd.DataFrame(
    {
        'O*NET-SOC Code': self_loop_socs,
        'Related O*NET-SOC Code': self_loop_socs,
        'Index': np.nan,  # a self loop has no relatedness rank
        'out_degree': [value_dict[soc] for soc in self_loop_socs],
        'trans_prob_cc': self_loop,
    },
    index=range(last_col + 1, last_col + 1 + len(self_loop_socs)),
)
# Single concat instead of growing the frame one row at a time.
df_cc = pd.concat([df_cc, df_self_loops])
    
    
########
# Census data   
########

df = pd.read_csv(path_data + file_edgelist)

# NOTE: df_edgelist is an alias of df (same object), not a copy.
df_edgelist = df

# Lookup tables from the shock data, keyed by occupation code.
shock_codes = df_shocks['O*NET-SOC Code']
dict_soc_title = dict(zip(shock_codes, df_shocks["OCC_TITLE"]))
dict_soc_emp = dict(zip(shock_codes, df_shocks["TOT_EMP"]))
dict_soc_shock_before = dict(
    zip(shock_codes, df_shocks["shock_before_" + str(mark_year)])
)
dict_soc_shock_after = dict(
    zip(shock_codes, df_shocks["shock_after_" + str(mark_year)])
)

# Outflow probability: transitions divided by last year's employment of the
# origin occupation (OCCLY).
df_edgelist["transition_prob_BLS_ASECnorm"] = (
    df_edgelist["transition_adj_networkers_BLS"]
    / df_edgelist["EMPOCCLY_unadj_BLS"]
)

# Inflow probability: transitions divided by this year's employment of the
# destination occupation (OCC).
df_edgelist["transition_prob_BLS_ASECnorm_in"] = (
    df_edgelist["transition_adj_networkers_BLS"]
    / df_edgelist["EMPOCC_unadj_BLS"]
)

### NOTE Since some occupations are dropped (Fishing etc), we need to
# renormalize

# Total outflow probability per origin / inflow probability per destination.
out_totals = df_edgelist.groupby("OCCLY_BLS")\
    ["transition_prob_BLS_ASECnorm"].sum()
in_totals = df_edgelist.groupby("OCC_BLS")\
    ["transition_prob_BLS_ASECnorm_in"].sum()

print("check probs sum to one ", np.isclose(out_totals, 1).all())

print("check probs sum to one ", np.isclose(in_totals, 1).all())

dict_soc_outdeg = out_totals.to_dict()

dict_soc_indeg = in_totals.to_dict()

# Correction factors used to renormalise the transition probabilities after
# some occupations were dropped. For a row/column total s the factor is 2 - s.
# NOTE(review): 2 - s only approximates the exact factor 1 / s (the corrected
# total is 1 - (1 - s)**2); the checks below therefore use a 1% tolerance.
dict_soc_corr = {soc: 2 - total for soc, total in dict_soc_outdeg.items()}
dict_soc_corr_in = {soc: 2 - total for soc, total in dict_soc_indeg.items()}

# Outflow side: scale each edge by the factor of its origin occupation.
df_edgelist["corr"] = df_edgelist["OCCLY_BLS"].map(dict_soc_corr)
df_edgelist["transition_prob_BLS_ASECnorm_corr"] = \
    df_edgelist["transition_prob_BLS_ASECnorm"]*df_edgelist["corr"]

# Inflow side: scale each edge by the factor of its destination occupation.
# This must use the inflow factors (dict_soc_corr_in); mapping the outflow
# factors here would renormalise inflows with the wrong totals.
df_edgelist["corr_in"] = df_edgelist["OCC_BLS"].map(dict_soc_corr_in)
df_edgelist["transition_prob_BLS_ASECnorm_in_corr"] = \
    df_edgelist["transition_prob_BLS_ASECnorm_in"]*df_edgelist["corr_in"]

# Report occupations whose corrected totals are still not ~1.
out_totals_corr = df_edgelist.groupby("OCCLY_BLS")\
    ["transition_prob_BLS_ASECnorm_corr"].sum()
print('not sum 1 (should be empty)',
      out_totals_corr[~np.isclose(out_totals_corr, 1, rtol=0.01)])

in_totals_corr = df_edgelist.groupby("OCC_BLS")\
    ["transition_prob_BLS_ASECnorm_in_corr"].sum()
print('not sum 1 (should be empty)',
      in_totals_corr[~np.isclose(in_totals_corr, 1, rtol=0.01)])

set(df_edgelist['OCCLY_BLS'])
set(df_cc['O*NET-SOC Code'])

#### Merge

# Drop columns that are not needed downstream.
asec_cols = ['Unnamed: 0', 'OCC_ASEC', 'OCCLY_label_ASEC', 'OCC_label_ASEC',
             'OCCLY_ASEC']
df_edgelist = df_edgelist.drop(columns=asec_cols)
df_cc = df_cc.drop(columns=['Index'])

# Give both edge lists the same source/target column names.
df_cc = df_cc.rename(columns={
    'O*NET-SOC Code': 'OCC_source',
    'Related O*NET-SOC Code': 'OCC_target',
})
df_edgelist = df_edgelist.rename(columns={
    'OCCLY_BLS': 'OCC_source',
    'OCC_BLS': 'OCC_target',
})

df_edgelist.columns

# Columns of the census edge list that are carried into the merged network.
mobility_cols = [
    'OCC_source', 'OCC_target',
    'transition_prob_BLS_ASECnorm_corr',
    'transition_prob_BLS_ASECnorm_in_corr',
    'OCC_TITLE_OCC', 'OCC_TITLE_OCCLY',
    'EMPOCCLY_unadj_BLS', 'EMPOCC_unadj_BLS',
    'TOT_EMP_OCC', 'A_MEAN_OCC', 'TOT_EMP_OCCLY',
]
df_mobility = df_edgelist[mobility_cols]


In [None]:
# Inflow version of the career-changers probability:
#  * every non-self edge into `occ` gets the mean trans_prob_cc of all
#    non-self edges into `occ`;
#  * the self loop keeps its trans_prob_cc.
for occ in set(df_cc.OCC_target):
    into_occ = df_cc.OCC_target == occ
    from_occ = df_cc.OCC_source == occ
    incoming = into_occ & ~from_occ
    own_loop = into_occ & from_occ

    incoming_probs = df_cc.loc[incoming, 'trans_prob_cc']
    prob_sum = incoming_probs.sum()
    prob_count = incoming_probs.count()

    df_cc.loc[incoming, 'trans_prob_cc_in'] = prob_sum / prob_count
    df_cc.loc[own_loop, 'trans_prob_cc_in'] = df_cc.loc[own_loop, 'trans_prob_cc']

In [None]:
set(df_edgelist['OCC_source'])
set(df_cc['OCC_source'])

# Persist both edge lists (without the index) before they are merged.
for frame, file_name in (
    (df_cc, "career_changers_mobility_edgelist.csv"),
    (df_mobility, "asec_to_bls_mobility_edgelist.csv"),
):
    frame.to_csv(path_data + file_name, index=False)

################
## Make merger
###############

In [None]:
# Outer merge keeps edges that exist in only one of the two networks;
# the `_merge` indicator column records where each edge came from.
df_both = df_mobility.merge(
    df_cc,
    on=["OCC_source", "OCC_target"],
    how="outer",
    indicator=True,
)

print('edges in cc', len(df_cc))
print('edges in mobility', len(df_mobility))

# Global mixing weight: share of mobility edges among all edges.
α = len(df_mobility)/(len(df_cc) +  len(df_mobility))

# Joris added alpha_specific
# Same idea per source occupation: number of positive-probability out-edges
# in each network.
has_cc_edge = df_both['trans_prob_cc'] > 0
has_omn_edge = df_both['transition_prob_BLS_ASECnorm_corr'] > 0
count_cc = df_both.loc[has_cc_edge]\
    .groupby("OCC_source")['trans_prob_cc'].count()
count_omn = df_both.loc[has_omn_edge]\
    .groupby("OCC_source")['transition_prob_BLS_ASECnorm_corr'].count()

alpha_specific =  count_omn / (count_cc + count_omn)

In [None]:
# Degree in the merged network: number of distinct neighbours per node.
n_targets_per_source = df_both.groupby('OCC_source')['OCC_target'].nunique()
n_sources_per_target = df_both.groupby('OCC_target')['OCC_source'].nunique()
df_both['out_degree_both'] = df_both['OCC_source'].map(n_targets_per_source)
df_both['in_degree_both'] = df_both['OCC_target'].map(n_sources_per_target)

# Uniform transition probabilities over in-/out-neighbours.
df_both['trans_unif_in'] = 1 / df_both['in_degree_both']
df_both['trans_unif'] = 1 / df_both['out_degree_both']

In [None]:
df_both["_merge"].value_counts()

# `_merge` is converted to plain strings before the blanket fillna(0).
df_both['_merge'] = df_both['_merge'].astype(str)
# Edges missing from one network get probability 0 in that network.
df_both = df_both.fillna(0)

# Spot checks for two occupations (values are not displayed).
from_391010 = df_both["OCC_source"] == '39-1010'
from_111010 = df_both["OCC_source"] == "11-1010"
df_both.loc[from_391010, "transition_prob_BLS_ASECnorm_corr"].sum()
df_both.loc[from_391010, "trans_prob_cc"].sum()
df_both.loc[from_111010, "trans_prob_cc"].sum()
df_both.loc[from_111010, "transition_prob_BLS_ASECnorm_corr"].sum()

# Total probability mass per node in each network; a total of 0 means the
# node has no edges in that network.
by_source = df_both.groupby("OCC_source")
by_target = df_both.groupby("OCC_target")

dict_soc_zerocc = by_source['trans_prob_cc'].sum().to_dict()
dict_soc_zeromobility = by_source['transition_prob_BLS_ASECnorm_corr']\
    .sum().to_dict()

dict_soc_zerocc_in = by_target['trans_prob_cc'].sum().to_dict()
dict_soc_zeromobility_in = by_target['transition_prob_BLS_ASECnorm_in_corr']\
    .sum().to_dict()

def _blend_transition_probs(edges, node_col, mob_col, cc_col, cc_total,
                            mob_total, alpha, n_mob_edges=None,
                            min_mob_edges=3):
    """Blend mobility and career-changers probabilities edge by edge.

    For every edge, the node in ``node_col`` decides which rule applies:

    * node has mass in both networks (and, when ``n_mob_edges`` is given,
      more than ``min_mob_edges`` mobility edges):
      ``alpha * mobility + (1 - alpha) * cc``;
    * node has no career-changers mass: the mobility probability;
    * node has no mobility mass (or too few mobility edges): the
      career-changers probability;
    * otherwise NaN.

    Parameters
    ----------
    edges : pandas.DataFrame
        Merged edge list.
    node_col : str
        Column identifying the node the rule is evaluated for
        ("OCC_source" for outflows, "OCC_target" for inflows).
    mob_col, cc_col : str
        Columns holding the mobility and career-changers probabilities.
    cc_total, mob_total : dict
        Total career-changers / mobility mass per node.
    alpha : float or pandas.Series
        Mixing weight, either global or per node (indexed by node code).
    n_mob_edges : pandas.Series, optional
        Number of mobility edges per node. Nodes missing from it count as 0.
    min_mob_edges : int
        Threshold used together with ``n_mob_edges``.

    Returns
    -------
    pandas.Series
        Blended probability, aligned with ``edges.index``.
    """
    node = edges[node_col]
    node_cc_total = node.map(cc_total)
    node_mob_total = node.map(mob_total)

    if n_mob_edges is None:
        enough_mob = pd.Series(True, index=edges.index)
    else:
        enough_mob = node.map(n_mob_edges).fillna(0) > min_mob_edges

    if isinstance(alpha, pd.Series):
        alpha = node.map(alpha)
    mixed = alpha * edges[mob_col] + (1 - alpha) * edges[cc_col]

    # np.select takes the first matching condition, like the if/elif chain
    # this replaces.
    blended = np.select(
        [
            (node_cc_total > 0) & (node_mob_total > 0) & enough_mob,
            node_cc_total == 0,
            (node_mob_total == 0) | ~enough_mob,
        ],
        [mixed, edges[mob_col], edges[cc_col]],
        default=np.nan,
    )
    return pd.Series(blended, index=edges.index)


# Argument bundles for the two directions. Inflow quantities are always
# evaluated for the *target* node, so they use the target-grouped totals and
# the inflow probability columns.
_out_args = dict(
    node_col="OCC_source",
    mob_col="transition_prob_BLS_ASECnorm_corr",
    cc_col="trans_prob_cc",
    cc_total=dict_soc_zerocc,
    mob_total=dict_soc_zeromobility,
)
_in_args = dict(
    node_col="OCC_target",
    mob_col="transition_prob_BLS_ASECnorm_in_corr",
    cc_col="trans_prob_cc_in",
    cc_total=dict_soc_zerocc_in,
    mob_total=dict_soc_zeromobility_in,
)

# --- occupation-specific alpha ---------------------------------------------
# The cc-only branch is written to the *_spec column itself; writing it to
# "trans_merge_alphaweight" would leave NaNs here.
df_both["trans_merge_alphaweight_spec"] = _blend_transition_probs(
    df_both, alpha=alpha_specific, **_out_args)
# NOTE(review): alpha_specific and count_omn are computed per OCC_source but
# are looked up by target code for the inflow variants - confirm intended.
df_both["trans_merge_alphaweight_in_spec"] = _blend_transition_probs(
    df_both, alpha=alpha_specific, **_in_args)

# --- occupation-specific alpha, only with more than 3 mobility edges -------
df_both["trans_merge_alphaweight_spec_plus3"] = _blend_transition_probs(
    df_both, alpha=alpha_specific, n_mob_edges=count_omn, **_out_args)
df_both["trans_merge_alphaweight_in_spec_plus3"] = _blend_transition_probs(
    df_both, alpha=alpha_specific, n_mob_edges=count_omn, **_in_args)

# --- global alpha weighted by the number of edges ---------------------------
α = len(df_mobility)/(len(df_cc) +  len(df_mobility))
df_both["trans_merge_alphaweight"] = _blend_transition_probs(
    df_both, alpha=α, **_out_args)
df_both["trans_merge_alphaweight_in"] = _blend_transition_probs(
    df_both, alpha=α, **_in_args)

# --- fixed alpha = 0.5 ------------------------------------------------------
α = 0.5
df_both["trans_merge_alpha05"] = _blend_transition_probs(
    df_both, alpha=α, **_out_args)
# Uses trans_prob_cc_in, like the other inflow columns.
df_both["trans_merge_alpha05_in"] = _blend_transition_probs(
    df_both, alpha=α, **_in_args)

# Sanity checks: nodes whose merged probabilities do not sum to 1.
out_totals_merged = df_both.groupby("OCC_source")['trans_merge_alphaweight'].sum()
display(out_totals_merged[~np.isclose(out_totals_merged, 1)])

in_totals_merged = df_both.groupby("OCC_target")['trans_merge_alphaweight_in'].sum()
in_totals_merged[~np.isclose(in_totals_merged, 1)]

In [None]:
# Display the merged edge list (last expression of the cell is rendered).
df_both

In [None]:
# add node data
# Node attributes come from the mobility edge list. The primary lookup is
# keyed by target occupation; titles and employment are also available keyed
# by source occupation (the *_OCCLY columns), which serves as a fallback for
# occupations that never appear as a mobility target.
_by_target = df_mobility.set_index('OCC_target')
_by_source = df_mobility.set_index('OCC_source')

occ_title_occ_dict = _by_target['OCC_TITLE_OCC'].to_dict()
occ_emp_occ_dict = _by_target['TOT_EMP_OCC'].to_dict()
a_mean_occ_dict = _by_target['A_MEAN_OCC'].to_dict()

occ_title_occly_dict = _by_source['OCC_TITLE_OCCLY'].to_dict()
occ_emp_occly_dict = _by_source['TOT_EMP_OCCLY'].to_dict()


def _map_with_fallback(codes, primary, fallback):
    """Map codes through `primary`; fill remaining gaps from `fallback`."""
    return codes.map(primary).fillna(codes.map(fallback))


df_both['OCC_TITLE_OCC'] = _map_with_fallback(
    df_both.OCC_target, occ_title_occ_dict, occ_title_occly_dict)
df_both['OCC_TITLE_OCCLY'] = _map_with_fallback(
    df_both.OCC_source, occ_title_occ_dict, occ_title_occly_dict)
df_both['TOT_EMP_OCC'] = _map_with_fallback(
    df_both.OCC_target, occ_emp_occ_dict, occ_emp_occly_dict)
df_both['TOT_EMP_OCCLY'] = _map_with_fallback(
    df_both.OCC_source, occ_emp_occ_dict, occ_emp_occly_dict)
# Mean wage is only available keyed by target occupation.
df_both['A_MEAN_OCC'] = df_both.OCC_target.map(a_mean_occ_dict)


df_mobility


df_both.to_csv(path_data + "edgelist_cc_mobility_merge.csv",index=False)

set(df_both['OCC_source'])

In [None]:
# Total merged inflow probability per target, self loops excluded.
not_self_loop = df_both.OCC_source != df_both.OCC_target
in_sum = df_both.loc[
    not_self_loop, ['OCC_target', 'trans_merge_alphaweight_in_spec_plus3']
].groupby('OCC_target').sum()

# Targets that receive no inflow at all under the "plus3" rule.
in_sum[in_sum['trans_merge_alphaweight_in_spec_plus3'] == 0]

In [None]:
# I now impute these with trans_merge_alphaweight_in_spec

In [None]:
# For these two target occupations the "plus3" inflow column is replaced by
# the plain occupation-specific one (trans_merge_alphaweight_in_spec).
# The codes are hard-coded; re-check them if the input data change.
for occ_code in ('43-5040', '49-9095'):
    into_occ = df_both.OCC_target == occ_code
    df_both.loc[into_occ, 'trans_merge_alphaweight_in_spec_plus3'] = \
        df_both.loc[into_occ, 'trans_merge_alphaweight_in_spec']

In [None]:
# Re-export the merged edge list so the file includes the imputed inflow column.
df_both.to_csv(path_data + "edgelist_cc_mobility_merge.csv",index=False)

In [None]:
# Total career-changers probability flowing into each target, self loops
# excluded.
not_self_loop = df_both.OCC_source != df_both.OCC_target
cc_sum = df_both.loc[not_self_loop, ['OCC_target', 'trans_prob_cc']]\
    .groupby('OCC_target').sum()

# Targets without any incoming career-changers edge.
cc_sum[cc_sum['trans_prob_cc'] == 0]

In [None]:
# Total career-changers probability leaving each source, self loops excluded.
# NOTE: this rebinds cc_sum, now indexed by OCC_source.
not_self_loop = df_both.OCC_source != df_both.OCC_target
cc_sum = df_both.loc[not_self_loop, ['OCC_source', 'trans_prob_cc']]\
    .groupby('OCC_source').sum()

# Sources without any outgoing career-changers edge.
cc_sum[cc_sum['trans_prob_cc'] == 0]

In [None]:
# Codes of the occupations in cc_sum whose career-changers total is zero.
missing_cc = cc_sum.loc[cc_sum['trans_prob_cc'] == 0].index

In [None]:
# Titles ('OCC_TITLE_OCCLY') of all source occupations listed in missing_cc.
is_missing_source = df_both.OCC_source.isin(missing_cc)
df_both.loc[is_missing_source, ['OCC_source', 'OCC_TITLE_OCCLY']].drop_duplicates()

In [None]:
# Sources with no career-changers outflow get the uniform probability
# (trans_unif) on all of their edges instead.
zero_cc_sources = cc_sum[cc_sum.trans_prob_cc == 0].index
from_zero_cc = df_both.OCC_source.isin(zero_cc_sources)
df_both.loc[from_zero_cc, 'trans_prob_cc'] = df_both.loc[from_zero_cc, 'trans_unif']

In [None]:
# Final export of the merged edge list, now including the imputed trans_prob_cc.
df_both.to_csv(path_data + "edgelist_cc_mobility_merge.csv",index=False)