In [None]:
import pandas as pd
import numpy as np
from matplotlib import pylab as plt
import matplotlib.colors
import matplotlib.ticker as mtick

# --- Input / output locations (relative to the notebook) ---
path_data_maria = "../data/omn/"
path_data_scenarios = "../results/Data_out/results/"
path_fig = "../results/figs/"

# --- Time horizon ---
# mark_year splits the horizon in two: it is excluded from the first
# period and is the first year of the second period.
start_year = 2021
mark_year = 2034
end_year = 2038
print(mark_year, end_year)

shock_type = "relbase"

# Name of the archetype table to read and stem of the figure files to write;
# both are keyed by the shock type and the two period boundaries.
file_arch = "_".join(
    ["occ_archetypes_thresholds", shock_type, str(mark_year), str(end_year)]
) + ".csv"

fig_out_name = "_".join(
    ["archetypes_symlog", shock_type, str(mark_year), str(end_year)]
)

# Archetype column names, one per archetype
archaetypes = [
    'Phase_out_r0.01',
    'Permanent_boost_r0.01',
    'Temporary_boost_r0.01',
    'Late_boost_r0.01',
]

####################
# import archetypes
###################
# Names of the raw shock columns for the two periods, derived from mark_year
shock_after_name = 'shock_after_' + str(mark_year)
shock_before_name = 'shock_before_' + str(mark_year)

df_nodes = pd.read_csv(path_data_maria + file_arch)

# Add normalized shock columns (shock divided by total employment)
df_nodes[shock_after_name + '_norm'] = df_nodes[shock_after_name] \
    / df_nodes['TOT_EMP']
df_nodes[shock_before_name + '_norm'] = df_nodes[shock_before_name] \
    / df_nodes['TOT_EMP']
# add log employment column
df_nodes['TOT_EMP_log'] = np.log(df_nodes['TOT_EMP'])

# Reduced frame used for plotting. The shock column names are built from
# mark_year (they used to be hard-coded to 2034, which broke the selection
# as soon as mark_year changed) and the archetype columns come from the
# archaetypes list. .copy() makes the frame independent of df_nodes.
plot_columns = (
    ['O*NET-SOC Code', 'OCC_TITLE', 'TOT_EMP', 'A_MEAN']
    + list(archaetypes)
    + [
        shock_after_name,
        shock_before_name,
        shock_after_name + '_norm',
        shock_before_name + '_norm',
        'TOT_EMP_log',
    ]
)
df_all_shocks = df_nodes[plot_columns].copy()


wages = df_all_shocks["A_MEAN"]
# NOTE: the "2035" in the names below does not track mark_year; the series
# are always the ones keyed by mark_year. The names are kept because other
# cells refer to them.
shock_after_2035 = df_all_shocks[shock_after_name]
shock_before_2035 = df_all_shocks[shock_before_name]
# Reuse the normalized columns computed above instead of dividing again
shock_after_2035_norm = df_all_shocks[shock_after_name + '_norm']
shock_before_2035_norm = df_all_shocks[shock_before_name + '_norm']




######
# Set parameters for plotting
######

# symlogthresh is presumably the linear threshold of a symlog axis; it is
# not used in the cells visible here. fontsize is used for quadrant labels.
symlogthresh = 0.05
fontsize = 12

n_occ = len(df_all_shocks)

# Colours of the quadrant labels and of the quadrant divider lines
color_quarter_text = "purple"
color_quarter = "k"
# Marker size: constant offset plus a term proportional to employment
emp_size = 70 + 0.0001 * df_all_shocks["TOT_EMP"]
# One edge colour and one edge line width per occupation
egcolors = ["grey"] * n_occ
lnwidth = [0.3] * n_occ

#####
# Define functions for plotting
#####

In [None]:
# Natural log of the A_MEAN column, stored next to the raw values
df_nodes['log_A_MEAN'] = df_nodes['A_MEAN'].pipe(np.log)

In [None]:
# Correlation matrix of the numeric columns with log_A_MEAN listed first.
# Non-numeric columns (e.g. occupation titles) are dropped explicitly because
# DataFrame.corr raises on them in recent pandas, and log_A_MEAN is selected
# only once (it is already part of df_nodes.columns).
(
    df_nodes
    .select_dtypes(include="number")
    .pipe(lambda numeric: numeric[
        ['log_A_MEAN']
        + [col for col in numeric.columns if col != 'log_A_MEAN']
    ])
    .corr()
)

In [None]:
def set_quadrants(ax):
    """Split ``ax`` into quadrants with dashed lines through x = 0 and y = 0.

    The line colour is read from the module-level ``color_quarter``.
    """
    for draw_line in (ax.axvline, ax.axhline):
        draw_line(0, c=color_quarter, linestyle="--")
    
def set_quadrant_labels(ax=None):
    """Write the four quadrant captions onto an axes.

    Positions are given in axes fractions, so the captions stay in the same
    corners whatever the data limits are. Font size and colour come from the
    module-level ``fontsize`` and ``color_quarter_text``.

    Parameters
    ----------
    ax : matplotlib Axes, optional
        Axes to label. When omitted, the current pyplot axes is used, which
        matches the previous zero-argument behaviour when the main axes is
        current. Passing an axes explicitly makes it possible to label
        another axes (for instance an inset), which the earlier version could
        not do because it mixed ``plt.text`` with the global ``ax`` transform.
    """
    if ax is None:
        ax = plt.gca()

    # (x fraction, y fraction, caption)
    captions = (
        (0.1, 0.05, 'Consistent\ndecline'),
        (0.65, 0.05, 'Late\ngrowth'),
        (0.65, 0.9, 'Consistent\ngrowth'),
        (0.1, 0.9, 'Temporary\ngrowth'),
    )
    for x_frac, y_frac, caption in captions:
        ax.text(x_frac, y_frac, caption, ha='center', va='center',
                fontsize=fontsize, transform=ax.transAxes,
                color=color_quarter_text)

def add_circle(ax):
    """Overlay an unfilled grey circle of radius 0.01 centred at (0, 0) on ``ax``."""
    outline_style = dict(color='grey', fill=False, alpha=0.8, linewidth=2)
    ax.add_artist(plt.Circle((0, 0), radius=0.01, **outline_style))


def selected_occ(df_all_shocks, shock=shock_before_name + '_norm', \
    criteria_temp=0.005, critera_boost=-0.005):
    """Select the occupations to highlight and add a shortened title.

    An occupation is kept when any of the following holds:

    * ``Permanent_boost_r0.01`` equals 1,
    * ``Late_boost_r0.01`` equals 1,
    * ``Phase_out_r0.01`` equals 1 and ``shock`` is below ``critera_boost``,
    * ``Temporary_boost_r0.01`` equals 1 and ``shock`` is above
      ``criteria_temp``.

    Parameters
    ----------
    df_all_shocks : pandas.DataFrame
        Frame holding the four archetype columns, the ``shock`` column and
        ``OCC_TITLE``.
    shock : str
        Name of the column the two thresholds are compared against.
    criteria_temp : float
        Lower bound (exclusive) on ``shock`` for temporary-boost occupations.
    critera_boost : float
        Upper bound (exclusive) on ``shock`` for phase-out occupations.
        (The parameter name is kept as-is for existing callers.)

    Returns
    -------
    pandas.DataFrame
        Copy of the selected rows with an extra ``short_title`` column.
    """
    shock_values = df_all_shocks[shock]
    is_permanent = df_all_shocks["Permanent_boost_r0.01"] == 1
    is_late = df_all_shocks["Late_boost_r0.01"] == 1
    # only a subset of the phase-out occupations
    is_marked_phase_out = (df_all_shocks['Phase_out_r0.01'] == 1) \
        & (shock_values < critera_boost)
    # only a subset of the temporary-boost occupations
    is_marked_temporary = (df_all_shocks["Temporary_boost_r0.01"] == 1) \
        & (shock_values > criteria_temp)

    # .copy() so the new column is written to an independent frame rather
    # than to a slice of the input (avoids SettingWithCopyWarning).
    df_selected = df_all_shocks[
        is_permanent | is_late | is_marked_phase_out | is_marked_temporary
    ].copy()

    # Shorten titles. Literal replacements are marked regex=False so the
    # result does not depend on the pandas default; "and" is replaced only
    # as a whole word so that titles such as "Handlers" are left intact.
    df_selected["short_title"] = (
        df_selected["OCC_TITLE"]
        .str.replace('First-Line Supervisors of', 'Supervisors of\n',
                     regex=False)
        .str.replace('Miscellaneous', 'Misc.', regex=False)
        .str.replace(r'\band\b', '&', regex=True)
        .str.replace(',', ',\n', regex=False)
    )

    return df_selected

def annotate_plot(sc, ax, df_selected, scale='symlog'):
    """Label the selected occupations next to their scatter points.

    Point coordinates are always read back from the scatter collection, so
    labels sit exactly on the plotted markers. The earlier ``'linear'``
    branch instead indexed full-length global arrays by the running position
    within ``df_selected`` (and with x and y swapped relative to the scatter),
    which attached labels to the wrong points.

    Parameters
    ----------
    sc : scatter collection
        Object whose ``get_offsets()`` returns one (x, y) pair per plotted
        point.
    ax : matplotlib Axes
        Axes that receives the annotations.
    df_selected : pandas.DataFrame
        Rows to label; needs a ``short_title`` column. Its index labels are
        used as integer positions into the scatter offsets, i.e. the plotted
        frame is assumed to carry a default 0..N-1 index.
    scale : {'symlog', 'linear'}
        Both values are handled identically; any other value leaves the plot
        untouched, as before.
    """
    if scale not in ('linear', 'symlog'):
        return

    # Cycle through four text alignments so neighbouring labels overlap less
    alignments = (
        ('right', 'top'),
        ('left', 'top'),
        ('right', 'bottom'),
        ('left', 'bottom'),
    )
    # getdata works for both masked and plain arrays
    offsets = np.ma.getdata(sc.get_offsets())

    for i, txt in df_selected["short_title"].items():
        x, y = offsets[i]
        horizontal, vertical = alignments[i % 4]
        ax.annotate(txt, (x, y), horizontalalignment=horizontal,
                    verticalalignment=vertical)
    


# Occupations to highlight, thresholded on the normalized "before" shock
df_selected = selected_occ(
    df_all_shocks,
    shock='{}_norm'.format(shock_before_name),
)


In [None]:

####
# Plotting
####

# Number of archetype columns summed per occupation; alpha_dots is 1 where
# that sum is exactly 1 and 0.1 otherwise. (The scatter calls further down in
# this cell pass a fixed alpha instead of alpha_dots.)
categorical_arch = df_all_shocks[archaetypes].sum(axis=1)
alpha_dots = np.where(categorical_arch == 1, 1, 0.1)

from matplotlib.collections import LineCollection

annotate_plot_bool = True


# Series.max()/min() skip missing values, whereas the builtin max()/min()
# give order-dependent results when NaN is present.
max_wage = wages.max()
min_wage = wages.min()

# Logarithmic colour scale spanning the observed wage range
norm = matplotlib.colors.LogNorm(vmin=min_wage, vmax=max_wage)


def _percent_tick_labels(ticks, labelled):
    """Return one label per tick: a percentage for ticks listed in
    ``labelled`` (compared with a tolerance) and an empty string otherwise."""
    labels = []
    for tick in ticks:
        if np.isclose(tick, labelled).any():
            # "+ 0.0" turns a negative zero into 0.0 so it prints as "0.0%"
            labels.append("{:.1%}".format(tick + 0.0))
        else:
            labels.append("")
    return labels


# Tick positions that get a text label on the x and y axes
labelticks_x = [-0.1, 0, 0.1, 0.2]
labelticks_y = [-0.25, 0, 0.25, 0.5, 0.75]

# All tick positions (evenly spaced); only the ones above are labelled
ticks_x = np.linspace(-0.1, 0.25, 4 + 10 + 1)
ticks_x = np.round(ticks_x, 5)
new_labels_x = _percent_tick_labels(ticks_x, labelticks_x)

ticks_y = np.linspace(-0.25, 0.85, 5 + 20 - 3 + 1)
ticks_y = np.round(ticks_y, 5)
new_labels_y = _percent_tick_labels(ticks_y, labelticks_y)

# Marker size: constant offset plus a term proportional to employment
emp_size = 75 + 0.0001*df_all_shocks["TOT_EMP"]

### Plot starts here
f, ax = plt.subplots(figsize=(10, 12))
# x: normalized shock after mark_year, y: normalized shock before mark_year;
# colour encodes wages, marker area scales with employment.
sc = plt.scatter(shock_after_2035_norm, shock_before_2035_norm, c=wages, \
    cmap="viridis", norm=norm, alpha=0.7, \
    s=emp_size / 2, edgecolors=egcolors,linewidths=lnwidth)

set_quadrants(ax)
set_quadrant_labels()
add_circle(ax)

ax.tick_params(labelsize=16)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.set_xticks(ticks_x)
ax.set_yticks(ticks_y)

ax.set_xticklabels(new_labels_x)
ax.set_yticklabels(new_labels_y)
# First period runs from start_year up to the year before mark_year
ax.set_ylabel("Percentage change in demand " + str(start_year) + "-" \
    + str(mark_year - 1), \
    fontsize=16)
ax.set_xlabel("Percentage change in demand " + str(mark_year) + "-" \
    + str(end_year), \
    fontsize=16)

# Inset: the same points with full-size markers, zoomed in around the origin
axins = ax.inset_axes([0.5, 0.12, 0.525, 0.7])
axins.scatter(shock_after_2035_norm, shock_before_2035_norm, c=wages, \
    cmap="viridis", norm=norm, alpha=0.7, \
    s=emp_size, edgecolors=egcolors,linewidths=lnwidth)

# Data window shown by the inset
x1, x2, y1, y2 = -0.05, 0.025, -0.05, 0.05
axins.set_xlim(x1, x2)
axins.set_ylim(y1, y2)

set_quadrants(axins)
# NOTE(review): called without an axes, so this presumably writes the
# captions onto the current pyplot axes (the main axes) again rather than
# onto the inset - confirm which of the two is intended.
set_quadrant_labels()
add_circle(axins)

# set xticks
axins.set_xticks([-0.04, -0.02, 0, 0.02])

# Show inset ticks as percentages with one decimal and enlarge their labels
axins.get_xaxis().set_major_formatter(mtick.PercentFormatter(xmax=1.0, decimals=1))
for label in axins.get_xticklabels():
    label.set_fontsize(14)
axins.get_yaxis().set_major_formatter(mtick.PercentFormatter(xmax=1.0, decimals=1))
for label in axins.get_yticklabels():
    label.set_fontsize(14)

# Draw the rectangle and connector lines linking the main axes to the inset
ax.indicate_inset_zoom(axins)


cbar = plt.colorbar(sc, ticks=[2e4, 3e4, 4e4, 6e4, 1.0e5, 2e5], \
                    pad=0.1023, \
                    orientation='horizontal',aspect=30)
# NOTE(review): the colours come from the A_MEAN column while the label says
# "Median" - confirm the wording.
cbar.set_label('Median wage (2018-USD annually)', fontsize=14)

# set colorbar limits
cbar.mappable.set_clim(20000, 150000)

import matplotlib.ticker as ticker
# format colorbar ticks as integers with thousands separators
cbar.ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, p: format(int(x), ',')))

# NOTE: annotate_plot is not called here (original remark: not possible to
# annotate since sc got split); the flag only selects the file-name suffix.
out_suffix = "_annotated" if annotate_plot_bool else ""
for extension in (".svg", ".png"):
    plt.savefig(path_fig + fig_out_name + out_suffix + extension)
plt.show()