In [None]:
# uncomment for wider display:
#from IPython.core.display import display, HTML
#display(HTML("<style>.container { width:95% !important; }</style>"))

### Plotting notebook

#### Most of the functions below have built-in filtering capability.  Uncomment lines which begin with 'attr1' to sample...

##### To view the docstrings for all plotting functions below, click within the name of the function and then use keyboard combination Shift+TAB.

imports and notebook plotting configuration

In [None]:
%%time
import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
from matplotlib import cm

import functions as f
import matplotlib_charting as mp

#plt.style.use('bmh')
#plt.style.use('fivethirtyeight')
sns.set_style('whitegrid')

%matplotlib inline
# Display configuration for pandas and numpy text output.
# Fully-qualified option keys are used throughout: abbreviated keys such as
# 'max_columns' are resolved by pattern matching and raise OptionError when
# more than one option matches (e.g. 'display.max_columns' and
# 'styler.render.max_columns' on newer pandas versions).
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 50)
pd.set_option('display.expand_frame_repr', True)
pd.set_option('display.max_rows', 18)
pd.set_option('display.precision', 3)
pd.set_option('display.width', 150)
# NOTE(review): newer pandas documents None as the "no limit" value for
# max_colwidth - confirm that 0 still gives the intended output.
pd.options.display.max_colwidth = 0

# float_format takes precedence over display.precision for float rendering
pd.set_option('display.float_format', lambda x: '%.4f' % x)
np.set_printoptions(linewidth=200, edgeitems=10)

# notebook-wide matplotlib defaults (figure size, font sizes, line width, legend)
plt.rcParams.update({
    'figure.figsize': (12.0, 9.0),
    'font.size': 14,
    'axes.labelsize': 14,
    'xtick.labelsize': 13,
    'ytick.labelsize': 13,
    'axes.titlesize': 16,
    'lines.linewidth': 3,
    'legend.fontsize': 14,
    'legend.fancybox': True,
})

In [None]:
#%%time
#f.save_and_load_dill_folder(load_case='sample3')
#f.save_and_load_dill_folder()

read and assign dictionary values for computed datasets

In [None]:
%%time
ds_dict = f.load_datasets()

In [None]:
%%time
# Load the pickled dictionaries stored in the 'dill' folder:
#   sdict - settings, adict - attributes, cdict - colors, tdict - job tables
# (roles taken from the file names).
# NOTE: pd.read_pickle unpickles arbitrary Python objects - only load files
# that were generated by this project, never files from an untrusted source.
sdict = pd.read_pickle('dill/dict_settings.pkl')
adict = pd.read_pickle('dill/dict_attr.pkl')
cdict = pd.read_pickle('dill/dict_color.pkl')
tdict = pd.read_pickle('dill/dict_job_tables.pkl')

view of first 5 rows of calculated dataset corresponding to proposal 1

the calculated dataset is stored in the 'dill' folder as ds_p1.pkl

the original proposal list is in the 'p1' worksheet of the 'proposals.xlsx' file, located in the sample3 folder within the excel folder.

the dataframe version of the proposed list is in the dill folder, 'p_p1.pkl'.

In [None]:
%%time
ds_dict['p1'].head()

assign reusable variables for functions below

In [None]:
%%time
# Unpack frequently used entries from the settings (sdict) and color (cdict)
# dictionaries into short names; many of these are passed as arguments to the
# plotting calls further down in the notebook.
enhanced_jobs = sdict['enhanced_jobs']
job_levels = sdict['num_of_job_levels']
eg_colors = cdict['eg_colors']
job_colors = cdict['job_colors']
p_dict = sdict['p_dict']
p_dict_verbose = sdict['p_dict_verbose']
job_strs = sdict['job_strs']
ret_age = sdict['ret_age']
starting_date = sdict['starting_date']
job_dict = sdict['jd']

assign proposal variable (string)

In [None]:
job_levels

In [None]:
%%time
proposal = 'p1'

assign p variable as the proposal dataframe to use for many examples below

In [None]:
%%time
p = ds_dict[proposal]

view of columns in p dataset

In [None]:
sdict['num_of_job_levels']

In [None]:
p.columns

view of colors represented by color dictionary 'job_colors' value

In [None]:
ds_dict['standalone'].columns

In [None]:
%%time
sns.palplot(job_colors)

assign sample employee and employee list variables for some functions

these are median samples...

In [None]:
%%time
# sample employee: the empkey found at the positional midpoint of the
# month-zero (mnum == 0) rows of the proposal dataset
first_month = p.loc[p.mnum == 0]
midpoint = len(first_month) // 2
sample_emp = first_month.iloc[midpoint].empkey

In [None]:
%%time
# sample employee list: for each employee group, the empkey found halfway
# down that group's month-zero (mnum == 0) rows
g = p.loc[p.mnum == 0, ['eg', 'empkey']].groupby('eg')
egs = pd.unique(p.eg)
sample_emp_list = []
for eg in egs:
    eg_rows = g.get_group(eg)
    sample_emp_list.append(eg_rows.iloc[len(eg_rows) // 2].empkey)
# release the groupby object once the samples are collected
g = []

In [None]:
%%time
mp.percent_diff_bins('p1', 'standalone', 2, kind='bar', ds_dict=ds_dict, num_display_colors=25,
                     #attr3='ldate', oper3='<=', val3='1998-05-31',
                     #attr2='s_lmonths', oper2='>=', val2=200,
                     #attr1='ret_mark', oper1='==', val1=1,
                     #cmap_pos='terrain', cmap_neg='terrain',
                     chart_style='ticks',
                     title_size=14, man_plotlim=None, invert_barh=False, legend_size=12.5)

In [None]:
%%time
mp.eg_attributes('p1', 'date', 'cat_order',
                 sdict, adict, cdict,
                 ds_dict=ds_dict,
                 mnum=0,
                 ret_only=True,
                 eg_list=[2],
                 chart_style='ticks',
                 #attr1='ldate', oper1='<=', val1='1990-12-31',
                 #attr2='ldate', oper2='>=', val2='1986-12-31',
                 q_eglist_only=False,
                 xquant_lines=True,
                 yquant_lines=True,
                 xsize=14, ysize=11)

In [None]:
%%time
mp.eg_attributes('p1', 'retdate', 'ldate',
                 sdict, adict, cdict,
                 ds_dict=ds_dict,
                 mnum=0,
                 #markersize=7,
                 marker_alpha=.75,
                 tick_size=14,
                 qtick_size=13,
                 label_size=16,
                 ret_only=True,
                 eg_list=[1],
                 attr1='ldate', oper1='<=', val1='1999-12-31',
                 chart_style='ticks',
                 q_eglist_only=True,
                 xquant_lines=True,
                 yquant_lines=True,
                 xsize=16, ysize=14)

In [None]:
%%time
# uncomment the two lines of arguments below for an example of attribute filtering
mp.quantile_years_in_position('p1', 'standalone', job_levels, 40,
                              job_strs, p_dict,
                              job_colors, ds_dict=ds_dict,
                              flip_x=True, flip_y=False,
                              xsize=12, ysize=16, plot_differential=True,
                              chart_style='ticks', grid_alpha=.35,
                              #attr1='ldate', oper1='<=', val1='1997-12-31',
                              #attr2='ldate', oper2='>=', val2='1986-12-31',
                              rotate=True, custom_color=False, cm_name='Set1',
                              normalize_yr_scale=False, gain_loss_bg=True)

age vs. list percentage for a specific month

In [None]:
%%time
mnum = 60  # month number
mp.age_vs_spcnt('p1', [1,2], mnum, eg_colors,
                p_dict, ret_age,
                ds_dict=ds_dict,
                attr1='ldate', oper1='<=', val1='1999-12-31',
                attr2='jnum', oper2='>=', val2=5)

employees from each group holding a specific job level

In [None]:
%%time
jnum = 4  # job number
# keep only the rows where the employee holds job level `jnum`
job_p = p[p.jnum==jnum]
# plot the filtered frame (job_p) for month 0; previously the unfiltered `p`
# was passed, so the job-level filter above had no effect on the chart
mp.age_vs_spcnt(job_p, [1,2,3], 0, eg_colors,
                p_dict, ret_age,
                ds_dict=ds_dict,
                size=20,
                xsize=10, ysize=8,
                image_dir='im')

In [None]:
%%time
jnum = 4  # job number
job_p = p[p.jnum==jnum]
mp.age_vs_spcnt(job_p, [1,2,3], 42, eg_colors,
                p_dict, ret_age,
                ds_dict=ds_dict)

In [None]:
%%time
mp.multiline_plot_by_emp('p1', 'jobp', 'date', sample_emp_list,
                         job_levels, ret_age,
                         eg_colors, job_strs, sdict,
                         adict, ds_dict=ds_dict)

In [None]:
%%time
mp.multiline_plot_by_emp('p1', 'mpay', 'spcnt', sample_emp_list,
                         job_levels, ret_age,
                         eg_colors, job_strs, sdict,
                         adict, ds_dict=ds_dict)

In [None]:
%%time
#samp_emps = [20011251, 10012608]
samp_emps = [10010692, 20010130]
mp.multiline_plot_by_emp('p1', 'jnum', 'date', samp_emps,
                         job_levels, ret_age,
                         eg_colors, job_strs, sdict,
                         adict, ds_dict=ds_dict, plot_jobp=True, xsize=14, ysize=10)

In [None]:
%%time
mp.multiline_plot_by_emp('p1', 'jnum', 'date', sample_emp_list,
                         job_levels, ret_age,
                         eg_colors, job_strs, sdict,
                         adict, chart_style='ticks', ds_dict=ds_dict)

In [None]:
%%time
mp.eg_attributes('p3', 'cat_order', 'ylong',
                 sdict, adict, cdict,
                 ds_dict=ds_dict,
                 mnum=40,
                 #ret_only=True,
                 eg_list=[1,2,3],
                 chart_style='ticks',
                 attr1='ldate', oper1='<=', val1='1999-12-31',
                 q_eglist_only=True,
                 xquant_lines=True,
                 yquant_lines=True,
                 xsize=14, ysize=11)

In [None]:
%%time
mp.eg_attributes('p1', 'jnum', 'doh',
                 sdict, adict, cdict,
                 ds_dict=ds_dict,
                 mnum=55,
                 #ret_only=True,
                 eg_list=[1,2,3],
                 chart_style='ticks',
                 attr1='ldate', oper1='<=', val1='2022-12-31',
                 q_eglist_only=True,
                 xquant_lines=True,
                 yquant_lines=True,
                 xsize=14, ysize=11)

In [None]:
%%time
mp.eg_attributes('p1', 'spcnt', 'cat_order',
                 sdict, adict, cdict,
                 ds_dict=ds_dict,
                 mnum=60,
                 #ret_only=True,
                 eg_list=[1,2],
                 chart_style='ticks',
                 markersize=0,
                 linestyle='solid',
                 linewidth=4,
                 attr1='ldate', oper1='<=', val1='1999-12-31',
                 q_eglist_only=True,
                 xquant_lines=True,
                 yquant_lines=True,
                 xsize=14, ysize=11)

In [None]:
%%time
# if a FutureWarning is triggered, it is due to an update to matplotlib 3.0 which
# affects the seaborn violinplot function.  This warning is expected to be corrected
# when the seaborn library is updated to meet the matplotlib 3.0 specifications.
mp.violinplot_by_eg(p, 's_lmonths', ret_age,
                    cdict,
                    adict, ds_dict=ds_dict,
                    saturation=.6,
                    chart_style='darkgrid',
                    mnum=60, scale='count')

In [None]:
%%time
mp.age_kde_dist('p1', eg_colors,
                p_dict, ret_age,
                ds_dict=ds_dict, mnum=0)

In [None]:
%%time
mp.eg_diff_boxplot(['p1', 'p2', 'p3'], 'standalone', [2],
                   cdict['eg_colors_lgt'],
                   job_levels,
                   job_levels + 1,
                   adict, ds_dict=ds_dict,
                   #attr1='ldate', oper1='<=', val1='1986-12-31',
                   measure='spcnt', comparison='',
                   linewidth=.75,
                   year_clip=2065, chart_style='darkgrid')

In [None]:
%%time
mp.eg_boxplot(['standalone', 'p1', 'p2', 'p3'], [1,2], cdict['eg_colors_lgt'], job_levels + 1,
              adict, year_clip=2065,
              #attr1='retdate', oper1='<=', val1='2022-12-31',
              show_whiskers=True, xsize=16, ysize=8,
              measure='spcnt', ds_dict=ds_dict, chart_style='darkgrid')

In [None]:
%%time
mp.stripplot_dist_in_category('p1', job_levels,
                              sdict['stripplot_full_time_pcnt'],
                              eg_colors,
                              job_colors, sdict['job_strs_dict'],
                              adict,
                              p_dict, mnum=35, ds_dict=ds_dict, bg_alpha=.15,
                              #attr1='age', oper1='>=', val1='55',
                              #attr2='ldate', oper2='<=', val2='1995-12-31',
                              #fur_color=[0.5, 0.5, 0.5, .5],
                              ysize=11, xsize=4)

In [None]:
df1 = ds_dict['p1']

In [None]:
df1[df1.mnum==40]['fur'].sum()

In [None]:
%%time
# 20011150, 10013738, 30010419
# 10011447, 20010130
mp.multiline_plot_by_emp('p1', 'spcnt', 'date', sample_emp_list,
                         job_levels, ret_age,
                         eg_colors, job_strs, sdict,
                         adict, line_alpha=.75, through_date='2031-01-01',
                         chart_style='ticks', ds_dict=ds_dict, plot_jobp=True)

In [None]:
%%time
# 20011150, 10013738, 30010419
# 10011447, 20010130
mp.job_level_progression('p1', #[10011447, 20010130],
                         sample_emp_list,
                         '2031-01-31',
                         sdict, cdict,
                         eg_colors, job_colors,
                         ds_dict=ds_dict, job_bands_alpha=.15)

In [None]:
%%time
mp.job_level_progression('p1', sample_emp_list, '2030-12-31',
                         sdict, cdict,
                         eg_colors, job_colors,
                         ds_dict=ds_dict)

In [None]:
p.columns

In [None]:
%%time
mp.differential_scatter(['p1', 'p2', 'p3'], 'standalone', 'spcnt',
                        [1, 2, 3], adict, cdict, p_dict, ds_dict=ds_dict, prop_order=True,
                        attr1='ret_mark', oper1='==', val1=1,
                        show_scatter=True, show_lin_reg=False,
                        show_mean=False, mean_len=80,
                        dot_size=20, lin_reg_order=12,
                        ylimit=False, ylim=5,
                        bright_bg=True,
                        chart_style='whitegrid')

In [None]:
%%time
# where is an employee located on a proposed list compared to employees from other
# groups who share the same attribute, such as longevity date?
# positive numbers mean better location
# areas are outlined by respective employee group color(s)
mp.cohort_differential('p2', 1, sdict, cdict, adict,
                       #attr1='age', oper1='<', val1=50,
                       ds_dict=ds_dict, mnum=0, measure='ldate',
                       sort_xax_by_measure=True, single_eg_compare=None,
                       compare_value='1988-12-31', bg_color=None, #'#ffffe6',
                       xsize=16, ysize=12)

In [None]:
%%time
mp.parallel(['p1', 'p2'], 'standalone', [1,2],
            'spcnt', [0, 40, 80, 120, 200],
            job_levels,
            eg_colors, sdict, adict,
            ds_dict=ds_dict, facecolor='#fffff5',
            chart_style='whitegrid',
            #attr1='ldate', oper1='<=', val1='1997-12-31',
            left=0, xsize=5, ysize=5)

In [None]:
%%time
mp.job_grouping_over_time('p1', [1,2,3], job_strs,
                          job_colors, p_dict,
                          rets_only=True, ds_dict=ds_dict,
                          #attr1='ldate', oper1='<=', val1='1997-12-31',
                          time_group='A', plt_kind='bar',
                          display_yrs=40, legend_loc=4, xsize=14,
                          ysize=9)

integrated distribution of all jobs, all employee groups

In [None]:
%%time
mp.rows_of_color('p1', 62, ['jnum'], eg_colors,
                 job_colors, sdict, ds_dict=ds_dict,
                 cols=120, job_only=False, jnum=4,
                 cell_border=True, eg_border_color='.75', eg_list=[1],
                 job_border_color='.75', xsize=14, ysize=9, fur_color='c')

In [None]:
%%time
mp.rows_of_color('p1', 62, ['eg', 'fur'], eg_colors,
                 job_colors, sdict, ds_dict=ds_dict,
                 cols=150, job_only=False, jnum=6,
                 cell_border=True, eg_border_color='.3',
                 job_border_color='.55', xsize=15, ysize=9)

In [None]:
%%time
mp.rows_of_color('p1', 62, ['eg'], eg_colors,
                 job_colors, sdict, ds_dict=ds_dict,
                 cols=100, job_only=True, jnum=4,
                 attr1='age', oper1='>=', val1='60',
                 cell_border=False, eg_border_color='.6',
                 job_border_color='.85', xsize=15, ysize=9)

In [None]:
%%time
mp.rows_of_color('p1', 62, ['eg', 'fur'], eg_colors,
                 job_colors, sdict, ds_dict=ds_dict,
                 cols=100, job_only=True, jnum=4,
                 #attr1='ldate', oper1='<=', val1='1987-12-31',
                 cell_border=False, eg_border_color='.3',
                 job_border_color='.85', xsize=15, ysize=9)

In [None]:
%%time
mp.quantile_bands_over_time('p1', 1, 'lspcnt', bins=20, ds_dict=ds_dict,
                            year_clip=None, kind='area',
                            quantile_ticks=True,
                            quantile_alpha=.65, cm_name='nipy_spectral',
                            grid_alpha=.5,
                            alt_bg_color=False, bg_color='#faf6eb')

In [None]:
%%time
mp.job_transfer('p1', 'standalone', 1,
                job_colors, job_levels,
                job_strs, p_dict,
                ds_dict=ds_dict, chart_style='ticks',
                gb_period='M')

In [None]:
%%time
d1 = ds_dict['p1']
d2 = ds_dict['standalone']
attr = 'doh'
value = '1989-12-31'
mp.job_transfer(d1[d1[attr] <= value], d2[d2[attr] <= value],
                2, job_colors, job_levels,
                job_strs, p_dict,
                ds_dict=ds_dict,
                gb_period='M')

In [None]:
%%time
mp.eg_multiplot_with_cat_order('p1', 35, 'cat_order', 'spcnt',
                               job_strs, job_colors,
                               job_levels, sdict, adict, cdict,
                               egs=[2,3], ds_dict=ds_dict,
                               job_bands_alpha=.2, xsize=14,
                               plot_scatter=True, s=15, a=.7, lw=0,
                               tick_size=11)

In [None]:
mp.make_color_list()

In [None]:
mp.make_color_list(num_of_colors=4, cm_name_list=['Accent'])

In [None]:
ds_dict['p1'].columns

In [None]:
%%time
mp.diff_range(['p1', 'p2', 'p3'], 'standalone', 'jobp', [1,2,3],
              adict, ds_dict=ds_dict, cm_name='Set1',
              #attr1='ldate', oper1='<=', val1='1991-12-31',
              #attr2='ldate', oper2='>=', val2='1985-12-31',
              show_range=False, show_mean=True,
              year_clip=2030, range_alpha=.3, chart_style='ticks',
              tick_size=13, suptitle_size=16, title_size=16, label_size=16, legend_size=14, image_dir='im', image_format='png')

In [None]:
%%time
mp.job_count_charts('p1', 'standalone', sdict, eg_colors,
                    eg_list=[1,2,3], ds_dict=ds_dict,
                    #attr1='lname', oper1='<=', val1='jones',
                    plot_egs_sep=True, plot_total=True, year_max=2050)

In [None]:
%%time
mp.emp_quick_glance(sample_emp, 'p1', ds_dict=ds_dict)

In [None]:
%%time
mp.single_emp_compare(sample_emp, 'cat_order',
                      ['p1', 'p2', 'p3', 'standalone'],
                      'date', job_strs, eg_colors,
                      p_dict, job_levels, adict,
                      chart_style='darkgrid',
                      ds_dict=ds_dict)

In [None]:
%%time
mp.job_time_change(['p1'], 'standalone', [1], job_colors,
                   sdict['job_strs_dict'], job_levels, adict,
                   'lspcnt', ds_dict=ds_dict, chart_style='ticks',
                   #attr1='ldate', oper1='<=', val1='1987-12-31',
                   bg_color='#ffffff',
                   xsize=12, ysize=8, experimental=False)

### slicing and groupby month

In [None]:
%%time
mp.group_average_and_median('p1', 'standalone', [1,2,3],
                            eg_colors, 'cat_order', job_levels,
                            sdict, adict, chart_style='ticks',
                            attr1='ldate', oper1='<=', val1='1989-12-31',
                            show_full_yscale=False,
                            ds_dict=ds_dict)

In [None]:
%%time
mp.group_average_and_median('p1', 'standalone', [1, 2, 3],
                            eg_colors, 'jnum', job_levels,
                            sdict, adict,
                            ds_dict=ds_dict,
                            chart_style='ticks',
                            attr1='age', oper1='>', val1='50')

In [None]:
%%time
# filter entire dataset to include only those employees who were 50 or older during month 0
job_slice = mp.slice_ds_by_filtered_index('p1', ds_dict=ds_dict, mnum=0, attr='age', attr_oper='>=', attr_val=50)

In [None]:
%%time
# use the filtered dataset from the cell above as input (job_slice).
# this chart shows average job category order number for employees who were at least 50 at month 0 and
# who have a longevity date in 1999 or earlier...
mp.group_average_and_median(job_slice, 'standalone', [1, 2, 3],
                            eg_colors, 'cat_order', job_levels,
                            sdict, adict,
                            attr1='ldate', oper1='<=', val1='1999-12-31',
                            ds_dict=ds_dict, job_labels=False)

In [None]:
%%time
mp.stripplot_eg_density('p1', 100, eg_colors,
                        mnum_order=True,
                        dot_size=2.6,
                        #attr1='age', oper1='>=', val1='62',
                        ds_dict=ds_dict, xsize=4)

In [None]:
p.columns

In [None]:
%%time
mp.job_count_bands(['standalone', 'p1', 'p2', 'p3'], [1,2,3], job_colors,
                   sdict, ds_dict=ds_dict, emp_list=sample_emp_list,
                   #attr1='ldate', oper1='<=', val1='1990-12-31',
                   chart_style='white', xsize=13, ysize=8)

In [None]:
%%time
mp.quantile_bands_over_time(p, 2, 'lspcnt', bins=40, quantile_ticks=False, cm_name='Set2')

In [None]:
%%time
# cat_order progression for the median of each 2.5% segment (40 quantiles)
# of employee groups 1 and 2 (standalone dataset)
# NOTE(review): groupby_method is not passed in this call, so "median" relies
# on the function's default - confirm
mp.quantile_groupby(['standalone'], [1,2], 'cat_order', 40,
                    eg_colors, job_colors, sdict, adict, job_dict,
                    ds_dict=ds_dict, show_job_bands=False,
                    num_cat_order_yticks=20,
                    custom_color=False,
                    through_date='2037-12-31')

In [None]:
%%time
# same as above, but as affected by integration proposal p1,
# with a delayed implementation date
mp.quantile_groupby(['p1'], [2], 'cat_order', 40,
                    eg_colors, job_colors, sdict, adict,
                    job_dict,
                    ds_dict=ds_dict,
                    num_cat_order_yticks=20,
                    show_job_bands=False, custom_color=False,
                    through_date='2037-12-31')

In [None]:
%%time
# cat_order progression for employee group 2 under proposal p1, with the job
# bands displayed (show_job_bands=True) and a finer split of 200 quantiles
# (0.5% segments)
mp.quantile_groupby(['p1'], [2], 'cat_order', 200,
                    eg_colors, job_colors, sdict, adict,
                    job_dict,
                    ds_dict=ds_dict,
                    num_cat_order_yticks=20,
                    show_job_bands=True, custom_color=False,
                    through_date='2037-12-31')

In [None]:
%%time
# cat_order progression for the median of each .5% segment (200 quantiles)
# of employee group 1 (p3 dataset)
# custom color example (custom_color=True with the 'tab20c' colormap)
mp.quantile_groupby(['p3'], [1], 'cat_order', 200,
                    eg_colors, job_colors, sdict, adict,
                    job_dict,
                    groupby_method='median',
                    ds_dict=ds_dict, show_job_bands=True,
                    line_width=1, custom_color=True, chart_style='whitegrid',
                    line_alpha=.9,
                    num_cat_order_yticks=20,
                    show_grid=True, cm_name='tab20c',
                    through_date='2037-12-31')

In [None]:
%%time
# cat_order progression comparison for the median of each 10% segment (10 quantiles)
# of employee group 2 for standalone (solid lines) and p1 (dashed lines) datasets.
# custom color example 
mp.quantile_groupby(['standalone', 'p1'], [2], 'cat_order', 10,
                    eg_colors, job_colors, sdict, adict,
                    job_dict,
                    groupby_method='median',
                    ds_dict=ds_dict, show_job_bands=False,
                    num_cat_order_yticks=20,
                    line_width=2.5, custom_color=True, chart_style='whitegrid',
                    line_alpha=1,
                    show_grid=True, cm_name='Dark2',
                    through_date='2037-12-31')

### custom plot example, eg job count over time, standalone vs. proposal

In [None]:
%%time
eg_num = 2

fur_color = '#404040'
# work on a slice-copy so the shared job_colors sequence is left untouched
# (assumes job_colors is a list - TODO confirm), then override the last
# entry with fur_color
j_colors = job_colors[:]
j_colors[-1] = fur_color

stand = ds_dict['standalone']
p = ds_dict['p1']


def _job_counts_by_date(ds, eg):
    """Count rows per (date, jnum) for one employee group.

    Returns a frame indexed by date with one column per jnum value.
    """
    return ds[ds.eg == eg].groupby(['date', 'jnum']).size().astype(int).unstack()


def _plot_job_counts(job_counts, colors, title):
    """Line-plot one colored line per jnum column, legend to the right.

    `colors` is indexed with (column label - 1), so column labels are
    expected to be 1-based job numbers.  Returns (fig, ax).
    """
    ax = job_counts.plot(color=[colors[i - 1] for i in job_counts.columns])
    fig = ax.get_figure()
    fig.set_size_inches(12, 7)
    ax.margins(x=0, y=0)
    # shrink the axes to 80% width so the legend fits beside the plot
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_title(title)
    return fig, ax


base_jobs = _job_counts_by_date(stand, eg_num)
p_jobs = _job_counts_by_date(p, eg_num)

#-----------------------------------------------------------------------
# Baseline job counts chart:
fig, ax = _plot_job_counts(base_jobs, j_colors,
                           'baseline job counts, eg ' + str(eg_num))

#------------------------------------------------------------------------
# Proposal job counts chart:
fig, ax = _plot_job_counts(p_jobs, j_colors,
                           'proposal job counts, eg ' + str(eg_num))

plt.show()

## custom color list creator demo

Note:  the following cells are included only as an example of creating a custom colormap for charts...

In [None]:
%%time
# get all registered colormap names, leaving out the reversed ("_r") variants.
# plt.colormaps() is used instead of plt.cm.datad because datad does not
# contain every built-in colormap (e.g. 'viridis' is missing from it).
sorted(m for m in plt.colormaps() if not m.endswith("_r"))

In [None]:
# use a colormap name(s) within the cm_name_list variable
# the make_color_list function can do several things, here it returns color lists...
mp.make_color_list(num_of_colors=10, cm_name_list=['Paired', 'cool'])

In [None]:
# same colormap inputs, this time with seaborn palplot output.
# the function produces lists of colors which may then be used for plotting.
mp.make_color_list(num_of_colors=10, cm_name_list=['Paired', 'cool'], return_list=False)

seaborn method of viewing a custom color list:

In [None]:
sns.palplot([(0.65, 0.80, 0.89, 1.0), (0.12, 0.47, 0.70, 1.0), (0.69, 0.87, 0.54, 1.0)])

In [None]:
sns.palplot(['r', 'g', 'b'])

In [None]:
eg_color_dict = {1: 'k', 2: 'b', 3: 'red'}
prop_dict = {0: 'standalone', 1: 'p1', 2: 'p2', 3: 'p3', 4: 'edit', 5: 'hybrid'}

In [None]:
prop = 1

In [None]:
this_p = ds_dict[prop_dict[prop]].copy()

In [None]:
%%time
this_p['start_age'] = f.make_starting_val_column(this_p, 'age', inplace=False)

In [None]:
%%time
f.make_eg_pcnt_column(this_p)

In [None]:
this_p.columns

In [None]:
eg_list = [1, 2]

In [None]:
this_p.start_age.max()

In [None]:
tp = this_p[this_p.retdate <= '2025-12-31']
fig, ax = plt.subplots()
m1 = 'start_age'
m2 = 'jobp'

# rows flagged ret_mark == 1 for every employee group being plotted
plot_df = tp[tp.eg.isin(eg_list) & (tp.ret_mark==1)]

for eg in eg_list:
    filt_df = plot_df[plot_df.eg==eg][[m1, m2]]
    filt_df.plot(x=m1, y=m2,
                 kind='scatter', ax=ax,
                 color=eg_color_dict[eg],
                 alpha=.15, s=60)
ax.invert_yaxis()
if m2 in ['jnum', 'jobp']:
    # size the y ticks from ALL plotted groups; using only the last group's
    # frame (as before) could leave out levels reached by earlier groups
    top_val = plot_df[m2].max()
    # skip tick placement when there is no data (max of an empty column is NaN)
    if pd.notna(top_val):
        # upper bound capped at 18, so ticks never run past 17
        max_jnum = min(int(top_val) + 2, 18)
        ax.set_yticks(np.arange(1, max_jnum, 1))
#ax.invert_xaxis()

In [None]:
tp = this_p[this_p.retdate <= '2045-12-31']
fig, ax = plt.subplots()
m1 = 'eg_start_pcnt'
m2 = 'cat_order'

# one scatter layer per employee group, restricted to rows flagged ret_mark == 1
for eg in eg_list:
    eg_mask = (tp.eg == eg) & (tp.ret_mark == 1)
    eg_points = tp.loc[eg_mask, [m1, m2]]
    eg_points.plot(x=m1, y=m2, kind='scatter', ax=ax,
                   color=eg_color_dict[eg], alpha=.15, s=60)

# both axes are inverted
ax.invert_xaxis()
ax.invert_yaxis()

In [None]:
np.unique(this_p[this_p.mnum==80]['jnum'], return_counts=True)

In [None]:
mp.cond_test('p1', [2], enhanced_jobs, job_colors, job_dict, [1, 3], ds_dict=ds_dict, max_mnum=110)

In [None]:
#uncomment lines below to run, this one takes a long time to calculate...

#sns.swarmplot(x='eg', y='cat_order', data=p[p.mnum==0]);
#plt.gca().invert_yaxis()
#plt.gcf().set_size_inches(10, 10)

In [None]:
chart_style = 'whitegrid'

# rows flagged ret_mark == 1 with ldate on or before 1987-12-31 and
# retdate after 2017-12-31
strip_mask = (p.ret_mark == 1) & (p.ldate <= '1987-12-31') & (p.retdate > '2017-12-31')

# the seaborn style only needs to be active while the axes are created
with sns.axes_style(chart_style):
    fig, ax = plt.subplots()

sns.stripplot(x='eg', y='cat_order', data=p[strip_mask],
              jitter=.5, size=5, alpha=.65, ax=ax)
fig.set_size_inches(10, 10)
ax.invert_yaxis()