### Plotting notebook

#### Most of the functions below have built-in filtering capability.  Uncomment lines which begin with 'attr1' to sample...

##### To view the docstrings for all plotting functions below, click within the name of the function and then use keyboard combination Shift+TAB.

imports, notebook display options, and plotting configuration

In [None]:
%%time
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
from matplotlib import cm

import config as cf
import functions as f
import matplotlib_charting as mp

# Alternative matplotlib style sheets (disabled); the seaborn style set below
# is the one in effect.
#plt.style.use('bmh')
#plt.style.use('fivethirtyeight')
sns.set_style("darkgrid")

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# pandas display options.  Every key is written with its full "display."
# prefix: pandas resolves abbreviated option names by pattern matching and
# raises OptionError when the pattern matches more than one registered option
# (in newer pandas releases 'max_columns' and 'max_rows' also match the
# styler.render.* options).
pd.set_option('display.notebook_repr_html', False)  # plain-text DataFrame repr
pd.set_option('display.max_columns', 50)
pd.set_option('display.expand_frame_repr', True)
pd.set_option('display.max_rows', 10)
pd.set_option('display.precision', 3)
pd.set_option('display.width', 150)
# NOTE(review): the effect of a max_colwidth of 0 depends on the pandas
# version - confirm it still gives the intended column display.
pd.set_option('display.max_colwidth', 0)

# Render floats with four decimal places.
pd.set_option('display.float_format', lambda x: '%.4f' % x)

# Notebook-wide matplotlib defaults: figure size, font sizes, line width
# and legend appearance.
plt.rcParams.update({
    'figure.figsize': (12.0, 9.0),
    'font.size': 14,
    'axes.labelsize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'axes.titlesize': 20,
    'lines.linewidth': 3,
    'legend.fontsize': 16,
    'legend.fancybox': True,
})

read and assign variables for computed datasets

the standalone dataset (ds4) is stored under the 'standalone' key; the others ('p1', 'p2', 'p3') are sample proposals

In [None]:
%%time
# Load the computed datasets through the project helper.  Cells below index
# the result as ds_dict[<name>][0] (e.g. 'p1', 'standalone') to get a dataset.
ds_dict = f.load_datasets()

view of first 5 rows of proposal 1 (named 'p1' in Excel file 'proposals.xlsx'):

In [None]:
# First five rows of the 'p1' dataset (element 0 of its ds_dict entry).
ds_dict['p1'][0].head()

assign reusable variables for functions below

In [None]:
%%time
# Number of job levels, read from the config module; passed to many of the
# plotting calls below.
job_levels = cf.num_of_job_levels

In [None]:
%%time
# Employee group ("eg") colors, read from the config module.
eg_colors = cf.eg_colors

In [None]:
%%time
# Job colors list, read from the config module.
job_colors = cf.job_colors

assign proposal variable (string)

In [None]:
%%time
# Name of the proposal dataset to examine; used as a key into ds_dict.
proposal = 'p1'

assign p variable (look up the dataset named by the proposal variable in ds_dict)

In [None]:
%%time
# Dataset for the selected proposal: element 0 of its ds_dict entry.
p = ds_dict[proposal][0]

view of columns in p dataset

In [None]:
# Column labels available in the p dataset.
p.columns

view of colors represented by config file job_colors list variable

In [None]:
# Show the job_colors list as a seaborn palette strip.
sns.palplot(job_colors)

assign sample employee and employee list variables for some functions

these are median samples...

In [None]:
# sample employee - the empkey at the midpoint of the month-zero rows
first_month = p[p.mnum == 0]
midpoint = len(first_month) // 2
sample_emp = first_month.iloc[midpoint].empkey

In [None]:
# sample employee list - one empkey per employee group, taken at the
# midpoint of that group's month-zero rows
sample_emp_list = []
g = p[p.mnum == 0][['eg', 'empkey']].groupby('eg')
egs = pd.unique(p.eg)
for eg in egs:
    eg_rows = g.get_group(eg)
    sample_emp_list.append(eg_rows.iloc[len(eg_rows) // 2].empkey)
g = []  # drop the reference to the groupby object

In [None]:
%%time
# Call mp.quartile_years_in_position with 'p1' and 'standalone' as the first
# two arguments (presumably proposal vs. baseline - confirm in the docstring).
# uncomment the two lines of arguments below for an example of attribute filtering
# (as written they select 1986-12-31 <= ldate <= 1997-12-31)
mp.quartile_years_in_position('p1', 'standalone', job_levels, 40, cf.job_strs, cf.eg_dict,
                              job_colors, ds_dict=ds_dict, flip_x=True, flip_y=False,
                              ysize=16, xsize=12, plot_differential=True,
                              #attr1='ldate', oper1='<=', val1='1997-12-31',
                              #attr2='ldate', oper2='>=', val2='1986-12-31',
                              rotate=True, custom_color=False, cm_name='Set1',
                              normalize_yr_scale=True, gain_loss_bg=True)

age vs. list percentage for a specific month

In [None]:
%%time
# Age vs. list percentage chart for 'p1' at month number mnum.
# Uncomment the attr1 line to apply the ldate <= 1999-12-31 filter.
mnum = 24  # month number
mp.age_vs_spcnt('p1', [1,2,3], mnum, eg_colors, cf.eg_dict, ds_dict=ds_dict,
                # attr1='ldate', oper1='<=', val1='1999-12-31',
                chart_example=False)

employees from each group holding a specific job level

In [None]:
%%time
# Keep only the rows of p at job number jnum, then chart that subset.
# The filtered DataFrame is passed where other cells pass a proposal name.
# NOTE(review): 42 is presumably the month number - confirm in the docstring.
jnum = 4  # job number
job_p = p[p.jnum==jnum]
mp.age_vs_spcnt(job_p, [1,2,3], 42, eg_colors, cf.eg_dict, ds_dict=ds_dict, chart_example=False)

In [None]:
%%time
# mp.multiline_plot_by_emp for the employees in sample_emp_list, dataset 'p1',
# using the 'mpay' and 'spcnt' attributes (axis assignment: see the docstring).
mp.multiline_plot_by_emp('p1', 'mpay', 'spcnt', sample_emp_list, job_levels,
                         eg_colors, cf.job_strs, ds_dict=ds_dict, chart_example=False)

In [None]:
%%time
# mp.multiline_plot_by_emp for the employees in sample_emp_list, dataset 'p1',
# using the 'jobp' and 'date' attributes (axis assignment: see the docstring).
mp.multiline_plot_by_emp('p1', 'jobp', 'date', sample_emp_list,
                         job_levels, eg_colors, cf.job_strs, ds_dict=ds_dict, chart_example=False)

In [None]:
%%time
# mp.multiline_plot_by_emp for the employees in sample_emp_list, dataset 'p1',
# using the 'cat_order' and 'date' attributes.
# NOTE(review): cf.proposal_dict is passed in the position where the other
# multiline_plot_by_emp calls pass cf.job_strs - confirm this is intended.
mp.multiline_plot_by_emp('p1', 'cat_order', 'date', sample_emp_list, job_levels,
                         cf.eg_colors, cf.proposal_dict, ds_dict=ds_dict)

In [None]:
%%time
# mp.multiline_plot_by_eg for 'p1' with the 'jobp' and 'lspcnt' attributes,
# groups [1,2,3], month 0, scatter enabled.
# Uncomment the attr1 line to apply the ldate >= 1999-12-31 filter.
mp.multiline_plot_by_eg('p1', 'jobp', 'lspcnt', [1,2,3], cf.job_strs,
                        job_levels, eg_colors, ds_dict=ds_dict,
                        #attr1='ldate', oper1='>=', val1='1999-12-31',
                        mnum=0, scatter=True,
                        exclude_fur=False, full_pcnt_xscale=True, chart_example=False)

In [None]:
%%time
# mp.multiline_plot_by_eg for 'p1' with the 'cat_order' and 'lspcnt'
# attributes, groups [1,2,3], month 20, scatter disabled.
mp.multiline_plot_by_eg('p1', 'cat_order', 'lspcnt', [1,2,3], cf.job_strs,
                        job_levels, eg_colors, ds_dict=ds_dict, mnum=20, scatter=False,
                        exclude_fur=False, full_pcnt_xscale=True)

In [None]:
%%time
# mp.violinplot_by_eg on the p DataFrame for the 'age' attribute at month 0,
# with scale='count'.
mp.violinplot_by_eg(p, 'age', ds_dict=ds_dict,
                    mnum=0, scale='count', chart_example=False)

In [None]:
%%time
# mp.age_kde_dist for 'p1' at month 0, using the employee group colors.
mp.age_kde_dist('p1', eg_colors, cf.eg_dict,
                ds_dict=ds_dict, mnum=0, chart_example=False)

In [None]:
%%time
# mp.eg_diff_boxplot for datasets 'p1', 'p2', 'p3' with 'standalone' as the
# second argument, groups [1, 2, 3], measure 'spcnt', clipped at year 2035.
# Uncomment the attr1 line to apply the ldate <= 1986-12-31 filter.
mp.eg_diff_boxplot(['p1', 'p2', 'p3'], 'standalone', [1, 2, 3],
                   ds_dict=ds_dict,
                   #attr1='ldate', oper1='<=', val1='1986-12-31',
                   measure='spcnt', comparison='',
                   year_clip=2035, chart_example=False)

In [None]:
%%time
# mp.eg_boxplot for datasets 'p1', 'p2', 'p3', groups [1, 2, 3], measure 'jobp'.
# Uncomment the attr1 line to apply the doh <= 2000-12-31 filter.
mp.eg_boxplot(['p1', 'p2', 'p3'], [1, 2, 3],
              #attr1='doh', oper1='<=', val1='2000-12-31',
              measure='jobp', ds_dict=ds_dict)

In [None]:
%%time
# mp.stripplot_distribution_in_category for 'p1'.
# NOTE(review): 40 is presumably a month number - confirm in the docstring.
# Uncomment the attr1 line to apply the ldate <= 1998-12-31 filter.
mp.stripplot_distribution_in_category('p1', job_levels, 40,
                                      cf.full_time_pcnt1, eg_colors,
                                      job_colors, cf.jobs_dict,
                                      cf.eg_dict_verbose, ds_dict=ds_dict, bg_alpha=.15,
                                      #attr1='ldate', oper1='<=', val1='1998-12-31',
                                      chart_example=False)

In [None]:
%%time
# mp.job_level_progression for 'p1' and the employees in sample_emp_list,
# with '2030-12-31' as the date argument.
mp.job_level_progression('p1', sample_emp_list, '2030-12-31', 
                         ds_dict=ds_dict, alpha=.1,
                         chart_example=False)

In [None]:
%%time
# mp.differential_scatter for datasets 'p1', 'p2', 'p3' with 'standalone' as
# the second argument, attributes 'spcnt' and 'age', groups [1, 2, 3].
# Only the scatter layer is enabled (show_lin_reg and show_mean are False).
# Uncomment the attr1 line to apply the ldate <= 1997-12-31 filter.
mp.differential_scatter(['p1', 'p2', 'p3'], 'standalone', 'spcnt', 'age',
                        65, [1, 2, 3], ds_dict=ds_dict, prop_order=True,
                        #attr1='ldate', oper1='<=', val1='1997-12-31',
                        show_scatter=True, show_lin_reg=False,
                        show_mean=False, mean_len=80,
                        dot_size=20, lin_reg_order=12,
                        ylimit=False, ylim=5,
                        bright_bg=False,
                        chart_style='darkgrid', chart_example=False)

In [None]:
%%time
# mp.parallel with a mixed first argument: the p DataFrame plus the dataset
# names 'p2' and 'p3'; 'standalone' is the second argument.
# NOTE(review): [0, 40, 80, 120] is presumably a list of month numbers - confirm.
# Uncomment the attr1 line to apply the ldate <= 1997-12-31 filter.
mp.parallel([p, 'p2', 'p3'], 'standalone', [1, 2, 3],
            'lspcnt', [0, 40, 80, 120],
            job_levels,
            ds_dict=ds_dict,
            #attr1='ldate', oper1='<=', val1='1997-12-31',
            left=0, xsize=5, ysize=5)

In [None]:
%%time
# mp.job_grouping_over_time for 'p1', groups [1,2,3], drawn as a bar chart
# with time_group='A', rets_only=True and display_yrs=40.
# Uncomment the attr1 line to apply the ldate <= 1997-12-31 filter.
mp.job_grouping_over_time('p1', [1,2,3], cf.job_strs,
                          job_colors, rets_only=True, ds_dict=ds_dict,
                          #attr1='ldate', oper1='<=', val1='1997-12-31',
                          time_group='A', plt_kind='bar',
                          display_yrs=40, legend_loc=4, xsize=11,
                          ysize=6, chart_example=False)

integrated distribution of all jobs, all employee groups

In [None]:
%%time
# mp.rows_of_color for 'p1' coloring by ['jnum'], 120 columns, job_only=False,
# groups [1,2,3].
# NOTE(review): 22 is presumably a month number - confirm in the docstring.
mp.rows_of_color('p1', 22, ['jnum'], cf.eg_colors,
                 job_colors, ds_dict=ds_dict, cols=120, job_only=False, jnum=4,
                 cell_border=True, eg_border_color='.9', eg_list=[1,2,3],
                 job_border_color='.75', xsize=12, ysize=9,
                 chart_example=False)

In [None]:
%%time
# mp.rows_of_color for 'p1' coloring by ['eg', 'fur'], 150 columns,
# job_only=False, with chart_example=True.
# NOTE(review): 60 is presumably a month number - confirm in the docstring.
mp.rows_of_color('p1', 60, ['eg', 'fur'], cf.eg_colors,
                 job_colors, ds_dict=ds_dict, cols=150, job_only=False, jnum=6,
                 cell_border=True, eg_border_color='.3',
                 job_border_color='.55', xsize=12, ysize=9,
                 chart_example=True)

In [None]:
%%time
# mp.rows_of_color for 'p1' coloring by ['eg'], 100 columns, with
# job_only=True and jnum=4.
# Uncomment the attr1 line to apply the age >= 60 filter.
mp.rows_of_color('p1', 60, ['eg'], cf.eg_colors,
                 job_colors, ds_dict=ds_dict, cols=100, job_only=True, jnum=4,
                 #attr1='age', oper1='>=', val1='60',
                 cell_border=True, eg_border_color='.6',
                 job_border_color='.85', xsize=12, ysize=9, chart_example=False)

In [None]:
%%time
# mp.rows_of_color for 'p1' coloring by ['eg', 'fur'], 150 columns, with
# job_only=True, jnum=4 and chart_example=True.
# Uncomment the attr1 line to apply the ldate <= 1987-12-31 filter.
mp.rows_of_color('p1', 40, ['eg', 'fur'], cf.eg_colors,
                 job_colors, ds_dict=ds_dict, cols=150, job_only=True, jnum=4,
                 #attr1='ldate', oper1='<=', val1='1987-12-31',
                 cell_border=True, eg_border_color='.3',
                 job_border_color='.55', xsize=12, ysize=9,
                 chart_example=True)

In [None]:
%%time
# mp.quartile_bands_over_time for 'p2' and the 'spcnt' attribute, 20 bins,
# area chart clipped at year 2035, colored from the 'Set1' colormap.
# NOTE(review): the second argument (2) is presumably an employee group
# number - confirm in the docstring.
mp.quartile_bands_over_time('p2', 2, 'spcnt', bins=20, ds_dict=ds_dict,
                            clip=True, year_clip=2035, kind='area',
                            quartile_ticks=False, custom_color=True,
                            cm_name='Set1', quartile_alpha=.6,
                            grid_alpha=.5, custom_start=0,
                            custom_finish=.75, xsize=10, ysize=8,
                            alt_bg_color=False, bg_color='#faf6eb')

In [None]:
%%time
# mp.job_transfer with 'p1' and 'standalone' as the first two arguments and
# gb_period='M'.
# NOTE(review): the third argument (1) is presumably an employee group
# number - confirm in the docstring.
mp.job_transfer('p1', 'standalone',
                1, job_colors, job_levels, ds_dict=ds_dict,
                custom_color=False, gb_period='M')

In [None]:
%%time
# mp.eg_multiplot_with_cat_order for 'p2' with the 'cat_order' and 'lspcnt'
# attributes, single_eg=False.
# NOTE(review): 40 is presumably a month number - confirm in the docstring.
mp.eg_multiplot_with_cat_order('p2', 40,
                               'cat_order', 'lspcnt',
                               cf.job_strs, job_colors,
                               job_levels, ds_dict=ds_dict,
                               num=1,
                               single_eg=False, tick_fontsize=11)

In [None]:
%%time
# mp.diff_range for datasets 'p1', 'p2', 'p3' with 'standalone' as the second
# argument, attribute 'jobp', groups [1, 2, 3]; the mean is shown, the range
# is not, clipped at year 2030.
# Uncomment the attr1/attr2 lines to select 1985-12-31 <= ldate <= 1991-12-31.
mp.diff_range(['p1', 'p2', 'p3'], 'standalone', 'jobp', [1, 2, 3],
              ds_dict=ds_dict,
              #attr1='ldate', oper1='<=', val1='1991-12-31',
              #attr2='ldate', oper2='>=', val2='1985-12-31',
              show_range=False, show_mean=True,
              year_clip=2030)

In [None]:
%%time
# mp.job_count_charts with 'p1' and 'standalone' as the first two arguments,
# groups [1, 2, 3], plotting the groups separately and the total.
# Uncomment the attr1 line to apply the lname <= 'jones' filter.
mp.job_count_charts('p1', 'standalone', eg_list=[1, 2, 3], ds_dict=ds_dict,
                    #attr1='lname', oper1='<=', val1='jones',
                    plot_egs_sep=True, plot_total=True)

In [None]:
%%time
# mp.emp_quick_glance for the sample employee key within the 'p1' dataset.
mp.emp_quick_glance(sample_emp, 'p1', ds_dict=ds_dict)

In [None]:
%%time
# mp.single_emp_compare for the sample employee key: the 'cat_order'
# attribute across datasets 'p1', 'p2', 'p3' and 'standalone', with 'date'.
mp.single_emp_compare(sample_emp, 'cat_order', ['p1', 'p2', 'p3', 'standalone'],
                      'date', cf.job_strs, eg_colors,
                      cf.eg_dict, job_levels, ds_dict=ds_dict, chart_example=False)

In [None]:
%%time
# mp.job_time_change for dataset list ['p1'] with 'standalone' as the second
# argument, groups [1, 2, 3], attribute 'spcnt'.
# job_list counts down from cf.num_of_job_levels to 1.
# Uncomment the attr1 line to apply the ldate <= 1987-12-31 filter.
mp.job_time_change(['p1'], 'standalone', [1, 2, 3],
                   'spcnt', ds_dict=ds_dict,
                   #attr1='ldate', oper1='<=', val1='1987-12-31',
                   colors=cf.job_colors,
                   job_list=np.arange(cf.num_of_job_levels, 0, -1),
                   jobs_dict=cf.jobs_dict)

### slicing and groupby month

In [None]:
%%time
# mp.group_average_and_median with 'p1' and 'standalone' as the first two
# arguments, groups [1, 2, 3], attribute 'cat_order', job_labels=False.
# Uncomment the attr1 line to apply the ldate <= 1999-12-31 filter.
mp.group_average_and_median('p1', 'standalone', [1, 2, 3],
                            eg_colors, 'cat_order', job_levels,
                            cf.jobs_dict,
                            #attr1='ldate', oper1='<=', val1='1999-12-31',
                            ds_dict=ds_dict, job_labels=False)
                            

In [None]:
%%time
# mp.group_average_and_median with 'p1' and 'standalone' as the first two
# arguments, groups [1, 2, 3], attribute 'jnum', job_labels=False.
# Uncomment the attr1 line to apply the age > 50 filter (the trailing comma
# is required because another argument follows it).
mp.group_average_and_median('p1', 'standalone', [1, 2, 3],
                            eg_colors, 'jnum', job_levels,
                            cf.jobs_dict, ds_dict=ds_dict,
                            #attr1='age', oper1='>', val1='50',
                            job_labels=False)

In [None]:
%%time
# mp.stripplot_eg_density for 'p1' using the employee group colors.
# NOTE(review): 40 is presumably a month number - confirm in the docstring.
# Uncomment the attr1 line to apply the age >= 62 filter.
mp.stripplot_eg_density('p1', 40, eg_colors,
                        #attr1='age', oper1='>=', val1='62',
                        ds_dict=ds_dict, xsize=4)

In [None]:
# mp.job_count_bands for datasets 'standalone', 'p1', 'p2', 'p3', groups
# [1, 2, 3], with the 'whitegrid' chart style.
# Uncomment the attr1 line to apply the ldate <= 1990-12-31 filter.
mp.job_count_bands(['standalone', 'p1', 'p2', 'p3'], [1, 2, 3], cf.job_colors, ds_dict=ds_dict,
                   #attr1='ldate', oper1='<=', val1='1990-12-31',
                   chart_style='whitegrid')

In [None]:
# Bind the 'p1' dataset (element 0 of its ds_dict entry) for the slicing example.
df = ds_dict['p1'][0]

In [None]:
# Boolean-mask slice: rows of df whose date compares <= the string '1997'.
df[df.date <= '1997']

# Make colormap

Note:  the following 3 cells are included only as an example of creating a custom colormap for charts...

The cm_name variable must be a valid matplotlib colormap name

In [None]:
# Build a list of colors sampled from a named matplotlib colormap.
cm_name = 'Paired'
num_of_colors = 8 + 1
# evenly spaced sample points from 0 to .75 of the colormap range
cm_subsection = np.linspace(0, .75, num_of_colors)
# look the colormap up by attribute name instead of eval()-ing a built string;
# an invalid cm_name raises AttributeError
colormap = getattr(cm, cm_name)
colors = [colormap(x) for x in cm_subsection]

In [None]:
# Display the sampled colormap values built in the previous cell.
colors

seaborn method of viewing colors from list

In [None]:
# Show the sampled colors as a seaborn palette strip.
sns.palplot(colors)

### custom plot example, eg job count over time, standalone vs. proposal

In [None]:
%%time
# Custom plot example: row counts per jnum over time for one employee group,
# drawn once from the 'standalone' dataset and once from 'p1'.
eg_num = 1

fur_color = '#404040'
# work on a copy so the shared job_colors list is not changed by the append
j_colors = job_colors[:]
# presumably the color for the furlough level, one past the last job - confirm
j_colors.append(fur_color)

stand = ds_dict['standalone'][0]
# NOTE: this rebinds the notebook-wide name p to the 'p1' dataset
p = ds_dict['p1'][0]

# rows counted per (date, jnum) for the selected group; unstack() gives one
# column per jnum value
base_jobs = stand[stand.eg == eg_num].groupby(['date', 'jnum']).size().astype(int).unstack()
p_jobs = p[p.eg == eg_num].groupby(['date', 'jnum']).size().astype(int).unstack()

base_jobs_cols = list(base_jobs.columns)
p_jobs_cols = list(p_jobs.columns)

# column label i is drawn with j_colors[i - 1]; the titles tell the two
# otherwise identical-looking figures apart
base_jobs.plot(color=[j_colors[i - 1] for i in base_jobs_cols],
               title='standalone job counts, employee group {}'.format(eg_num))
p_jobs.plot(color=[j_colors[i - 1] for i in p_jobs_cols],
            title='p1 job counts, employee group {}'.format(eg_num))

plt.show()