### Plotting notebook

##### To view the docstring for any of the plotting functions below, click within the name of the function and then press Shift+TAB.

imports, followed by notebook and plotting configuration

In [None]:
%%time
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib import cm

import config as cf
import functions as f
import matplotlib_charting as mp

# Alternative matplotlib style sheets, left disabled; only the seaborn
# style set below is applied.
#plt.style.use('bmh')
#plt.style.use('fivethirtyeight')
sns.set_style("darkgrid")

# Line magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# pandas display options.
# Every key is written out in full.  pandas treats option names as patterns,
# so the bare 'max_columns' / 'max_rows' shorthands raise OptionError as soon
# as more than one registered option matches them (for example the
# 'styler.render.*' options in newer pandas releases).
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 50)
pd.set_option('display.expand_frame_repr', True)
pd.set_option('display.max_rows', 10)
pd.set_option('display.precision', 3)
pd.set_option('display.width', 150)
pd.set_option('display.max_colwidth', 0)

# Callable used to render float values: four decimal places.
pd.set_option('display.float_format', lambda x: '%.4f' % x)

# Notebook-wide matplotlib defaults, applied in one update:
# figure size, font sizes, line width and legend styling.
plt.rcParams.update({
    'figure.figsize': (12.0, 9.0),
    'font.size': 14,
    'axes.labelsize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'axes.titlesize': 20,
    'lines.linewidth': 3,
    'legend.fontsize': 16,
    'legend.fancybox': True,
})

read and assign variables for computed datasets

ds4 is standalone, others are sample proposals

In [None]:
%%time
# Load the pre-computed datasets from the local 'dill' directory.
# NOTE: unpickling executes code embedded in the file, so only load pickle
# files produced by a trusted run of this project.
ds1 = pd.read_pickle('dill/ds1.pkl')
ds2 = pd.read_pickle('dill/ds2.pkl')
ds3 = pd.read_pickle('dill/ds3.pkl')
# The standalone dataset is stored under a different file name (stand.pkl).
ds4 = pd.read_pickle('dill/stand.pkl')

view of first 5 rows of ds1

In [None]:
# head() with no argument returns the first five rows.
ds1.head()

assign reusable variables for functions below

In [None]:
%%time
# Number of job levels, taken from the config module; passed to most chart calls below.
job_levels = cf.num_of_job_levels

In [None]:
%%time
# Color list from the config module, passed as eg_colors to the chart calls below.
eg_colors = cf.eg_colors

In [None]:
%%time
# Color list from the config module; indexed by position when building legend patches and line colors below.
job_colors = cf.job_colors

assign formatter variable

In [None]:
%%time
# Wrap mp.to_percent in a matplotlib FuncFormatter; this object is handed to
# the chart functions below as their `formatter` argument.
formatter = FuncFormatter(mp.to_percent)

assign proposal variable (string)

In [None]:
%%time
# Name, as a string, of the dataset variable to chart (the datasets loaded
# above are ds1, ds2, ds3 and ds4).
proposal = 'ds1'

assign p variable (the dataset whose variable name is stored in the proposal variable)

In [None]:
%%time
# Resolve the dataset by name through an explicit mapping rather than
# eval(): a mistyped or unexpected string now fails with a clear KeyError
# instead of being executed as arbitrary Python.
datasets = {'ds1': ds1, 'ds2': ds2, 'ds3': ds3, 'ds4': ds4}
p = datasets[proposal]

view of columns in p dataset

In [None]:
# Column index of the selected dataset.
p.columns

view of the colors in the job_colors list from the config file

In [None]:
# Draw the job_colors list as a horizontal strip of color swatches.
sns.palplot(job_colors)

assign employee list variable for some functions

these are random samples...

In [None]:
# Employee numbers handed to the per-employee charts below
# (mp.multiline_plot_by_emp and mp.job_level_progression).
emp_list = [10011538, 10013767, 20011485, 30010789]

In [None]:
# Scratch float array for the indexing experiments in the next cells;
# note the single NaN entry near the end.
x = np.array([
    1, 2, 3, 2, 3, 6, 54, 7, 6, 5,
    6, 5, 4, 5, 3, 2, 7, np.nan, 1,
])

In [None]:
# Boolean-mask selection: keeps the elements greater than 3.  The NaN entry
# compares False, so it is not part of the result.
x[x > 3]

In [None]:
# Distinct non-NaN values of x (sorted by np.unique), cast to int and shifted
# down by one.  Presumably this turns 1-based job numbers into 0-based color
# indexes like the z array below; confirm.
np.unique(x[~np.isnan(x)]).astype(int) - 1

In [None]:
# Integer positions used in the next cell to pick entries out of job_colors.
z = np.array([0, 1, 3, 6, 8], dtype=int)

In [None]:
# Build one fully opaque 1x1 rectangle patch per position in z, filled with
# the matching job_colors entry.  The patches are used as legend handles in
# the plot cell below.  (mpatches is imported with the other imports at the
# top of the notebook.)
recs = [
    mpatches.Rectangle((0, 0), 1, 1, fc=job_colors[i], alpha=1)
    for i in z
]

In [None]:
# Display the list of Rectangle patches built above.
recs

In [None]:
# Demo of a hand-built legend: five throw-away scatter points, with the
# legend drawn from the `recs` patches instead of from the plotted artists.
fig, ax = plt.subplots()
legend_labels = ['one', 'two', 'three', 'four', 'five']

for i in range(len(legend_labels)):
    ax.scatter(i, i + 1)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(
    recs,
    legend_labels,
    bbox_to_anchor=(1.15, 0.7),
    fontsize=14,
)

In [None]:
# Scratch list for the list.remove() experiment in the following cells.
test = [1,2,3]

In [None]:
# Removes the first occurrence of 2 in place.  This cell is not re-runnable on
# its own: a second run raises ValueError because 2 is no longer in the list
# (re-run the cell that defines `test` first).
test.remove(2)

In [None]:
# Display the list after the in-place removal.
test

In [None]:
# Draw the row_colors list from the config module as a strip of swatches.
sns.palplot(cf.row_colors)

In [None]:
# Same swatch view for cf.job_colors (the list the job_colors variable was assigned from).
sns.palplot(cf.job_colors)

In [None]:
# rows_of_color chart for ds1, using the 'eg' column list and a 160-column layout.
mp.rows_of_color(
    proposal, ds1, 0, ['eg'], cf.row_colors, job_colors,
    cols=160,
    job_only=False,
    jnum=1,
    cell_border=True,
    eg_border_color='.2',
    job_border_color='.85',
    xsize=18,
    ysize=12,
    chart_example=True,
)

In [None]:
%%time
# quartile_years_in_position for the selected dataset p, with the standalone
# dataset ds4 passed as the second argument and differential plotting enabled.
mp.quartile_years_in_position(
    p, ds4, job_levels, 40,
    cf.job_strs, proposal, cf.proposal_dict, cf.eg_dict, job_colors,
    flip_x=True,
    flip_y=False,
    ysize=16,
    xsize=12,
    plot_differential=True,
    rotate=True,
    custom_color=False,
    cm_name='Set1',
    normalize_yr_scale=True,
    gain_loss_bg=True,
)

age vs. list percentage for a specific month

In [None]:
# Month number handed to age_vs_spcnt as its third argument.
mnum = 80

mp.age_vs_spcnt(
    p, [1, 2, 3], mnum,
    eg_colors, cf.eg_dict,
    proposal, cf.proposal_dict,
    formatter,
    chart_example=False,
)

employees from each group holding a specific job level

In [None]:
%%time
# Restrict the dataset to the rows whose jnum column equals the chosen job
# number, then draw the same age_vs_spcnt chart on that subset.
jnum = 4
job_p = p[p.jnum == jnum]

mp.age_vs_spcnt(
    job_p, [1, 2, 3], 42,
    eg_colors, cf.eg_dict,
    proposal, cf.proposal_dict,
    formatter,
    chart_example=False,
)

In [None]:
%%time
# Employee numbers for this chart (same values as emp_list defined earlier).
emps = [10011538, 10013767, 20011485, 30010789]

# 'mpay' against 'spcnt' for the listed employees.
mp.multiline_plot_by_emp(
    p, 'mpay', 'spcnt', emps,
    job_levels, eg_colors, cf.job_strs,
    proposal, cf.proposal_dict,
    formatter,
    chart_example=False,
)

In [None]:
# Dataset name for this chart; the name is also passed on as the chart label.
prop = 'ds1'

# Resolve the name through an explicit mapping instead of eval(), so an
# unexpected string fails with a KeyError rather than being executed.
datasets = {'ds1': ds1, 'ds2': ds2, 'ds3': ds3, 'ds4': ds4}

# 'jobp' against 'date' for the employees in emp_list.
mp.multiline_plot_by_emp(datasets[prop], 'jobp', 'date', emp_list, job_levels,
                         eg_colors, cf.job_strs, prop, cf.proposal_dict, formatter, chart_example=False)

In [None]:
# 'cat_order' against 'date' for a single employee in ds1.
# NOTE(review): the other multiline_plot_by_emp calls pass eg_colors and
# cf.job_strs in the 6th and 7th positions; this call passes job_colors and
# cf.jobs_dict. Confirm that is intended.
mp.multiline_plot_by_emp(
    ds1, 'cat_order', 'date', [20011327],
    job_levels, job_colors, cf.jobs_dict,
    'ds1', cf.proposal_dict,
    formatter,
)

In [None]:
%%time
# 'jobp' against 'lspcnt' by employee group, drawn as a scatter.
# NOTE(review): the data comes from ds3 while the name passed alongside it is
# `proposal` (set to 'ds1' earlier). Confirm the label matches the dataset.
mp.multiline_plot_by_eg(
    ds3, 'jobp', 'lspcnt', [1, 2, 3],
    cf.job_strs, proposal, cf.proposal_dict,
    job_levels, eg_colors, formatter,
    mnum=0,
    scatter=True,
    exclude_fur=False,
    full_pcnt_xscale=True,
    chart_example=False,
)

In [None]:
%%time
# 'cat_order' against 'lspcnt' by employee group for the selected dataset, as lines (scatter off).
mp.multiline_plot_by_eg(
    p, 'cat_order', 'lspcnt', [1, 2, 3],
    cf.job_strs, proposal, cf.proposal_dict,
    job_levels, eg_colors, formatter,
    mnum=20,
    scatter=False,
    exclude_fur=False,
    full_pcnt_xscale=True,
)

In [None]:
%%time
# Violin plot of the 'age' column for the selected dataset.
mp.violinplot_by_eg(
    p, 'age',
    proposal, cf.proposal_dict,
    formatter,
    mnum=0,
    scale='count',
    chart_example=False,
)

In [None]:
%%time
# age_kde_dist chart for the selected dataset at mnum 0.
mp.age_kde_dist(
    p,
    eg_colors,
    cf.eg_dict,
    mnum=0,
    chart_example=False,
)

In [None]:
%%time
# Box plot of 'spcnt' differences for the three proposal datasets, with the
# standalone dataset ds4 as the second argument.
mp.eg_diff_boxplot(
    [ds1, ds2, ds3], ds4, [1, 2, 3], formatter,
    measure='spcnt',
    comparison='standalone',
    xsize=16,
    year_clip=2035,
    chart_example=False,
)

In [None]:
%%time
# stripplot_distribution_in_category chart for ds2.
mp.stripplot_distribution_in_category(
    ds2, job_levels, 36, cf.full_time_pcnt1,
    eg_colors, job_colors,
    cf.jobs_dict, cf.eg_dict_verbose,
    bg_alpha=.15,
    chart_example=True,
)

In [None]:
%%time
# job_level_progression chart for the employees in emp_list, with the date
# string '2030-12-31' as the third argument.
mp.job_level_progression(
    p, emp_list, '2030-12-31', job_levels,
    eg_colors, job_colors,
    cf.eg_counts, cf.j_changes,
    alpha=.1,
    chart_example=False,
)

In [None]:
# differential_scatter of 'spcnt' against 'age': standalone dataset ds4 first,
# then the three proposal datasets. Only the scatter layer is switched on.
mp.differential_scatter(
    ds4, [ds1, ds2, ds3], 'spcnt', 'age', 65, [1, 2, 3], formatter,
    prop_order=True,
    show_scatter=True,
    show_lin_reg=False,
    show_mean=False,
    mean_len=80,
    dot_size=20,
    lin_reg_order=12,
    ylimit=False,
    ylim=5,
    width=15,
    height=10,
    bright_bg=False,
    chart_style='darkgrid',
    chart_example=True,
)

In [None]:
# Dataset name for this chart; the name is also passed to the chart function.
prop = 'ds1'

# Resolve the name through an explicit mapping instead of eval(), so an
# unexpected string fails with a KeyError rather than being executed.
datasets = {'ds1': ds1, 'ds2': ds2, 'ds3': ds3, 'ds4': ds4}

# Bar chart grouped by time_group 'A', filtered on 'jobp' with the
# 'greater_than' operator and measure_val 3.
mp.job_grouping_over_time(datasets[prop], prop, [1,2,3], cf.job_strs, job_colors,
                          formatter, rets_only=True, time_group='A', plt_kind='bar',
                          measure_subset='jobp', measure_val=3, measure_val2=200, operator='greater_than',
                          display_yrs=40, legend_loc=4, xsize=11, ysize=6, chart_example=False)

In [None]:
%%time
# parallel chart of 'lspcnt' for the list [0, 40, 80, 120]: standalone
# dataset ds4 first, then the three proposal datasets.
mp.parallel(
    ds4, [ds1, ds2, ds3], [1, 2, 3], 'lspcnt', [0, 40, 80, 120],
    job_levels, formatter,
    left=0,
    xsize=5,
    ysize=5,
)

integrated distribution of all jobs, all employee groups

In [None]:
# rows_of_color chart for the selected dataset using the 'jnum' column list,
# with eg_list limited to [1].
mp.rows_of_color(
    proposal, p, 0, ['jnum'], cf.row_colors, job_colors,
    cols=150,
    job_only=False,
    jnum=6,
    cell_border=True,
    eg_border_color='.3',
    eg_list=[1],
    job_border_color='.55',
    xsize=12,
    ysize=9,
    chart_example=True,
)

In [None]:
# rows_of_color chart with 60 as the third argument and the 'eg' and 'fur'
# column list; job_only is off here.
mp.rows_of_color(
    proposal, p, 60, ['eg', 'fur'], cf.row_colors, job_colors,
    cols=150,
    job_only=False,
    jnum=6,
    cell_border=True,
    eg_border_color='.3',
    job_border_color='.55',
    xsize=12,
    ysize=9,
    chart_example=True,
)

In [None]:
# Same rows_of_color call as the previous cell, but with job_only switched on (jnum=6).
mp.rows_of_color(
    proposal, p, 60, ['eg', 'fur'], cf.row_colors, job_colors,
    cols=150,
    job_only=True,
    jnum=6,
    cell_border=True,
    eg_border_color='.3',
    job_border_color='.55',
    xsize=12,
    ysize=9,
    chart_example=True,
)

In [None]:
# quartile_bands_over_time area chart of 'spcnt' for the selected dataset,
# using a custom slice (0 to .75) of the 'Set1' colormap.
mp.quartile_bands_over_time(
    p, 3, 'spcnt', formatter,
    bins=20,
    clip=True,
    year_clip=2035,
    kind='area',
    quartile_ticks=False,
    custom_color=True,
    cm_name='Set1',
    quartile_alpha=.6,
    grid_alpha=.5,
    custom_start=0,
    custom_finish=.75,
    xsize=10,
    ysize=8,
    alt_bg_color=False,
    bg_color='#faf6eb',
)

In [None]:
# Names of the proposal and baseline datasets; both names are also passed to
# the chart function next to the data.
prop = 'ds1'
base = 'ds4'

# Resolve the names through an explicit mapping instead of eval(), so an
# unexpected string fails with a KeyError rather than being executed.
datasets = {'ds1': ds1, 'ds2': ds2, 'ds3': ds3, 'ds4': ds4}

mp.job_transfer(datasets[prop], prop, datasets[base], base, 3, job_colors, job_levels, custom_color=False, gb_period='M')

In [None]:
%%time
# Re-assigns the notebook-wide `proposal` name.  `p` is NOT refreshed here, so
# change both together if a different dataset is wanted.
proposal = 'ds1'

# Resolve the name through an explicit mapping instead of eval(), so an
# unexpected string fails with a KeyError rather than being executed.
datasets = {'ds1': ds1, 'ds2': ds2, 'ds3': ds3, 'ds4': ds4}

mp.eg_multiplot_with_cat_order(datasets[proposal], proposal, 40, 'cat_order', 'lspcnt', formatter, cf.proposal_dict, cf.job_strs, job_colors, job_levels)

In [None]:
# diff_range chart of 'cpay' for the three proposal datasets against ds4,
# showing the mean only (range display off).
mp.diff_range(
    [ds1, ds2, ds3], ds4, 'cpay', [1, 2, 3], [1, 2, 3], formatter, 'Q',
    show_range=False,
    show_mean=True,
    year_clip=2030,
    xsize=7,
    ysize=5,
)

In [None]:
%%time
# job_count_charts for the selected dataset p with the standalone dataset ds4
# as the second argument; plot_egs_sep is switched off.
mp.job_count_charts(p, ds4, plot_egs_sep=False)

In [None]:
%%time
# Employee number to look up; the chart is drawn from the ds3 dataset.
empnum = 20011327
mp.emp_quick_glance(empnum, ds3)

In [None]:
%%time
# single_emp_compare of 'jobp' against 'date' for one employee, across all four datasets.
mp.single_emp_compare(
    20010360, 'jobp', [ds1, ds2, ds3, ds4], 'date',
    formatter, cf.job_strs, eg_colors, cf.eg_dict, job_levels,
    chart_example=True,
)

In [None]:
# job_time_change chart: standalone dataset ds4 first, then ds1 as the only proposal.
# job_list counts down from the configured number of job levels to 1.
mp.job_time_change(
    ds4, [ds1], [1, 2, 3], 'spcnt',
    colors=cf.job_colors,
    job_list=np.arange(cf.num_of_job_levels, 0, -1),
    jobs_dict=cf.jobs_dict,
)

### slicing and groupby month

In [None]:
# group_average_and_median of 'cat_order' for ds1 and ds4, with one filter:
# ldate <= '1990-12-31'.
mp.group_average_and_median(
    ds1, 'ds1', ds4, 'ds4', [1, 2, 3], eg_colors,
    'cat_order', job_levels, cf.jobs_dict, cf.proposal_dict,
    attr1='ldate',
    oper1='<=',
    val1='1990-12-31',
)

In [None]:
# group_average_and_median of 'jnum' for ds1 and ds4, with one filter
# (age > '50', value passed as a string) and job labels switched off.
mp.group_average_and_median(
    ds1, 'ds1', ds4, 'ds4', [1, 2, 3], eg_colors,
    'jnum', job_levels, cf.jobs_dict, cf.proposal_dict,
    job_labels=False,
    attr1='age',
    oper1='>',
    val1='50',
)

In [None]:
%%time
# stripplot_eg_density for the selected dataset, with 40 as the second argument and a narrow figure (xsize=4).
mp.stripplot_eg_density(p, 40, eg_colors, xsize=4)

# Make colormap

Note:  the following 3 cells are included only as an example of creating a custom colormap for charts...

The cm_name variable must be a valid matplotlib colormap name

In [None]:
# Sample num_of_colors evenly spaced colors from the first 75% of a named
# matplotlib colormap.
cm_name = 'Paired'
num_of_colors = 8 + 1
cm_subsection = np.linspace(0, .75, num_of_colors)

# Plain attribute lookup on matplotlib.cm; same result as
# eval('cm.' + cm_name) without executing a constructed string.
colormap = getattr(cm, cm_name)

# Each entry is the color the colormap returns for one sample position.
colors = [colormap(position) for position in cm_subsection]

In [None]:
# Display the raw list of sampled colors.
colors

seaborn method of viewing colors from list

In [None]:
# Draw the sampled colors as a strip of swatches.
sns.palplot(colors)

### custom plot example, eg job count over time, standalone vs. proposal

In [None]:
%%time
# Job counts over time for ONE employee group, standalone (ds4) vs. the
# selected proposal (p).  The same group number is used for both frames so the
# two charts are directly comparable (previously ds4 was filtered on eg 1 and
# p on eg 2).
eg_num = 1

# Rows per (date, jnum) pair, pivoted so that each job number is a column.
base_jobs = ds4[ds4.eg == eg_num].groupby(['date', 'jnum']).size().astype(int).unstack()
p_jobs = p[p.eg == eg_num].groupby(['date', 'jnum']).size().astype(int).unstack()

base_jobs_cols = list(base_jobs.columns)
p_jobs_cols = list(p_jobs.columns)

# The column labels are job numbers; job_colors is indexed at (job number - 1).
base_jobs.plot(color=[job_colors[job - 1] for job in base_jobs_cols],
               title='standalone job counts, eg ' + str(eg_num))
p_jobs.plot(color=[job_colors[job - 1] for job in p_jobs_cols],
            title=proposal + ' job counts, eg ' + str(eg_num))

plt.show()