In [None]:
import pandas as pd
import numpy as np
import os
import string
import glob
from datetime import datetime as dt
import datetime
# Notebook display settings: show every column, render floats with three
# decimals, and allow up to 500 rows to be printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# Use the fully qualified option name.  Option names are matched as patterns,
# so the bare 'max_rows' is ambiguous (and rejected) whenever pandas defines
# more than one option whose name contains it.
pd.set_option('display.max_rows', 500)

# Statewide Operations Productivity

Semi-structured Excel files are automatically emailed to `paul.washburn@majorbrands.com` on the 15th and last day of each month.  The data contains all hours for each worker in the warehouse.

**Note:  Negative records are filtered out of the data (only rows with positive total wages are kept), as they were not associated with valid `employee_id`s.  These records are few in number, but the exclusion should be kept in mind when reading the high-level totals.**

In [None]:
from datetime import datetime as dt

base_dir = 'C:/users/pmwash/Desktop/Re-Engineered Reports/Day Hours/'

def drop_unnecessary_characters(str_list):
    '''Return lower-cased labels with spaces turned into underscores and
    every '-_' pair removed (so 'A - B' becomes 'a_b').'''
    cleaned = []
    for raw in str_list:
        label = str(raw).lower().replace(' ', '_')
        cleaned.append(label.replace('-_', ''))
    return cleaned

def replace_unnamed_and_nans(col_list):
    '''Flatten two-row header labels of the form '<group>|<specifier>'.

    Each label is handled in order:

    * a label containing '|nan' has that text removed;
    * a label containing 'unnamed:_' has that text replaced by the group of
      the most recent fully named label, after which every digit is stripped
      from the result;
    * any other label is kept as-is and its part before '|' becomes the
      current group.

    Returns a new list of labels with the same length as col_list.
    '''
    new_col_list = []
    # Start with an empty group so an 'unnamed:_' label that appears before
    # any named label no longer raises UnboundLocalError.
    col_group = ''
    for col in col_list:
        col = str(col)
        if '|nan' in col:
            new_col_list.append(col.replace('|nan', ''))
        elif 'unnamed:_' in col:
            newcol = col.replace('unnamed:_', col_group)
            # drops the positional number pandas put in the 'unnamed' label
            new_col_list.append(''.join(c for c in newcol if not c.isdigit()))
        else:
            new_col_list.append(col)
            col_group = col.split('|')[0]
    return new_col_list

def adjust_roster_id(roster_emp_id):
    '''Return every ID as a string with its fourth character (index 3)
    removed; IDs shorter than four characters come back unchanged.'''
    fixed_id = []
    for raw_id in roster_emp_id:
        text = str(raw_id)
        fixed_id.append(text[:3] + text[4:])
    return fixed_id

def preprocess_hr_data(file_path):
    '''Accepts path to the export from ADP from HR which is emailed
    twice per month.

    The file is read as CSV after skipping its first 8 rows.  The header row
    and the first data row are combined into '<group>|<specifier>' column
    names, every column other than the three identifier columns is converted
    to float32, and the period start date is parsed from the file name, which
    must contain 'Worked ' followed by the date as mmddYYYY and then ' - '.

    Returns a DataFrame restricted to rows with positive 'total|wages',
    without employee names, and with derived columns: starting_date, year,
    labor_level, month, pay_period, month_period and night_crew.
    '''
    df = pd.read_csv(file_path, skiprows=8)
    
    # clean up column names
    # the first data row holds the second header level (hours / wages / ...)
    df.loc[0] = col_specifier = drop_unnecessary_characters(df.loc[0])
    df.columns = drop_unnecessary_characters(df.columns)
    col_list = [a +'|'+ b for a,b in zip(df.columns, col_specifier)]
    df.columns = replace_unnamed_and_nans(col_list)
    df.drop(index=0, inplace=True)
    
    # set data types to numeric after removing miscellaneous symbols
    non_numeric_cols = ['labor_level_selected', 'employee_id', 'employee_name']
    numeric_cols = [col for col in df.columns if col not in non_numeric_cols]
    for col in numeric_cols:
        # drop accounting symbols in currency
        df[col] = df[col].str.replace('$', '')
        df[col] = df[col].str.replace(',', '')
        # accounting-style negatives: '(12.50)' becomes '-12.50'
        df[col] = df[col].str.replace('(', '-')
        df[col] = df[col].str.replace(')', '')
        df[col] = df[col].astype(np.float32)
        
    # capture date from the file name
    dat = file_path.split('Worked ')[1]
    df['starting_date'] = dt.strptime(dat.split(' - ')[0], '%m%d%Y')
    
    # set indices
    df['year'] = df.starting_date.apply(lambda x: x.year)
    non_numeric_cols = ['starting_date'] + non_numeric_cols
    df.set_index(non_numeric_cols, inplace=True)
    df.index = df.index.droplevel('employee_name') #drop names for privacy
    
    # map in semantics for labor level
    labor_level_dict = {'/50/5220////' : 'Shipping Repacking',
                        '/50/6502////' : 'Shipping Wages',
                        '/50/6513////' : 'Shipping Casual', 
                        '/70/5220////' : 'Warehouse Repacking',
                        '/70/7202////' : 'Warehouse Wages',
                        '/70/7214////' : 'Warehouse Casual',
                        '/70/7201////' : 'Warehouse Management'}
    df['labor_level'] = df.index.get_level_values('labor_level_selected')
    df.labor_level = df.labor_level.map(labor_level_dict)
    df['month'] = dat_ix = df.index.get_level_values('starting_date')
    df.month = df.month.apply(lambda d: format(d, '%B'))
    # pay period '01' when the start date falls on the 1st, otherwise '02'
    df['pay_period'] = ['01' if str(d).split('-')[-1].split(' ')[0]=='01' else '02' for d in dat_ix]
    df['month_period'] = df.month.astype(str) + '_' + df.pay_period.astype(str)
    
    df.reset_index(inplace=True)
    df['employee_id'] = df['employee_id'].astype(str)
    df['employee_id'] = df['employee_id'].str.replace(' ', '').str.upper()
    df['employee_id'] = adjust_roster_id(df['employee_id'])
    
    # drop negative values
    # Take an explicit copy so the column added below is written to an
    # independent frame rather than to a slice of the unfiltered data
    # (avoids SettingWithCopyWarning).
    df = df.loc[df['total|wages'] > 0].copy()
    # labor levels whose mapped name contains 'Shipping' count as night crew
    df['night_crew'] = df['labor_level'].apply(lambda s: 'Shipping' in str(s))
    
    return df

# Parse every ADP export found in base_dir and stack the results.
file_list = glob.glob(base_dir + '*.csv')
# Concatenate once instead of calling DataFrame.append per file: append copies
# the accumulated frame on every call and was removed in pandas 2.0.
hr_frames = [preprocess_hr_data(file) for file in file_list]
# pd.concat rejects an empty list, so keep the empty-frame result when no
# export files are present.
ops_hours_df = pd.concat(hr_frames) if hr_frames else pd.DataFrame()

In [None]:
def fetch_operations_roster(fpath):
    '''Read the roster CSV at fpath and return it with normalized column
    names, 'position_id' exposed as an upper-case, space-free 'employee_id'
    string, and the first/last name columns removed.'''
    roster = pd.read_csv(fpath)
    roster.columns = drop_unnecessary_characters(roster.columns)
    roster = roster.rename(columns={'position_id': 'employee_id'})
    normalized_ids = roster['employee_id'].astype(str).str.replace(' ', '').str.upper()
    roster['employee_id'] = normalized_ids
    return roster.drop(columns=['first_name', 'last_name'])

def merge_roster_with_adp(roster_fpath, ops_hours_df, verbose=1):
    '''
    Combines ADP data with Roster from HR.

    The roster at roster_fpath is left-merged onto ops_hours_df by
    'employee_id', so every ADP row is kept.  A 'warehouse' column is then
    derived from the roster's 'reports_to_name' column.

    When verbose is truthy, the roster columns, the row counts before and
    after the merge, and the employee IDs that have no roster entry are
    printed.

    Returns the merged DataFrame.
    '''
    # read in roster data
    roster_df = fetch_operations_roster(roster_fpath)

    # merge with ops_hours_df
    rows_before = ops_hours_df.shape[0]
    ops_df = ops_hours_df.merge(roster_df, on='employee_id', how='left')
    rows_after = ops_df.shape[0]

    # A left merge never removes ADP rows, so comparing against the merged
    # frame always reported an empty list.  What matters is which employee
    # IDs found no match in the roster (their roster columns are NaN).
    not_in_roster = ~ops_hours_df.employee_id.isin(roster_df.employee_id)
    unmatched_ids = ops_hours_df.loc[not_in_roster, 'employee_id'].unique().tolist()
    
    if verbose:
        print('Roster Columns:')
        print(roster_df.columns.tolist())
        print('''
        Merging in EMPLOYEE ROSTER data from HR.  

        This file needs to be updated by hand each month from ADAM COLEMAN or whoever is in that role. 

        Rows before merging in Roster:    {}
        Rows after merging in Roster:     {}

        The following Employee IDs were not found in the Roster:
        {}
        '''.format(rows_before, rows_after, unmatched_ids))

    # add in which warehouse they are in
    manager_dict = {'Manning, Travis': 'STL', 
                    'nan': 'STL', 
                    'Hercher, Donald': 'STL', 
                    'Coffer, Wesley': 'KC',
                    'Surls, Kurtis': 'KC', 
                    'Ade, Richard': 'KC', 
                    'Jorgensen, Skylar': 'KC'}
    # Missing managers are float NaN, which can never equal the 'nan' key
    # above; cast to str for the lookup so that entry actually applies.
    # NOTE(review): this assigns rows without a manager to STL, as the
    # mapping spells out -- confirm that is still the intended default.
    ops_df['warehouse'] = ops_df.reports_to_name.astype(str).map(manager_dict)
    
    return ops_df

# Attach the roster (manager and warehouse information) to the ADP hours,
# without printing the merge summary.
roster_fpath = base_dir + 'lookup data/operations_roster_03202018.csv'
ops_df = merge_roster_with_adp(roster_fpath=roster_fpath,
                               ops_hours_df=ops_hours_df,
                               verbose=0)
#ops_df.set_index(['warehouse', 'reports_to_name', 'employee_id', 'month_period']).head()

In [None]:
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

def plot_by_month(df, y, title, 
               seperate_y_axis=False, 
               x_axis_label='', y_axis_label='', 
               scale='linear', initial_hide=False,
               barmode='group'):
    '''
    Plot variables by month

    Sums the columns listed in y (a list of column names) for each value of
    df['month'], prints the totals rounded to two decimals, and renders one
    plotly bar trace per column.

    title is used as the chart title and as each bar's hover text.
    seperate_y_axis gives every trace its own overlaid, unlabeled y-axis.
    y_axis_label and scale set the main y-axis title and type.
    initial_hide starts all traces as legend-only.
    barmode is passed straight to the plotly layout.
    x_axis_label is accepted but not used; the x-axis title is always 'Month'.

    Returns None.
    '''
    df = df.loc[:, y + ['month']].groupby('month')[y].sum()
    
    label_arr = list(df)
    # captured before rounding, so the chart shows unrounded totals
    series_arr = list(map(lambda col: df[col], label_arr))
    
    for col in df.columns:
        df[col] = df[col].apply(lambda x: round(x, 2))
    print(title); print('-' * 100); print(df.T)
    
    
    layout = go.Layout(
        barmode = barmode,
        title = title,
        legend = dict(orientation="h"),
        # Month names are labels, not numbers or dates.  'month' is not an
        # axis type plotly knows, so use 'category'.
        xaxis = dict(type='category',
                  title='Month'),
        yaxis=dict(
            title = y_axis_label,
            showticklabels = not seperate_y_axis,
            type = scale
        ),
        bargap=0.2
    )
    
    y_axis_config = dict(
        overlaying = 'y',
        showticklabels = False,
        type = scale )
    
    # A trace's 'visible' accepts True, False or 'legendonly'; the string
    # 'visible' is not one of them.
    visibility = True
    if initial_hide:  visibility = 'legendonly'
        
    # make a trace for each series
    trace_arr = []
    for index, series in enumerate(series_arr):
        trace = go.Bar( #go.Scatter
            x=series.index, 
            y=series, 
            text=title, 
            name=label_arr[index],
            visible=visibility,
            opacity=0.7
        )
        # Add seperate axis for the series
        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(index + 1)
            layout['yaxis{}'.format(index + 1)] = y_axis_config    
        trace_arr.append(trace)
    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)
    print('\n')

    
def interactive_bar_plot(ops_df, y_list, subgroup, y_axis_label='Total Wages ($)'):
    '''Draw one monthly bar chart of y_list for every (warehouse, subgroup)
    combination found in ops_df, titled with that combination.'''
    # removes the tuple punctuation -- parentheses and single quotes
    strip_tuple_syntax = str.maketrans('', '', "()'")
    for group_key, group_df in ops_df.groupby(['warehouse', subgroup]):
        chart_title = str(group_key).translate(strip_tuple_syntax)
        plot_by_month(group_df,
                      y=y_list,
                      title=chart_title,
                      seperate_y_axis=False,
                      y_axis_label=y_axis_label,
                      scale='linear',
                      initial_hide=False)

In [None]:
import glob
from datetime import datetime as dt
import datetime
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

def generate_calendar(year):
    '''Build a one-row-per-day calendar for the given year.

    Columns: Date, WeekNumber ('%U', zero-padded string), Month (full name),
    Year and DOTM (strings), Weekday (full name), IsWeekday (Mon-Fri),
    IsProductionDay (Tue-Fri), LastSellingDayOfMonth (last business day of
    each month), Season (derived from WeekNumber), Holiday (US federal
    holiday), HolidayWeek (number of holidays in the 7-day window centered on
    the day) and ShipWeek ('A' for even week numbers, 'B' for odd).
    '''
    from pandas.tseries.offsets import BMonthEnd, YearEnd
    from pandas.tseries.holiday import USFederalHolidayCalendar

    def season_for_week(week):
        # weeks 00-09 and 50-53 are Winter, 10-22 Spring, 23-35 Summer,
        # 36-49 Autumn
        week = int(week)
        if week <= 9 or week >= 50:
            return 'Winter'
        if week <= 22:
            return 'Spring'
        if week <= 35:
            return 'Summer'
        return 'Autumn'

    start_date = pd.to_datetime('1/1/'+str(year))
    end_date = start_date + YearEnd()
    DAT = pd.date_range(start_date, end_date, freq='D')
    WK = [d.strftime('%U') for d in DAT]
    MO = [d.strftime('%B') for d in DAT]
    holidays = USFederalHolidayCalendar().holidays(start=start_date, end=end_date)

    DAYZ = pd.DataFrame({'Date':DAT, 'WeekNumber':WK, 'Month':MO})
    
    DAYZ['Year'] = [format(d, '%Y') for d in DAT]
    DAYZ['Weekday'] = [format(d, '%A') for d in DAT]
    DAYZ['DOTM'] = [format(d, '%d') for d in DAT]
    DAYZ['IsWeekday'] = DAYZ.Weekday.isin(['Monday','Tuesday','Wednesday','Thursday','Friday'])
    DAYZ['IsProductionDay'] = DAYZ.Weekday.isin(['Tuesday','Wednesday','Thursday','Friday'])
    # Use an offset object rather than the 'BM' string alias, which newer
    # pandas releases deprecate; the offset behaves the same everywhere.
    last_biz_days = pd.date_range(start_date, end_date, freq=BMonthEnd())
    DAYZ['LastSellingDayOfMonth'] = DAYZ['Date'].isin(last_biz_days)

    DAYZ['Season'] = [season_for_week(wk) for wk in WK]
    DAYZ['Holiday'] = DAYZ.Date.isin(holidays)
    # count holidays in the surrounding week; cast first so the rolling sum
    # runs on numbers rather than booleans
    DAYZ['HolidayWeek'] = DAYZ['Holiday'].astype(int).rolling(window=7,center=True,min_periods=1).sum()
    DAYZ['ShipWeek'] = ['A' if int(wk) % 2 == 0 else 'B' for wk in WK]

    DAYZ.reset_index(drop=True, inplace=True)
    
    return DAYZ


def as400_date(dat):
    '''Accepts list of dates as strings from the AS400 and returns a list of
    pandas Timestamps, reading the last six characters of each as yymmdd.'''
    parsed = []
    for raw in dat:
        as_date = dt.strptime(raw[-6:], '%y%m%d').date()
        parsed.append(pd.to_datetime(as_date))
    return parsed

def order_fulfillment_rate(all_files, year=2018, verbose=0):
    '''Reads MTC1 via pw_custseg query files stored on local 
    machine to extract order fulfillment rate

    all_files is an iterable of CSV paths.  Each file is read with explicit
    dtypes, its columns are renamed, dates are parsed with as400_date, and
    case / out-of-stock measures are derived.  Coded columns (customer type,
    call code, class code, warehouse) are mapped to labels, the calendar is
    merged in on 'Date', and Bill & Hold lines are moved five days forward.

    year is added to the calendar years 2016-2018 that are always generated,
    so data from another year can be given calendar attributes.
    verbose, when truthy, prints each file name as it is read.

    Returns all files stacked into one DataFrame (empty when all_files is
    empty).
    '''
    # Specify datatypes from start to avoid issues downstream
    # NOTE(review): '#CSTDTE' here does not match 'CSTDTE' in col_names below;
    # one of the two is probably a typo -- confirm against the export header.
    dtypes = {'#MCUS#':str,'#MIVDT':str,'#MIVND':str,'#MLIN#':str,'#MPRD#':str,'#MQTYS':np.int64,
                'CSCRDT':str,'CCRLIM':np.float64,'CONPRM':str,'CUSPMC':str,'CDDAY':str,
                '#MEXT$':np.float64,'CTERM@':str,'#MCOS$':np.float64,'CSTOR#':str,'#MCHN#':str,'#MCUSY':str,
                '#MSLSP':str,'#MQPC':np.int64,'#MCLA@':str,'#MSIZ@':str,'#MBRND':str,'#MQTY@':str,
                '#MCMP':str,'#MSUPL':str,'#MCALL':str,'#MPRIO':str,'#MINP#':str,
                '#CSTDTE':str,'CUDSCC':str,'CSHP':str,'CADMBR':str,'#MQTYO':np.float64,'#MTRCD':str,'#MCMRC':str}

    ## Rename columns to make sense
    col_names = {'#MCUS#':'CustomerId','#MIVDT':'Date','#MIVND':'Invoice','#MLIN#':'Line','#MPRD#':'SupplierBrandSizeNumber','#MQTYS':'QuantitySold',
                'CSCRDT':'SeasonCreditLimit','CCRLIM':'CreditLimit','CONPRM':'OnPremise','CUSPMC':'MerchandiseClass','CDDAY':'X2',
                '#MEXT$':'Revenue','CTERM@':'TermsCode','#MCOS$':'Cost','CSTOR#':'BarChainCode','#MCHN#':'ChainId','#MCUSY':'CustomerType',
                '#MSLSP':'SalespersonId','#MQPC':'QPC','#MCLA@':'ClassCode','#MSIZ@':'SizeCode','#MBRND':'BrandId','#MQTY@':'QtyCode',
                '#MCMP':'Warehouse','#MSUPL':'SupplierId','#MCALL':'CallCode','#MPRIO':'Priority','#MINP#':'ProductId','#MPRM@':'X1',
                'CSTDTE':'CustomerSetup','CUDSCC':'DisplayCaseClass','CSHP':'Ship','CADMBR':'ShipWeekPlan','#MQTYO':'QuantityOrdered',
                '#MTRCD':'TransactionCode','#MCMRC':'CreditReasonCode'}

    ## Label customer types, call codes, class codes & warehouse
    type_map = {'A':'Bar/Tavern','C':'Country Club','E':'Transportation/Airline','G':'Gambling',
                'J':'Hotel/Motel','L':'Restaurant','M':'Military','N':'Fine Dining','O':'Internal',
                'P':'Country/Western','S':'Package Store','T':'Supermarket/Grocery','V':'Drug Store',
                'Y':'Convenience Store','Z':'Catering','3':'Night Club','5':'Adult Entertainment','6':'Sports Bar',
                'I':'Church','F':'Membership Club','B':'Mass Merchandiser','H':'Fraternal Organization',
                '7':'Sports Venue'}
    call_codes = {'01':'Customer Call','02':'ROE/EDI','03':'Salesperson Call','04':'Telesales','BH':'Bill & Hold',
                  'BR':'Breakage','CP':'Customer Pickup','FS':'Floor Stock','HJ':'High Jump','KR':'Keg Route',
                  'NH':'Non-Highjump','NR':'Non-Roadnet','PL':'Pallets','PR':'Personal','RB':'Redbull',
                  'SA':'Sample','SP':'Special','WD':'Withdrawal'}
    product_class_map = {'10':'Liquor', '25':'Spirit Coolers', '50':'Wine', '51':'Fine Wine', '53':'Keg Wine',
                         '55':'Sparkling Wine & Champagne', '58':'Package Cider', '59':'Keg Cider', '70':'Wine Coolers',
                         '80':'Malt Coolers/3.2 Beer', '84':'High-Alcohol Malt', '85':'Beer', '86':'Keg Beer', 
                         '87':'Keg Beer w/ Deposit', '88':'High Alcohol Kegs', '90':'Water/Soda', '91':'Other Non-Alcoholic',
                         '92':'Red Bull', '95':'Taxable Items - On Premise', '99':'Miscellaneous'}

    # The calendar does not depend on the file being read, so build it once
    # instead of regenerating three years of days for every file.
    calendar_years = [2016, 2017, 2018]
    if year not in calendar_years:
        calendar_years.append(year)
    calendar = pd.concat([generate_calendar(year=yr) for yr in calendar_years])

    frames = []
    for file in all_files:
        if verbose: print('Reading file:\n %s' %file); print('-'*100)
        c = pd.read_csv(file, header=0, dtype=dtypes)

        c.rename(columns=col_names, inplace=True)
        c.drop(labels=['X1','X2'], axis=1, inplace=True)

        ## Extract Invoice & Line
        c['InvoiceLine'] = [str(a)+'_'+str(b) for a,b in zip(c.Invoice, c.Line)]

        ## Extract proper dates and derivative data
        c.Date = as400_date(c.Date)
        
        ## Extract Cases 
        sold = c['QuantitySold'].astype(np.float64)
        QPC = c['QPC'].astype(np.float64)
        c['Cases'] = np.divide(sold, QPC)
        c['Bottles'] = sold
        
        ## Cases short
        # out of stock: nothing was sold on the line, or it is a type 'A'
        # transaction carrying credit reason code '17'
        c['OutOfStock'] = (sold == 0).astype(np.int64)
        OOS_CR_REASON = c['CreditReasonCode'] == '17'
        TRC_A = c['TransactionCode'] == 'A'
        c.loc[OOS_CR_REASON & TRC_A, 'OutOfStock'] = 1
        Q_ORDERED = c['QuantityOrdered'].astype(np.float64)
        c['CasesOrdered'] = np.divide(Q_ORDERED, QPC)
        is_oos = c['OutOfStock'] == 1
        c.loc[is_oos, 'CasesShort'] = c.loc[is_oos, 'CasesOrdered'].astype(np.float64) - c.loc[is_oos, 'Cases']
         
        c.CustomerType = c.CustomerType.map(type_map)
        c.CallCode = c.CallCode.map(call_codes)
        c.ClassCode = c.ClassCode.map(product_class_map)
        c['WarehouseCrossdock'] = c.Warehouse.map({'1':'KC','2':'STL','3':'COL','5':'SPFD'})
        c['Warehouse'] = c.Warehouse.map({'1':'KC','2':'STL','3':'STL','5':'KC'})
        
        ## Merge with calendar
        c = c.merge(calendar, on='Date', how='left')
        c['DOM'] = c.Date.apply(lambda x: x.day)
        
        ## Change bill & hold dates to next month
        is_bill_n_hold = c.CallCode == 'Bill & Hold'
        c.loc[is_bill_n_hold, 'Date'] = c.loc[is_bill_n_hold, 'Date'] + pd.Timedelta(days=5)
        c.loc[is_bill_n_hold, 'Month'] = c.loc[is_bill_n_hold, 'Date'].dt.strftime('%B')
        # The calendar stores Year as a string, so write a string here too;
        # an int would make Bill & Hold rows group apart from the rest.
        c.loc[is_bill_n_hold, 'Year'] = c.loc[is_bill_n_hold, 'Date'].dt.strftime('%Y')
        
        frames.append(c)

    ## Stack everything once (DataFrame.append was removed in pandas 2.0)
    return pd.concat(frames) if frames else pd.DataFrame()

# Locate the pw_custseg extracts (the mass query of MTC1) on the local machine
path = 'C:\\Users\\pmwash\\Desktop\\Re-Engineered Reports\\Customer Segmentation\\Data\\*.csv'
all_files = glob.glob(path)

# keep only the extracts whose month tag (taken from the file name) is wanted
desired_months = ['12-2017', '01-2018', '02-2018']
months_in_files = [str(s).split('\\pw_custseg ')[1].split('.csv')[0] for s in all_files]
selected_files = [fpath for fpath, month_tag in zip(all_files, months_in_files)
                  if month_tag in desired_months]

# build the MTC frame from the selected extracts
mtc_df = order_fulfillment_rate(selected_files)
# mtc_df.head()

In [None]:
def _monthly_hours(ops, prefix):
    '''Sum the hour columns by year/warehouse/month, name them
    '<prefix>_<kind>_hours' and label the index Year/Warehouse/Month.'''
    hour_cols = ['overtime|hours', 'regular|hours', 'total|hours']
    hours = ops.groupby(['year', 'warehouse', 'month'])[hour_cols].sum()
    hours.columns = ['{}_{}'.format(prefix, col.replace('|', '_')) for col in hour_cols]
    hours.index.names = ['Year', 'Warehouse', 'Month']
    return hours


def combine_ops_and_mtc(ops_df, mtc_df, net_cases=False):
    '''Aggregates by month both dataframes then merges them on
    warehouse, year and month in order to derive CPMH at various levels
    
    Still need to do it by type (i.e. warehouse hours)

    ops_df needs the columns year, warehouse, month, night_crew and the three
    '...|hours' columns; mtc_df needs Year, Warehouse, Month, Cases and
    Revenue.  When net_cases is True only MTC rows with positive Cases are
    used.  Neither input frame is modified.

    Returns one row per year/warehouse/month that has day-crew hours, with
    day_*, night_* and overall_* hour totals, cases, revenue, and
    '<crew>_cpmh_adjusted' columns computed as cases / regular hours.
    '''
    hour_cols = ['overtime|hours', 'regular|hours', 'total|hours']
    key_names = ['Year', 'Warehouse', 'Month']
    no_spaces = lambda s: str(s).replace(' ', '')

    # preprocess
    # Work on trimmed copies: normalizing the keys in place used to rewrite
    # the caller's frames (year became text, missing warehouses became the
    # string 'nan'), which leaked into later uses of ops_df / mtc_df.
    mtc = mtc_df.loc[mtc_df.Cases > 0] if net_cases else mtc_df
    mtc = mtc[key_names + ['Cases', 'Revenue']].copy()
    ops = ops_df[['year', 'warehouse', 'month', 'night_crew'] + hour_cols].copy()
    for col in ('Warehouse', 'Year'):
        mtc[col] = mtc[col].apply(no_spaces)
    for col in ('warehouse', 'year'):
        ops[col] = ops[col].apply(no_spaces)

    # process mtc_df so it is aggregated by month
    monthly_sales = mtc.groupby(key_names)[['Cases', 'Revenue']].sum()
    monthly_sales.columns = ['cases', 'revenue']

    # day crew: the day-crew months define the rows of the result
    bymonth_df = _monthly_hours(ops.loc[ops.night_crew == False], 'day')
    bymonth_df = bymonth_df.join(monthly_sales, how='left')
    bymonth_df['day_cpmh_adjusted'] = np.divide(bymonth_df['cases'],
                                                bymonth_df['day_regular_hours'])

    # night crew
    bymonth_df = bymonth_df.join(_monthly_hours(ops.loc[ops.night_crew == True], 'night'))
    bymonth_df['night_cpmh_adjusted'] = np.divide(bymonth_df['cases'],
                                                  bymonth_df['night_regular_hours'])

    # both crews combined
    bymonth_df = bymonth_df.join(_monthly_hours(ops, 'overall'))
    bymonth_df['overall_cpmh_adjusted'] = np.divide(bymonth_df['cases'],
                                                    bymonth_df['overall_regular_hours'])

    # make ix lowercase for plotting
    bymonth_df.index.names = [str(s).lower() for s in bymonth_df.index.names]
    bymonth_df.reset_index(inplace=True, drop=False)

    return bymonth_df

# Combine monthly hours with monthly cases to get the CPMH measures
bymonth_df = combine_ops_and_mtc(ops_df, mtc_df)
# bymonth_df.head()

# specify parameters
subgroup = 'year'
y_list = ['day_cpmh_adjusted', 'night_cpmh_adjusted', 'overall_cpmh_adjusted']

print('''
CPMH by Warehouse/Month
''')

# Label the y-axis explicitly: the plotting helper defaults to
# 'Total Wages ($)', which would mislabel these cases-per-hour series.
interactive_bar_plot(bymonth_df, y_list, subgroup,
                     y_axis_label='CPMH (Cases / Regular Hours)')

-----------------------------------------------------------

# *Peripheral Information*

Information shown below is non-central to the intended purpose of this analysis. 

-----------------------------------------------------------
# Total Employee Wages 

Below wages are plotted by various categories.  Numbers shown are summed by group and month to show changes over time.  

## Wages by Warehouse & Day/Night

`True` indicates "Night Crew", `False` indicates "Day Crew."

In [None]:
# specify parameters: wage components per warehouse, split by day/night crew
subgroup = 'night_crew'
y_list = ['doubletime|wages', 'overtime|wages', 'regular|wages', 'total|wages']

# printed section title (fixes the truncated word "Cre")
print('''
Wages ($) by Warehouse & Day/Night Crew
''')

interactive_bar_plot(ops_df, y_list, subgroup)

## Wages by Warehouse & Manager

In [None]:
# wage components per warehouse, split by the manager each employee reports to
y_list = ['doubletime|wages', 'overtime|wages', 'regular|wages', 'total|wages']
subgroup = 'reports_to_name'

print('\nWages ($) by Warehouse/Manager\n')

interactive_bar_plot(ops_df, y_list=y_list, subgroup=subgroup)

## Wages by Warehouse & Labor Level

In [None]:
# wage components per warehouse, split by labor level
y_list = ['doubletime|wages', 'overtime|wages', 'regular|wages', 'total|wages']
subgroup = 'labor_level'

print('\nWages ($) by Warehouse/Labor Level\n')

interactive_bar_plot(ops_df, y_list=y_list, subgroup=subgroup)

-----------------------------------------------------------

# Total Employee Hours 

The plots below use the same plotting method as above, but with employee hours on the y-axis.

## Hours by Warehouse & Day/Night

`True` indicates "Night Crew", `False` indicates "Day Crew."

In [None]:
# specify parameters: hour components per warehouse, split by day/night crew
subgroup = 'night_crew'
y_list = ['doubletime|hours', 'overtime|hours', 'regular|hours', 'total|hours']

# The printed title previously said "Labor Level" although the grouping
# here is day/night crew.
print('''
Hours by Warehouse & Day/Night Crew
''')

interactive_bar_plot(ops_df, y_list, subgroup, y_axis_label='Total Hours')

## Hours by Warehouse & Labor Level

In [None]:
# hour components per warehouse, split by labor level
y_list = ['doubletime|hours', 'overtime|hours', 'regular|hours', 'total|hours']
subgroup = 'labor_level'

print('\nHours by Warehouse/Labor Level\n')

interactive_bar_plot(ops_df, y_list=y_list, subgroup=subgroup, y_axis_label='Total Hours')

# Appendix & Technical Notes

## About the Data

Data for this report, specifically the HR data, did not start coming in until February 2018.  **This means that only information from February 2018 forward will be shown.**  Due to lack of structured processing and organization of data in the past we do not have a history available unless ADP can provide it.  Given we switched HR system providers in 2017 this is unlikely.  

The data for this report is sourced from the following:

- Summary data from ADP is emailed from HR twice per month (1st and 15th) to summarize all operations hours.
- A Roster is merged in to match managers and thus warehouse information.
- Following precedent (i.e. the old version of this report), Non-Standard Cases are used.  If the Daily Report is ever systematized then we can use Split Cases instead (if desired).
- The query `pw_custseg` is used to access `MTC1` data.  This needs to be updated before running this report.

Names are dropped from all data sets to protect employee privacy.  

In [None]:
# import matplotlib.pyplot as plt
# import seaborn as sns
# from math import ceil as roundup
# %matplotlib inline

# def plot_timeseries_by_category(ops_df, x, y='total|wages', category='labor_level', 
#                      sub_category='warehouse', suptitle=None, verbose=0):
#     '''
    
#     '''
#     all_categories = ops_df[category].unique()
#     n_levels = len(all_categories)
    
#     if verbose:
#         print('''
#         There are {} levels in this category:
#         {}
#         '''.format(n_levels, all_levels))

#     n_rows = roundup(n_levels/2)
#     fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(15, 6*n_rows), sharex=False)

#     for i, cat in enumerate(all_categories):
#         _df = ops_df.loc[ops_df[category] == cat]
#         _df =  _df.groupby([sub_category, x])[y].sum().reset_index(drop=False)

#         all_subcategories = _df[sub_category].unique()
        
#         for subcat in all_subcategories:
#             _df = _df.loc[_df[sub_category] == subcat]
#             N = np.arange(len(_df[x].unique()))
            
#             if _df.shape[0] == 0:
#                 pass
#             elif i < n_rows:
#                 _df.plot(x, y, kind='barh', ax=axes[i, 0])
#                 #axes[i, 0].set_xticks(N, _df[x])
#                 axes[i, 0].grid(alpha=.7)
#                 axes[i, 0].set_xlabel('Total Wages')
#                 axes[i, 0].set_ylabel('')
#                 axes[i, 0].set_title(subcat)
#                 axes[i, 0].legend(loc='best')
#             else:
#                 _df.plot(x, y, kind='barh', ax=axes[i-n_rows, 1])
#                 axes[i-n_rows, 1].grid(alpha=.7)
#                 axes[i-n_rows, 1].set_xlabel('Total Wages')
#                 axes[i-n_rows, 1].set_ylabel('')
#                 axes[i-n_rows, 1].set_title(subcat)
#                 axes[i-n_rows, 1].legend(loc='best')
#         sns.despine()
        
#     if suptitle != None: plt.suptitle(suptitle)
    
#     return None
        
# plot_by_category(ops_df, 
#                  x='month',
#                  y='total|wages', 
#                  category='labor_level', 
#                  sub_category='warehouse', 
#                  suptitle='Hours v. Wages by Manager')
