# Graphics Report V5

## Paul M. Washburn

In [1]:
import pandas as pd
import numpy as np
from pandas import read_table
import time
from datetime import datetime as dt
import re
import time # add decorator to time functions
from collections import OrderedDict
from functools import wraps

# Allow up to 1000 columns when a frame is displayed.
pd.set_option("display.max_columns", 1000)


def pct_change(new, old):
    '''
    Relative change from `old` to `new`, expressed as a fraction of `old`.
    '''
    return (new - old) / old


def len_unique(x):
    '''
    Number of distinct values in `x`, as reported by pd.unique.
    '''
    return len(pd.unique(x))

def timing_function(some_function):
    '''
    Decorator function.  Prints how long each call to the wrapped function
    takes, then returns that call's result unchanged.

    Parameters
    ----------
    some_function : callable
        The function to time.

    Returns
    -------
    callable
        A wrapper that accepts any positional/keyword arguments, forwards
        them to `some_function` and prints 'Runtime: <seconds> seconds'.
        functools.wraps copies the wrapped function's metadata onto it.

    Nothing is printed if `some_function` raises; the exception propagates.
    '''
    @wraps(some_function)
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic clock meant for measuring intervals;
        # time.time() follows the wall clock and may jump if it is adjusted.
        started = time.perf_counter()
        result = some_function(*args, **kwargs)
        time_elapsed = round(time.perf_counter() - started, 2)
        print('Runtime: ' + str(time_elapsed) + ' seconds')
        return result

    return wrapper

In [None]:
# Reminder printed each time this cell runs.
print('Make sure Franky shows up (in salesperson field) as "TOCCO, FRANKY"')

# Designer names in "Last, First" form.
active_designers = [
    'Wilhoit, Katie',
    'Wright, Paige',
    'Whited, Phillip D',
    'Tocco, Franky',
]

# The same people in upper case.
# NOTE(review): 'Phillip D' above vs 'PHILIP D.' here differ in spelling and
# punctuation -- confirm which form the source data actually uses.
active_designer_salespeople = [
    'WILHOIT, KATIE',
    'WRIGHT, PAIGE',
    'WHITED, PHILIP D.',
    'TOCCO, FRANKY',
]

def fetch_current_month_year(month=None):
    '''
    Fetches month & year based on current date.

    Parameters
    ----------
    month : int or None
        Month number (1-12) whose name should be reported.  None (the
        default) means the current month.

    Returns
    -------
    dict
        {'this_yr': <current year as int>, 'this_mo': <full month name>}

    Raises
    ------
    ValueError
        If `month` is not a valid month number.

    NOTE(review): the previous body read an undefined name `last_mo`, so it
    raised NameError on every call.  If the report is meant to cover the
    previous month, pass it explicitly via `month=`.
    '''
    now = dt.now()
    if month is None:
        month = now.month

    # Anchor on day 1: replacing only the month of today's date fails when
    # today is the 29th-31st and the target month is shorter.
    this_mo = dt(now.year, month, 1).strftime('%B')

    return {'this_yr': now.year, 'this_mo': this_mo}

@timing_function
def preprocess_raw_data(
        raw_path='N:/Operations Intelligence/Monthly Reports/Graphics/Raw Data/POSTER Detail_dump.txt',
        out_path='N:/Operations Intelligence/Monthly Reports/Graphics/Raw Data/renamed raw data.csv'):
    '''
    Pre-processes information from Diver sent on Mondays via email.

    Reads the comma-separated dump at `raw_path`, renames its columns to
    readable names, maps warehouse codes to city names, drops a few unused
    columns and writes the result to `out_path` as CSV.

    Parameters
    ----------
    raw_path : str
        Location of the raw dump.  Defaults to the shared-drive file.
    out_path : str
        Where the renamed CSV is written.  Defaults to the shared-drive file.

    Returns
    -------
    None -- the result is only written to disk.
    '''
    df = read_table(raw_path, engine='python', doublequote=False, header=0, sep=',')

    col_names = {
        'ID': 'ID',
        'DVMKET': 'Warehouse',
        'DVJOBID': 'JobId',
        'DVJOBNAME': 'JobName',
        'DVREPRNT': 'Reprint',
        'DVJBSTAT': 'JobStatus',
        'DVCUSTNUM': 'CustomerId',
        'DVCUSTN': 'Customer',
        'CHINVDAT': 'InvoiceDate',
        'DVITMID': 'ItemId',
        'DVITMCAT': 'ItemCategory',
        'DVITMDSC': 'ItemDescription',
        'DVITMWTH': 'ItemWidth',
        'DVITMHGT': 'ItemHeight',
        'DVQTYORD': 'QtyOrdered',
        'DVMULTPL': 'GraphicsJobOption',  # check with Rachel for semantics
        'DVLINAMT': 'JobTotalCost',
        'DVNMBMN': 'NonMajorBrandsMentions',  # verify with Rachel
        'DVJOBSTR': 'AccessoryPrintBanner',
        'DVSUPP': 'SupplierId',
        'DVSUPNAM': 'Supplier',
        'DVBRNDID': 'ProductId',
        'DVBRNDNM': 'Product',
        'DVSBRNID': 'BrandId',
        'DVSBRNNM': 'Brand',
        'DVMBMENT': 'MajorBrandsMentions',  # verify with Rachel
        'DVCBRATE': 'ChargebackRate',
        'DVDISTCS': 'PosterPrice',  # verify with Rachel
        'DVCBAMT': 'ChargebackAmount',
        'DVCBTOTL': 'ChargebackTotal',
        'DVSPNBR': 'SalespersonId',
        'DVSPNAME': 'Salesperson',
        'DVIMAGE': 'DVIMAGE',
        'DVAPRDIR': 'Director',
        'DVBRND': 'DVBRND',  # DVSBRNID
        'DVITMNUM': 'unsure11',
        'DVITMOPT': 'DVITMOPT',
        'DVCBOVRD': 'BrandMentionOverride',
        'DVGRPHDS': 'Designer',
        'DVGRPHDF': 'User',
    }
    df.rename(columns=col_names, inplace=True)

    # Warehouse codes arrive as numbers; codes missing from the map become NaN.
    warehouse_names = {'1': 'Kansas City', '2': 'Saint Louis', '3': 'Columbia', '5': 'Springfield'}
    df.Warehouse = df.Warehouse.astype(str).map(warehouse_names)

    df.set_index('ID', inplace=True)
    df.drop(labels=['DVIMAGE', 'ItemWidth', 'ItemHeight', 'DVITMOPT'], axis=1, inplace=True)

    # NOTE(review): ID is the index at this point and index=False skips it,
    # so the ID column is not written out -- confirm that this is intended.
    df.to_csv(out_path, index=False)

    print('Data re-written to disk.')
    
def generate_calendar(year):
    '''
    Generates calendar by day with relevant information.

    Parameters
    ----------
    year : int or str
        Calendar year to generate (1 January through 31 December).

    Returns
    -------
    pandas.DataFrame
        One row per day with the columns
        Date, WeekNumber ('%U', zero-padded string), Month, Year, Weekday,
        DOTM (zero-padded day of the month), IsWeekday (Monday-Friday),
        IsProductionDay (Tuesday-Friday), LastSellingDayOfMonth (last
        business day of each month; holidays are not taken into account),
        Season (derived from WeekNumber), Holiday (US federal holiday),
        HolidayWeek (number of holidays in the centred 7-day window) and
        ShipWeek ('A' for even week numbers, 'B' for odd ones).
    '''
    from pandas.tseries.offsets import BMonthEnd, YearEnd
    from pandas.tseries.holiday import USFederalHolidayCalendar

    start_date = pd.to_datetime('1/1/' + str(year))
    end_date = start_date + YearEnd()
    days = pd.date_range(start_date, end_date, freq='D')
    week_numbers = days.strftime('%U')
    holidays = USFederalHolidayCalendar().holidays(start=start_date, end=end_date)

    cal = pd.DataFrame({'Date': days, 'WeekNumber': week_numbers, 'Month': days.strftime('%B')})

    cal['Year'] = days.strftime('%Y')
    cal['Weekday'] = days.strftime('%A')
    cal['DOTM'] = days.strftime('%d')

    workdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    cal['IsWeekday'] = cal.Weekday.isin(workdays)
    cal['IsProductionDay'] = cal.Weekday.isin(workdays[1:])

    # An offset object is used instead of the 'BM' string alias, which newer
    # pandas releases deprecate; the offset form works across versions.
    last_biz_days = pd.date_range(start_date, end_date, freq=BMonthEnd())
    cal['LastSellingDayOfMonth'] = cal.Date.isin(last_biz_days)

    # Week-number ranges (inclusive) that make up each season.
    season_weeks = [
        ('Winter', range(0, 10)),
        ('Spring', range(10, 23)),
        ('Summer', range(23, 36)),
        ('Autumn', range(36, 50)),
        ('Winter', range(50, 54)),
    ]
    season_by_week = {
        '%02d' % week: season
        for season, weeks in season_weeks
        for week in weeks
    }
    cal['Season'] = cal.WeekNumber.map(season_by_week)

    cal['Holiday'] = cal.Date.isin(holidays)
    cal['HolidayWeek'] = (
        cal['Holiday'].astype(int).rolling(window=7, center=True, min_periods=1).sum()
    )
    cal['ShipWeek'] = ['A' if int(wk) % 2 == 0 else 'B' for wk in week_numbers]

    return cal

Make sure Franky shows up (in salesperson field) as "TOCCO, FRANKY"


In [None]:
# Rewrite the raw dump to disk with readable column names.
preprocess_raw_data()

# Load the CSV that preprocess_raw_data just wrote; this frame is what the
# following cells work on.
df = pd.read_csv('N:/Operations Intelligence/Monthly Reports/Graphics/Raw Data/renamed raw data.csv')

In [None]:
@timing_function
def munge_data(df):
    '''
    Cleans up raw data to put into useable format.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns JobId, ItemId, BrandId, JobStatus,
        InvoiceDate, ChargebackTotal, PosterPrice and Designer.

    Returns
    -------
    pandas.DataFrame
        A new frame, ordered by decreasing length of JobStatus, with the
        added columns JobItemBrandId, JobItemId, InvoiceWeek, InvoiceMonth,
        InvoiceYear, Weekday, DOTM and Profit.  InvoiceDate is converted to
        datetime and the date-derived columns are strings (missing where the
        invoice date is missing).  The frame passed in is left untouched.
    '''
    # sort dataframe in decreasing order by length of "JobStatus" field.
    # reindex returns a new frame, so doing this first keeps every column
    # added below off the caller's frame.
    sorter = df.JobStatus.str.len().sort_values(ascending=False).index
    df = df.reindex(sorter)

    # concatenate fields to identify unique jobs and items
    job_item = df.JobId.astype(str) + '_' + df.ItemId.astype(str)
    df['JobItemBrandId'] = job_item + '_' + df.BrandId.astype(str)
    df['JobItemId'] = job_item

    # mark dates etc; the .dt accessor leaves missing dates as missing
    # values instead of raising the way a per-element strftime does on NaT
    invoice_dates = pd.to_datetime(df.InvoiceDate)
    df['InvoiceDate'] = invoice_dates
    df['InvoiceWeek'] = invoice_dates.dt.strftime('%U')
    df['InvoiceMonth'] = invoice_dates.dt.strftime('%B')
    df['InvoiceYear'] = invoice_dates.dt.strftime('%Y')
    df['Weekday'] = invoice_dates.dt.strftime('%A')
    df['DOTM'] = invoice_dates.dt.strftime('%d')

    # extract profit from price/chargeback
    print('Verify veracity:  chargeback - poster_price = profit')
    df['Profit'] = df.ChargebackTotal.astype(np.float64) - df.PosterPrice.astype(np.float64)

    # fix data errors that should've never happened
    df['Designer'] = df.Designer.mask(df.Designer == 'FRANKY TOCCO', 'Tocco, Franky')

    return df

# Rebind df to the cleaned frame returned by munge_data.
df = munge_data(df)

In [None]:
def extract_stage_name(job_status):
    '''
    Takes list of job statuses (raw) and removes dates, returning
    only the title of each stage.

    Parameters
    ----------
    job_status : iterable
        Raw status entries such as ' Awaiting Artwork (Jan 5 2017 3:15PM)'.
        Each entry is passed through str() first.

    Returns
    -------
    list of str
        For each entry, the text before the first '(' with all whitespace
        removed, e.g. 'AwaitingArtwork'.
    '''
    # The two regexes used previously never did their job: the first was
    # double-escaped inside a raw string (so it only matched literal
    # backslashes) and the second used the POSIX class [[:space:]], which
    # Python's re does not support and could delete real characters.
    strip_whitespace = re.compile(r'\s+')

    return [
        strip_whitespace.sub('', str(status).split('(', 1)[0])
        for status in job_status
    ]


def stage_dates_by_job(job_status, job_ids):
    '''
    Builds a nested dict {job id: {stage name: timestamp}} from the raw
    job-status strings.

    Each status string is split on commas; every piece is expected to look
    like '<stage> (<Mon D YYYY H:MMAM/PM>)'.  Timestamps are re-written as
    'YYYY-MM-DD HH:MM'.  `job_status` and `job_ids` are paired positionally;
    when a job id occurs more than once, its last occurrence wins.
    '''
    def reformat_stamp(raw):
        parsed = dt.strptime(raw, '%b %d %Y %I:%M%p')
        return parsed.strftime('%Y-%m-%d %H:%M')

    # one list of comma-separated pieces per status string
    pieces_per_job = [str(status).split(',') for status in job_status]

    stages_by_job = {}
    for pieces, job_id in zip(pieces_per_job, job_ids):
        # the timestamp is whatever sits between the parentheses
        raw_stamps = [str(piece).split('(')[1].replace(')', '') for piece in pieces]
        stamps = [reformat_stamp(raw) for raw in raw_stamps]

        # stage titles with dates and spaces stripped
        titles = extract_stage_name(pieces)

        stages_by_job[job_id] = dict(zip(titles, stamps))

    return stages_by_job

# send job status & job ids to list (one entry per row of df, same order)
job_status = df.JobStatus.tolist()
job_ids = df.JobId.tolist()

# nested dict: job id -> {stage name -> timestamp string}
job_status_dict = stage_dates_by_job(job_status, job_ids)

@timing_function
def compile_all_job_stages(job_status_dict):
    '''
    Flattens the {job id: {stage: timestamp}} mapping into a DataFrame.

    Parameters
    ----------
    job_status_dict : dict
        Maps each job id to a dict of stage name -> timestamp.

    Returns
    -------
    pandas.DataFrame
        One row per job id with one column per stage name plus 'JobId',
        sorted in descending order by 'AssembledandShipped' and then
        'Incomplete'.

    Raises
    ------
    KeyError
        If no job has an 'AssembledandShipped' or 'Incomplete' stage, since
        those columns are needed for sorting.
    '''
    # Build fresh row dicts rather than adding 'JobId' to the caller's
    # nested dicts, so job_status_dict is left exactly as it was passed in.
    job_stage_list = [
        dict(stages, JobId=job_id)
        for job_id, stages in job_status_dict.items()
    ]

    job_stage_df = pd.DataFrame(job_stage_list)
    job_stage_df = job_stage_df.sort_values(['AssembledandShipped', 'Incomplete'], ascending=False)

    return job_stage_df

# one row per job with a column for each stage timestamp
job_summary_df = compile_all_job_stages(job_status_dict)

# attach the per-job stage columns to the detail rows; the outer join keeps
# job ids that appear on only one side
df = job_summary_df.merge(df, on='JobId', how='outer')

# sort dataframe (descending on every sort column)
sort_cols = ['AssembledandShipped', 'Incomplete', 'JobId']
df.sort_values(sort_cols, ascending=False, inplace=True)

In [None]:
def id_reprint_redesign(df):
    '''
    Flags reprints and redesigns with boolean columns.

    Casts the existing Reprint column to bool and adds Redesign, which is
    True wherever AwaitingArtworkRedesign holds a value.  The frame is
    modified in place and also returned.
    '''
    reprint_flag = df.Reprint.astype(bool)
    df['Reprint'] = reprint_flag

    # a non-null AwaitingArtworkRedesign entry marks the row as a redesign
    redesign_flag = df.AwaitingArtworkRedesign.notnull()
    df['Redesign'] = redesign_flag

    return df

def id_menus_accessories(df):
    '''
    Adds two boolean columns, Accessory and Menu, telling whether each row's
    ItemCategory is one of the accessory / menu categories listed below.
    The frame is modified in place and also returned.
    '''
    accessory_categories = [
        'Menu Books',
        'Table Top Wrap',
        'TT A-Frame Holder',
        'TT A-Frame Holder (Holder Only)',
        'TT Acrylic Stand',
        'TT Acrylic Stand (Stand Only)',
        'TT Flip Stand',
        'TT Flip Stand (Stand Only)',
        'Vivid Board - Dry Erase',
        'Vivid Board - Dry Erase',
        'Light Box',
    ]
    menu_categories = [
        'Menu Books',
        'Drink List',
        'Folded Drink List',
        'Tri-Fold Drink List',
        'Folded Menu Cards',
        'Menu Card - Small Format (QTY <- Total Pages)',
    ]

    # membership test against each category list
    df['Accessory'] = df.ItemCategory.isin(accessory_categories)
    df['Menu'] = df.ItemCategory.isin(menu_categories)

    return df

# add the Reprint/Redesign flags, then the Accessory/Menu flags
df = id_reprint_redesign(df)
df = id_menus_accessories(df)

In [None]:
def drop_cancelled_jobs(df):
    '''
    Drops cancelled jobs and displays some summary information.

    Parameters
    ----------
    df : pandas.DataFrame
        Job detail rows.  A row counts as cancelled when its Cancelled
        column holds a value.  InvoiceYear, Warehouse, Salesperson,
        Customer, JobId and Profit are used for the printed summary.

    Returns
    -------
    pandas.DataFrame
        The rows whose Cancelled value is null.  When the frame has no
        Cancelled column at all, nothing can be cancelled and `df` itself
        is returned.

    Prints a per Warehouse/Salesperson/Customer summary (distinct job count
    and total profit) of this year's cancelled jobs, or a one-line notice
    when there are none.
    '''
    no_cancellations = 'No Jobs Cancelled This Year.'

    if 'Cancelled' not in df.columns:
        print(no_cancellations)
        return df

    is_cancelled = df.Cancelled.notnull()

    # InvoiceYear may hold the year as text (munge_data fills it with
    # strftime('%Y')), which never equals the integer from dt.now().year;
    # convert to numbers before comparing.
    invoiced_this_year = pd.to_numeric(df.InvoiceYear, errors='coerce') == dt.now().year
    df_cancelled = df.loc[is_cancelled & invoiced_this_year]

    summary = None
    if not df_cancelled.empty:
        grp_cols = ['Warehouse', 'Salesperson', 'Customer']
        agg_funcs = {'JobId': lambda ids: ids.nunique(dropna=False), 'Profit': 'sum'}
        summary = df_cancelled.groupby(grp_cols).agg(agg_funcs)

    if summary is not None and len(summary) > 0:
        print('This Year Cancelled Job Summary: \n')
        print(summary)
    else:
        print(no_cancellations)

    return df.loc[~is_cancelled]

# keep only the rows that are not cancelled (a summary is printed on the way)
df = drop_cancelled_jobs(df)

# save intermediate data (the index is not written)
df.to_csv('C:/Users/pmwash/Desktop/Re-Engineered Reports/Graphics/intermediate_graphics_data_dump_diver.csv', index=False)

In [None]:
def turnaround_by_job(job_summary_df):
    '''
    Computes each job's turnaround as AssembledandShipped minus Incomplete.

    Every column except JobId is converted to datetime and a Turnaround
    column is added; `job_summary_df` is modified in place.

    Returns a tuple: a dict mapping JobId to Turnaround (for mapping back
    into the original dataframe) and the modified frame itself.
    '''
    # blank out missing values before the datetime conversion
    job_summary_df.fillna('', inplace=True)

    # every column other than the id is treated as a stage timestamp
    stage_cols = [name for name in job_summary_df.columns.tolist() if name != 'JobId']
    converted = job_summary_df[stage_cols].apply(pd.to_datetime)
    job_summary_df[stage_cols] = converted

    # derive turnaround by job id
    shipped = job_summary_df.AssembledandShipped
    opened = job_summary_df.Incomplete
    job_summary_df['Turnaround'] = np.subtract(shipped, opened)

    # send turnaround to dict for mapping into df
    ids = job_summary_df.JobId.tolist()
    turnarounds = job_summary_df.Turnaround.tolist()
    turnaround_dict = dict(zip(ids, turnarounds))

    return turnaround_dict, job_summary_df
    
# JobId -> Turnaround lookup, plus the job summary frame with its stage
# columns converted to datetimes and a Turnaround column added
turnaround_dict, job_summary_df = turnaround_by_job(job_summary_df)