# Invoice-Routing-Functions

In [66]:
# Generate the total number of invoices for a period by summing per-vendor counts sampled from a Poisson distribution

from scipy.stats import poisson
import numpy as np

def gen_invoices(poisson_mu, num_vendors):
    """Return the total number of invoices received in one period.

    One Poisson-distributed invoice count is drawn per vendor and the
    per-vendor counts are added up.

    poisson_mu  -- the lambda (rate) parameter of the Poisson distribution.
                   Tune it so the total comes out at approximately the right
                   number of invoices for the time period being modelled.
    num_vendors -- the number of vendors Accounts Payable deals with; it is
                   the number of Poisson draws that are made.
    """
    per_vendor_counts = poisson.rvs(poisson_mu, size=num_vendors)
    return np.sum(per_vendor_counts)

In [67]:
# Generate the workload over a period of time
# It could be for any period of time -- substitute the appropriate number in period_duration
# This produces the number of invoices that were routed (either correctly or incorrectly)
#  and the number of invoices that couldn't be tackled within the period

from scipy.stats import norm # the Gaussian distribution
import math # for floor and ceil functions

def get_period_workload(num_invoices_input,
                        mean_std_dev,
                        period_duration,
                        routing_success_rate):
    """Simulate routing a batch of invoices within a fixed amount of time.

    A routing time is drawn for every invoice from a Gaussian distribution.
    Invoices are processed one after another, so the cumulative sum of the
    routing times gives the moment each invoice is finished (this is not
    strictly the right model, but a good approximation).

    Parameters
    ----------
    num_invoices_input : int
        The arriving number of invoices for the period. It can be a straight
        number or a number generated by gen_invoices. Zero is allowed.
    mean_std_dev : sequence
        [mean, std_dev] -- the Gaussian parameters for how long it takes to
        route one invoice, in the same time unit as period_duration.
    period_duration : number
        Total amount of working time available in the period.
    routing_success_rate : float
        The rate at which routed invoices are routed correctly.

    Returns
    -------
    list
        [num_invoices_input,
         num_invoices_routed,        # finished within period_duration
         num_correctly_routed,       # floor(routing_success_rate * routed)
         num_not_correctly_routed,   # routed - correctly routed
         num_invoices_not_routed,    # finished after period_duration
         time_left_in_period]        # period_duration - total routing time;
                                     # negative means that much extra time is
                                     # needed to clear the backlog
    """
    mean = mean_std_dev[0]
    std_dev = mean_std_dev[1]

    # Array of time it takes to route each invoice.
    # NOTE: a Gaussian can yield negative durations when std_dev is large
    # relative to mean; the draws are used exactly as sampled.
    time_array = norm.rvs(loc=mean, scale=std_dev, size=num_invoices_input)
    # [-5:] (not [-5:-1]) so that the final element really is included
    print("Time to route the first few invoices...{} ... and the last few {}".\
          format(time_array[0:5], time_array[-5:]))

    # Cumulative completion time of each invoice
    time_array_cu = np.cumsum(time_array)
    print("Cumulative time to route the first few invoices...{} ... and the last few {}".\
          format(time_array_cu[0:5], time_array_cu[-5:]))

    # Invoices whose cumulative completion time fits inside the period
    num_invoices_routed = int(np.count_nonzero(time_array_cu <= period_duration))
    print("Number of invoices routed = {}".format(num_invoices_routed))

    # Of the invoices routed, the ones successfully/correctly routed
    num_correctly_routed = int(math.floor(routing_success_rate * num_invoices_routed))
    print("Number of invoices correctly routed = {}".format(num_correctly_routed))

    # The number of invoices not correctly routed
    num_not_correctly_routed = num_invoices_routed - num_correctly_routed
    print("Number of invoices NOT correctly routed = {}".format(num_not_correctly_routed))

    # Number of invoices not gotten to because time ran out in the period
    num_invoices_not_routed = int(np.count_nonzero(time_array_cu > period_duration))
    print("Number of invoices not handled due to lack of time = {}".format(num_invoices_not_routed))

    # How much extra time was left over in the period (if any)?
    # If this value is negative it means it will take this much more time to
    # route the invoices that were not handled due to lack of time in the period.
    # With no invoices at all nothing is consumed, so the whole period is left
    # (indexing [-1] on an empty array would raise IndexError).
    if time_array_cu.size:
        time_left_in_period = period_duration - time_array_cu[-1]
    else:
        time_left_in_period = period_duration
    print("Extra time left over in the period = {}".format(time_left_in_period))

    return [num_invoices_input,
            num_invoices_routed,
            num_correctly_routed,
            num_not_correctly_routed,
            num_invoices_not_routed,
            time_left_in_period
           ]

## Assumptions/Constants

In [68]:
# The number of vendors who are sending invoices to Accounts Payable.
# Passed as num_vendors to gen_invoices, which makes one Poisson draw per vendor.
NUM_VENDORS = 2500

In [69]:
# Poisson rate - the rate at which a single vendor generates invoices.
# Choose it so that the invoice total across all of the vendors
# matches the real numbers seen by the organization.
# NOTE(review): the cells below multiply one gen_invoices draw by MONTH_DAYS,
# which suggests this is a per-business-day rate -- confirm.
MU = 0.35

In [70]:
# Hours a single AP Specialist spends routing invoices in a day
NUM_HOURS_WORKED_DAILY_PER_PERSON = 6.5

# Head count of AP Specialists routing invoices
NUM_AP_SPECIALISTS = 4

# Combined daily routing capacity of the whole team, first in hours ...
NUM_HOURS_WORKED_DAILY = NUM_AP_SPECIALISTS * NUM_HOURS_WORKED_DAILY_PER_PERSON
# ... and then in seconds (3600 seconds per hour)
NUM_SECS_WORKED_DAILY = NUM_HOURS_WORKED_DAILY * 3600
print(
    "Number of seconds available for routing invoices per day = {}".format(
        NUM_SECS_WORKED_DAILY
    )
)

Number of seconds available for routing invoices per day = 93600.0


In [71]:
# Number of business days in a month.
# Used to scale the daily working time and the generated invoice count up to a month.
MONTH_DAYS = 20

In [72]:
# Monthly routing capacity in seconds: the daily capacity times the
# number of business days in a month
NUM_SECS_WORKED_MONTHLY = MONTH_DAYS * NUM_SECS_WORKED_DAILY
print(
    "Number of seconds available for routing invoices per month = {}".format(
        NUM_SECS_WORKED_MONTHLY
    )
)

Number of seconds available for routing invoices per month = 1872000.0


In [73]:
# Invoices for a given month: ten independent sample months.
# Each sample is ONE gen_invoices draw multiplied by MONTH_DAYS, so every
# value is a multiple of MONTH_DAYS.
# NOTE(review): this is not the same as summing MONTH_DAYS independent daily
# draws (the spread is larger) -- confirm this simplification is intended.
invoices = [gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS for i in range(10)]
invoices

[17660, 18100, 18000, 17260, 17460, 17200, 18240, 16960, 17580, 17820]

In [74]:
# The time it takes an AP Specialist to route an invoice is distributed as a Gaussian.
# Both values are in seconds: they are accumulated and compared against
# NUM_SECS_WORKED_MONTHLY inside get_period_workload.
# Mean of the Gaussian distribution for the time it takes to route an invoice
MEAN = 80
# Standard Deviation for the time it takes to route an invoice
STD_DEV = 20

In [75]:
# The time it takes a machine learning system to route an invoice is distributed as a Gaussian.
# Both values are in seconds, like MEAN and STD_DEV.
# Mean of the Gaussian distribution for the time it takes to route an invoice
MEAN_ML = 0.5
# Standard Deviation for the time it takes to route an invoice
STD_DEV_ML = .002

In [76]:
# Fraction of the invoices routed by AP Specialists that are routed correctly;
# passed as routing_success_rate to get_period_workload.
ROUTING_SUCCESS_RATE = 0.95

In [77]:
# For the incorrectly routed invoices, it takes a much longer time for them to get sorted out.
# Hence, the values of the mean and standard deviation for the normal distribution
# of the times it takes to resolve this are much larger than MEAN and STD_DEV.
# NOTE(review): LARGE_MEAN and LARGE_STD_DEV are not referenced by the cells
# shown here, which sweep TIMES_TO_FIX instead -- confirm they are still needed.

# 3 hours = 10,800 seconds
LARGE_MEAN = 10800
# 30 minutes = 1,800 seconds
LARGE_STD_DEV = 1800

# Assume that the success rate for re-routing is 1
LARGE_ROUTING_SUCCESS_RATE = 1.0

In [78]:
# Candidate [mean, std_dev] pairs, in seconds, for the time it takes to fix an
# incorrectly routed invoice. Each pair is passed as mean_std_dev to
# get_period_workload; in every pair the std_dev is one sixth of the mean.
# 1 hour = 3600 seconds
# 2 hours = 7200 seconds
# 3 hours = 10,800 seconds
# 4 hours = 14,400 seconds
TIMES_TO_FIX = [[3600, 600], [7200, 1200], [10800, 1800], [14400, 2400]]

## Results

### CASE 1: No Machine Learning

This is the baseline against which to compare the benefits of the machine learning solution.

In [79]:
# Round 1: AP Specialists route one month's worth of invoices by hand
res1 = get_period_workload(
    gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    [MEAN, STD_DEV],
    NUM_SECS_WORKED_MONTHLY,
    ROUTING_SUCCESS_RATE,
)
# Index 3 = invoices routed incorrectly ("high touch"),
# index 5 = working time left over after round 1
num_high_touch, time_remaining = res1[3], res1[5]

# Round 2: fix the high-touch invoices in whatever time is left,
# once for each assumed [mean, std_dev] fix time
res2 = [
    get_period_workload(
        num_high_touch,
        time_to_fix,
        time_remaining,
        LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]

res2

Time to route the first few invoices...[  73.59111037  103.03975391   64.97952143   34.7905781   119.44090815] ... and the last few [ 44.55060519  86.56037518  65.8015064   80.28334837]
Cumulative time to route the first few invoices...[  73.59111037  176.63086428  241.61038571  276.40096381  395.84187196] ... and the last few [ 1441124.22671909  1441210.78709427  1441276.58860067  1441356.87194904]
Number of invoices routed = 18060
Number of invoices correctly routed = 17157
Number of invoices NOT correctly routed = 903
Number of invoices not handled due to lack of time = 0
Extra time left over in the period = 430538.8057629776
Time to route the first few invoices...[ 4077.13300093  3933.84182431  4092.84949501  4173.34583648  4163.28899603] ... and the last few [ 2902.78245961  4550.5780387   4531.97247009  5075.2840172 ]
Cumulative time to route the first few invoices...[  4077.13300093   8010.97482524  12103.82432024  16277.17015672
  20440.45915275] ... and the last few [ 3265850.

[[903, 118, 118, 0, 785, -2853602.4260804234],
 [903, 57, 57, 0, 846, -6167552.0530747073],
 [903, 40, 40, 0, 863, -9188949.8893210106],
 [903, 29, 29, 0, 874, -12457980.55774913]]

### CASE 2: With Machine Learning Up Front

#### 2.1 ML System Accuracy = 0.5 = 50%

In [80]:
# Fraction of invoices the ML system routes correctly in this scenario;
# passed as routing_success_rate for the ML round.
ML_SYS_ACC_50 = 0.5

In [81]:
# Round 1: the ML system attempts every invoice of the month
res1 = get_period_workload(
    gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    [MEAN_ML, STD_DEV_ML],
    NUM_SECS_WORKED_MONTHLY,
    ML_SYS_ACC_50,
)
# Index 3 = invoices the ML system routed incorrectly, index 5 = time left over.
# time_remaining1 is not used -- AP Specialists have the same time as before
num_high_touch1, time_remaining1 = res1[3], res1[5]

# Round 2: AP Specialists deal with the num_high_touch1 that ML spat out
res2 = get_period_workload(
    num_high_touch1,
    [MEAN, STD_DEV],
    NUM_SECS_WORKED_MONTHLY,
    ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

# Round 3: fix what the specialists mis-routed in the time they have left,
# once for each assumed [mean, std_dev] fix time
res3 = [
    get_period_workload(
        num_high_touch2,
        time_to_fix,
        time_remaining2,
        LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]

res3

Time to route the first few invoices...[ 0.49694091  0.50014349  0.50452245  0.49810976  0.49808957] ... and the last few [ 0.49925621  0.50247864  0.49801224  0.50068348]
Cumulative time to route the first few invoices...[ 0.49694091  0.9970844   1.50160685  1.99971661  2.49780618] ... and the last few [ 8357.64151582  8358.14399446  8358.6420067   8359.14269018]
Number of invoices routed = 16720
Number of invoices correctly routed = 8360
Number of invoices NOT correctly routed = 8360
Number of invoices not handled due to lack of time = 0
Extra time left over in the period = 1863640.3576270093
Time to route the first few invoices...[ 73.07502431  61.64376862  66.82516419  95.55070634  91.23933871] ... and the last few [ 105.4872334    58.42285654   62.98154706   70.40808693]
Cumulative time to route the first few invoices...[  73.07502431  134.71879294  201.54395712  297.09466347  388.33400218] ... and the last few [ 669853.93686279  669912.35971934  669975.34126639  670045.74935332]


[[418, 329, 329, 0, 89, -321231.85537168733],
 [418, 169, 169, 0, 249, -1816480.4666475528],
 [418, 109, 109, 0, 309, -3350717.9865032244],
 [418, 82, 82, 0, 336, -4868954.5066288654]]

#### 2.2 ML System Accuracy = 0.6 = 60%

In [82]:
# Fraction of invoices the ML system routes correctly in this scenario;
# passed as routing_success_rate for the ML round.
ML_SYS_ACC_60 = 0.6

In [83]:
# Round 1: the ML system attempts every invoice of the month
res1 = get_period_workload(
    gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    [MEAN_ML, STD_DEV_ML],
    NUM_SECS_WORKED_MONTHLY,
    ML_SYS_ACC_60,
)
# Index 3 = invoices the ML system routed incorrectly, index 5 = time left over.
# time_remaining1 is not used -- AP Specialists have the same time as before
num_high_touch1, time_remaining1 = res1[3], res1[5]

# Round 2: AP Specialists deal with the num_high_touch1 that ML spat out
res2 = get_period_workload(
    num_high_touch1,
    [MEAN, STD_DEV],
    NUM_SECS_WORKED_MONTHLY,
    ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

# Round 3: fix what the specialists mis-routed in the time they have left,
# once for each assumed [mean, std_dev] fix time
res3 = [
    get_period_workload(
        num_high_touch2,
        time_to_fix,
        time_remaining2,
        LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]

res3

Time to route the first few invoices...[ 0.50113014  0.50249261  0.50302499  0.50014123  0.50073207] ... and the last few [ 0.5029012   0.50093077  0.50091133  0.50300161]
Cumulative time to route the first few invoices...[ 0.50113014  1.00362275  1.50664774  2.00678897  2.50752105] ... and the last few [ 8467.80989925  8468.31083002  8468.81174134  8469.31474295]
Number of invoices routed = 16940
Number of invoices correctly routed = 10164
Number of invoices NOT correctly routed = 6776
Number of invoices not handled due to lack of time = 0
Extra time left over in the period = 1863530.18118861
Time to route the first few invoices...[ 107.4550229    63.52596077   67.73729329   61.44292147   97.84177326] ... and the last few [ 70.12812368  62.35764466  82.12747291  81.54727386]
Cumulative time to route the first few invoices...[ 107.4550229   170.98098367  238.71827696  300.16119843  398.0029717 ] ... and the last few [ 541060.91878409  541123.27642875  541205.40390166  541286.95117552]


[[339, 339, 339, 0, 0, 110412.76246189396],
 [339, 186, 186, 0, 153, -1087306.1417451992],
 [339, 124, 124, 0, 215, -2351396.4029696938],
 [339, 91, 91, 0, 248, -3533128.8137930809]]

#### 2.3 ML System Accuracy = 0.7 = 70%

In [84]:
# Fraction of invoices the ML system routes correctly in this scenario;
# passed as routing_success_rate for the ML round.
ML_SYS_ACC_70 = 0.7

In [85]:
# Round 1: the ML system attempts every invoice of the month
res1 = get_period_workload(
    gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    [MEAN_ML, STD_DEV_ML],
    NUM_SECS_WORKED_MONTHLY,
    ML_SYS_ACC_70,
)
# Index 3 = invoices the ML system routed incorrectly, index 5 = time left over.
# time_remaining1 is not used -- AP Specialists have the same time as before
num_high_touch1, time_remaining1 = res1[3], res1[5]

# Round 2: AP Specialists deal with the num_high_touch1 that ML spat out
res2 = get_period_workload(
    num_high_touch1,
    [MEAN, STD_DEV],
    NUM_SECS_WORKED_MONTHLY,
    ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

Time to route the first few invoices...[ 0.50069682  0.50194818  0.50143825  0.50088938  0.50081402] ... and the last few [ 0.49839781  0.50028874  0.50177404  0.50032152]
Cumulative time to route the first few invoices...[ 0.50069682  1.002645    1.50408325  2.00497263  2.50578664] ... and the last few [ 8517.73265888  8518.23294762  8518.73472166  8519.23504318]
Number of invoices routed = 17040
Number of invoices correctly routed = 11928
Number of invoices NOT correctly routed = 5112
Number of invoices not handled due to lack of time = 0
Extra time left over in the period = 1863480.2633561487
Time to route the first few invoices...[  81.15780203   66.64399367  116.64626633   85.71372874   60.91325191] ... and the last few [ 116.29724404   89.15917995   45.74524449   78.54079282]
Cumulative time to route the first few invoices...[  81.15780203  147.80179571  264.44806204  350.16179078  411.07504269] ... and the last few [ 405657.45460755  405746.61378751  405792.359032    405870.8998