# Invoice-Routing-Functions

In [1]:
# Generate the total number of invoices for a period by sampling each vendor's count from a Poisson distribution

from scipy.stats import poisson
import numpy as np

def gen_invoices(poisson_mu, num_vendors):
    """Return the total number of invoices received during one period.

    One invoice count is drawn per vendor from a Poisson distribution and
    the draws are added up.

    Parameters
    ----------
    poisson_mu : the lambda (rate) parameter of the Poisson distribution.
        Pick it so that the resulting total is approximately the right
        number of invoices for the time period being modelled.
    num_vendors : the number of vendors Accounts Payable deals with, i.e.
        how many independent Poisson draws are made.

    Returns
    -------
    The sum of the per-vendor invoice counts (a NumPy integer scalar).
    """
    return np.sum(poisson.rvs(poisson_mu, size=num_vendors))

In [2]:
# Generate the workload over a period of time
# It could be for any period of time -- substitute the appropriate number in period_duration
# This produces the number of invoices that were routed (both correctly and incorrectly)
#  and the number of invoices that couldn't be tackled within the period

from scipy.stats import norm # the Gaussian distribution
import math # for floor and ceil functions

def get_period_workload(num_invoices_input, 
                        mean_std_dev,
                        period_duration, 
                        routing_success_rate):
    """Simulate routing a batch of invoices within a fixed time budget.

    A routing time is drawn for every invoice from a Gaussian distribution.
    Invoices are considered handled one after another, so an invoice counts
    as routed when the cumulative time up to and including it fits within
    ``period_duration``. Progress is also printed to stdout.

    Parameters
    ----------
    num_invoices_input : the number of invoices arriving in the period.
        It can be a plain number or a value produced by gen_invoices.
        Zero is allowed.
    mean_std_dev : a sequence [mean, std_dev] -- the Gaussian parameters
        for how long it takes to route one invoice.
    period_duration : the total time available for routing, in the same
        unit as the Gaussian parameters.
    routing_success_rate : the fraction of routed invoices that are routed
        correctly.

    Returns
    -------
    A list:
        [num_invoices_input,
         number of invoices routed,
         number routed correctly,
         number routed incorrectly,
         number not routed because time ran out,
         time left in the period]
    The last entry is negative when the whole batch needs more time than
    the period offers; its magnitude is then the extra time required.
    """
    mean = mean_std_dev[0]
    std_dev = mean_std_dev[1]
    
    # Array of time it takes to route each invoice
    # NOTE(review): a Gaussian can yield negative durations; they are kept as drawn.
    time_array = norm.rvs(loc=mean, scale=std_dev, size=num_invoices_input)
    # [-5:] (not [-5:-1]) so that the final invoice is included in the preview
    print("Time to route the first few invoices...{} ... and the last few {}".\
          format(time_array[0:5], time_array[-5:]))
    
    # Cumulative sums of the time_array (this is not strictly the right model, but a good approx)
    time_array_cu = np.cumsum(time_array)
    print("Cumulative time to route the first few invoices...{} ... and the last few {}".\
          format(time_array_cu[0:5], time_array_cu[-5:]))
    
    # Invoices whose cumulative time is less than or equal to period_duration were routed
    num_invoices_routed = int(np.count_nonzero(time_array_cu <= period_duration))
    print("Number of invoices routed = {}".format(num_invoices_routed))
    
    # Of the number of the invoices routed, the ones successfully/correctly routed
    num_correctly_routed = int(math.floor(routing_success_rate * num_invoices_routed))
    print("Number of invoices correctly routed = {}".format(num_correctly_routed))
    
    # The number of invoices not correctly routed
    num_not_correctly_routed = num_invoices_routed - num_correctly_routed
    print("Number of invoices NOT correctly routed = {}".format(num_not_correctly_routed))
    
    # Number of invoices not gotten to because time ran out in the period
    # (cumulative time greater than period_duration)
    num_invoices_not_routed = int(np.count_nonzero(time_array_cu > period_duration))
    print("Number of invoices not handled due to lack of time = {}".format(num_invoices_not_routed))
    
    # How much extra time was left over in the period (if any)?
    # If this value is negative it means it will take this much more time to 
    # route the invoices that were not handled due to lack of time in the period.
    # An empty batch consumes no time, so the whole period is left over.
    total_time_needed = time_array_cu[-1] if time_array_cu.size else 0.0
    time_left_in_period = period_duration - total_time_needed
    print("Extra time left over in the period = {}".format(time_left_in_period))
    
    return [num_invoices_input,
            num_invoices_routed, 
            num_correctly_routed, 
            num_not_correctly_routed, 
            num_invoices_not_routed,
            time_left_in_period
           ]

## Assumptions/Constants

In [3]:
# The number of vendors who are sending invoices to Accounts Payable
# (passed to gen_invoices as num_vendors: one Poisson draw is made per vendor)
NUM_VENDORS = 2500

In [4]:
# Poisson rate - rate at which a single vendor generates invoices
# Choose it so that the invoices per day for the number of vendors
# matches the real numbers seen by the organization
MU = 0.35

In [5]:
# Head count: AP Specialists who route invoices
NUM_AP_SPECIALISTS = 4

# Hours one AP Specialist spends routing invoices in a day
NUM_HOURS_WORKED_DAILY_PER_PERSON = 6.5

# Team-wide routing capacity per day, first in hours and then in seconds
NUM_HOURS_WORKED_DAILY = NUM_AP_SPECIALISTS * NUM_HOURS_WORKED_DAILY_PER_PERSON
NUM_SECS_WORKED_DAILY = NUM_HOURS_WORKED_DAILY * 60 * 60
print("Number of seconds available for routing invoices per day = {}".format(NUM_SECS_WORKED_DAILY))

Number of seconds available for routing invoices per day = 93600.0


In [6]:
# Number of business days in a month
# (used to scale the daily working time and daily invoice volume up to a month)
MONTH_DAYS = 20

In [7]:
# Team-wide routing capacity for a whole month, in seconds
NUM_SECS_WORKED_MONTHLY = MONTH_DAYS * NUM_SECS_WORKED_DAILY
print("Number of seconds available for routing invoices per month = {}".format(NUM_SECS_WORKED_MONTHLY))

Number of seconds available for routing invoices per month = 1872000.0


In [8]:
# Invoices for a given month: ten sample draws to get a feel for the monthly volume
# Each draw is one gen_invoices total across all vendors, scaled by MONTH_DAYS
# NOTE(review): scaling a single draw treats every day of the month as identical, so the
#  month-to-month spread is presumably wider than summing MONTH_DAYS independent draws -- confirm this is intended
invoices = [gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS for i in range(10)]
invoices

[17960, 17980, 17420, 18080, 17360, 17420, 18360, 17240, 18020, 18840]

In [9]:
# The time it takes an AP Specialist to route an invoice is distributed as a Gaussian
# (same unit as the period duration it is compared against, i.e. seconds)
# Mean of the Gaussian distribution for the time it takes to route an invoice
MEAN = 80
# Standard Deviation for the time it takes to route an invoice
STD_DEV = 20

In [10]:
# The time it takes a machine learning system to route an invoice is distributed as a Gaussian
# (same unit as the period duration it is compared against, i.e. seconds)
# Mean of the Gaussian distribution for the time it takes the ML system to route an invoice
MEAN_ML = 0.5
# Standard Deviation for the time it takes the ML system to route an invoice
STD_DEV_ML = .002

In [11]:
# Fraction of the invoices routed by AP Specialists that are routed correctly
ROUTING_SUCCESS_RATE = 0.95

In [12]:
# For the incorrectly routed invoices, it takes a much longer time for them to get sorted out.
# Hence, the values of the mean and standard deviation for the normal distribution
# of the times it takes to resolve this are much larger than MEAN and STD_DEV
# NOTE(review): LARGE_MEAN and LARGE_STD_DEV are not referenced in the cells visible here;
#  the scenarios sweep TIMES_TO_FIX instead -- confirm whether they are still needed

# 3 hours = 10,800 seconds
LARGE_MEAN = 10800
# 30 minutes = 1,800 seconds
LARGE_STD_DEV = 1800

# Assume that the success rate for re-routing is 1
LARGE_ROUTING_SUCCESS_RATE = 1.0

In [13]:
# Candidate [mean, std_dev] pairs, in seconds, for the time it takes to fix
# one incorrectly routed invoice; each pair is tried in turn by the scenarios
# 1 hour = 3600 seconds
# 2 hours = 7200 seconds
# 3 hours = 10,800 seconds
# 4 hours = 14,400 seconds
# Each std_dev is one sixth of its mean
TIMES_TO_FIX = [[3600, 600], [7200, 1200], [10800, 1800], [14400, 2400]]

## Results

### CASE 1: No Machine Learning

This is the baseline against which to compare the benefits of the machine learning solution.

In [14]:
# Round 1 -- AP Specialists route the month's invoices by hand
res1 = get_period_workload(
    num_invoices_input=gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    mean_std_dev=[MEAN, STD_DEV],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ROUTING_SUCCESS_RATE,
)
# Index 3 = invoices routed incorrectly, index 5 = time left in the period
num_high_touch, time_remaining = res1[3], res1[5]

# Round 2 -- fix the incorrectly routed (high touch) invoices in whatever time
# is left, once for every [mean, std_dev] candidate in TIMES_TO_FIX
res2 = [
    get_period_workload(
        num_invoices_input=num_high_touch,
        mean_std_dev=time_to_fix,
        period_duration=time_remaining,
        routing_success_rate=LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]

res2

Time to route the first few invoices...[  58.85249067   56.41052914   63.34892083  110.44108854   80.88286722] ... and the last few [ 99.58078064  57.82384557  93.84999207  57.60581212]
Cumulative time to route the first few invoices...[  58.85249067  115.26301981  178.61194064  289.05302919  369.9358964 ] ... and the last few [ 1414875.5450205   1414933.36886606  1415027.21885813  1415084.82467025]
Number of invoices routed = 17680
Number of invoices correctly routed = 16796
Number of invoices NOT correctly routed = 884
Number of invoices not handled due to lack of time = 0
Extra time left over in the period = 456826.5028999923
Time to route the first few invoices...[ 3906.55394924  3575.03816914  3612.53051785  4063.15202707  4741.27699229] ... and the last few [ 3146.54239694  2883.96121001  4127.06318926  3971.31428828]
Cumulative time to route the first few invoices...[  3906.55394924   7481.59211838  11094.12263624  15157.27466331
  19898.55165559] ... and the last few [ 3192420.

[[884, 124, 124, 0, 760, -2750542.5920591033],
 [884, 63, 63, 0, 821, -5950859.4207632877],
 [884, 43, 43, 0, 841, -9013129.6261411943],
 [884, 31, 31, 0, 853, -12210160.869213931]]

### CASE 2: With Machine Learning Up Front

#### 2.1 ML System Accuracy = 0.5 = 50%

In [15]:
# Fraction of invoices the ML system routes correctly in this scenario
ML_SYS_ACC_50 = 0.5

In [16]:
# Round 1 -- the ML system takes the first pass over the month's invoices
res1 = get_period_workload(
    num_invoices_input=gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    mean_std_dev=[MEAN_ML, STD_DEV_ML],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ML_SYS_ACC_50,
)
# Index 3 = invoices routed incorrectly, index 5 = time left in the period
num_high_touch1 = res1[3]
time_remaining1 = res1[5] # not used -- AP Specialists have the same time as before

# Round 2 -- AP Specialists handle what the ML system routed incorrectly,
# with their full monthly capacity
res2 = get_period_workload(
    num_invoices_input=num_high_touch1,
    mean_std_dev=[MEAN, STD_DEV],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

# Round 3 -- fix the remaining high touch invoices in the time the AP Specialists
# have left, once for every [mean, std_dev] candidate in TIMES_TO_FIX
res3 = [
    get_period_workload(
        num_invoices_input=num_high_touch2,
        mean_std_dev=time_to_fix,
        period_duration=time_remaining2,
        routing_success_rate=LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]

res3

Time to route the first few invoices...[ 0.49974946  0.49854968  0.49602019  0.50017993  0.50084236] ... and the last few [ 0.50015236  0.50087878  0.50029739  0.50319799]
Cumulative time to route the first few invoices...[ 0.49974946  0.99829913  1.49431932  1.99449924  2.49534161] ... and the last few [ 9147.83881359  9148.33969237  9148.83998976  9149.34318775]
Number of invoices routed = 18300
Number of invoices correctly routed = 9150
Number of invoices NOT correctly routed = 9150
Number of invoices not handled due to lack of time = 0
Extra time left over in the period = 1862850.1553823473
Time to route the first few invoices...[ 81.58141949  93.69113805  70.02528165  62.74299764  41.75213866] ... and the last few [ 79.31266814  45.75515461  59.96069253  43.35151112]
Cumulative time to route the first few invoices...[  81.58141949  175.27255754  245.29783919  308.04083682  349.79297549] ... and the last few [ 734391.99187621  734437.74703082  734497.70772334  734541.05923446]
Numb

[[458, 314, 314, 0, 144, -518165.13989768946],
 [458, 159, 159, 0, 299, -2136038.7127798423],
 [458, 105, 105, 0, 353, -3770409.0035145287],
 [458, 78, 78, 0, 380, -5451518.315365456]]

#### 2.2 ML System Accuracy = 0.6 = 60%

In [17]:
# Fraction of invoices the ML system routes correctly in this scenario
ML_SYS_ACC_60 = 0.6

In [18]:
# Round 1 -- the ML system takes the first pass over the month's invoices
res1 = get_period_workload(
    num_invoices_input=gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    mean_std_dev=[MEAN_ML, STD_DEV_ML],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ML_SYS_ACC_60,
)
# Index 3 = invoices routed incorrectly, index 5 = time left in the period
num_high_touch1 = res1[3]
time_remaining1 = res1[5] # not used -- AP Specialists have the same time as before

# Round 2 -- AP Specialists handle what the ML system routed incorrectly,
# with their full monthly capacity
res2 = get_period_workload(
    num_invoices_input=num_high_touch1,
    mean_std_dev=[MEAN, STD_DEV],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

# Round 3 -- fix the remaining high touch invoices in the time the AP Specialists
# have left, once for every [mean, std_dev] candidate in TIMES_TO_FIX
res3 = [
    get_period_workload(
        num_invoices_input=num_high_touch2,
        mean_std_dev=time_to_fix,
        period_duration=time_remaining2,
        routing_success_rate=LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]

res3

Time to route the first few invoices...[ 0.49955008  0.49871643  0.49739794  0.50115547  0.50207393] ... and the last few [ 0.50040959  0.50171421  0.50001161  0.50096635]
Cumulative time to route the first few invoices...[ 0.49955008  0.99826651  1.49566445  1.99681992  2.49889386] ... and the last few [ 8668.00644136  8668.50815557  8669.00816717  8669.50913353]
Number of invoices routed = 17340
Number of invoices correctly routed = 10404
Number of invoices NOT correctly routed = 6936
Number of invoices not handled due to lack of time = 0
Extra time left over in the period = 1863329.9934656532
Time to route the first few invoices...[  69.17360973   96.24132137  109.21591763   45.60603689   61.20169756] ... and the last few [ 58.95951329  94.43401605  61.66653335  38.14121088]
Cumulative time to route the first few invoices...[  69.17360973  165.4149311   274.63084873  320.23688562  381.43858318] ... and the last few [ 550030.60479545  550125.0388115   550186.70534485  550224.84655573

[[347, 347, 347, 0, 0, 46660.831880063284],
 [347, 184, 184, 0, 163, -1182453.3980094232],
 [347, 120, 120, 0, 227, -2461457.3361068508],
 [347, 91, 91, 0, 256, -3657916.9457358629]]

#### 2.3 ML System Accuracy = 0.7 = 70%

In [19]:
# Fraction of invoices the ML system routes correctly in this scenario
ML_SYS_ACC_70 = 0.7

In [20]:
# Round 1 -- the ML system takes the first pass over the month's invoices
res1 = get_period_workload(
    num_invoices_input=gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    mean_std_dev=[MEAN_ML, STD_DEV_ML],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ML_SYS_ACC_70,
)
# Index 3 = invoices routed incorrectly, index 5 = time left in the period
num_high_touch1 = res1[3]
time_remaining1 = res1[5] # not used -- AP Specialists have the same time as before

# Round 2 -- AP Specialists handle what the ML system routed incorrectly,
# with their full monthly capacity
res2 = get_period_workload(
    num_invoices_input=num_high_touch1,
    mean_std_dev=[MEAN, STD_DEV],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

# Round 3 -- fix the remaining high touch invoices in the time the AP Specialists
# have left, once for every [mean, std_dev] candidate in TIMES_TO_FIX
res3 = [
    get_period_workload(
        num_invoices_input=num_high_touch2,
        mean_std_dev=time_to_fix,
        period_duration=time_remaining2,
        routing_success_rate=LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]

res3

Time to route the first few invoices...[ 0.50117694  0.50215079  0.50118644  0.4959717   0.50066867] ... and the last few [ 0.4975494   0.49831851  0.50270478  0.49684775]
Cumulative time to route the first few invoices...[ 0.50117694  1.00332773  1.50451417  2.00048587  2.50115454] ... and the last few [ 8938.13811598  8938.63643449  8939.13913928  8939.63598703]
Number of invoices routed = 17880
Number of invoices correctly routed = 12516
Number of invoices NOT correctly routed = 5364
Number of invoices not handled due to lack of time = 0
Extra time left over in the period = 1863059.8593763227
Time to route the first few invoices...[ 109.9587264    70.08682748   83.46690847   89.01753603   78.62326722] ... and the last few [  80.08429955   81.95180789   84.8862476   118.16875157]
Cumulative time to route the first few invoices...[ 109.9587264   180.04555387  263.51246234  352.52999838  431.15326559] ... and the last few [ 430170.82820936  430252.78001725  430337.66626485  430455.8350

[[269, 269, 269, 0, 0, 468398.97304329684],
 [269, 199, 199, 0, 70, -497143.61828381056],
 [269, 133, 133, 0, 136, -1489843.323325834],
 [269, 102, 102, 0, 167, -2426616.9081719443]]