In [1]:
import math
import os
import random
import time
from random import randint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# One calendar day; multiply by an integer to build whole-day offsets (e.g. 14*DAYS).
DAYS = pd.Timedelta(1, unit="D")

In [3]:
# Load the raw order export (Excel workbook) into DF and preview its first rows.
FILE_NAME = "PQ2MON - Orders - Weeks -1 to -109 (1).xls.xlsx"
DF = pd.read_excel(io=FILE_NAME)
DF.head(n=5)

Unnamed: 0,Client Grouping1,Customer Group,Requested Mode,Priority,Requested Trailer Class,Shipper Region3,Consignee Region3,Lane ID - City to City,Start Date,Completion Date,Order #,Avg. Weekly Frequency
0,Corporate,3M Canada Company,ROAD,Standard,DRY,ON2TOR,PQ2S,"MILTON,ON/ to DRUMMONDVILLE,PQ/",2019-04-16,2019-04-18,3790088,0
1,Corporate,3M Canada Company,ROAD,Standard,DRY,ON2TOR,PQ2S,"MILTON,ON/ to DRUMMONDVILLE,PQ/",2019-05-07,2019-05-09,3810953,0
2,Corporate,3M Canada Company,ROAD,Standard,DRY,ON2TOR,PQ2S,"MILTON,ON/ to DRUMMONDVILLE,PQ/",2019-06-07,2019-06-10,3841144,0
3,Corporate,3M Canada Company,ROAD,Standard,DRY,ON2TOR,PQ2S,"MILTON,ON/ to DRUMMONDVILLE,PQ/",2019-07-09,2019-07-11,3869973,0
4,Corporate,3M Canada Company,ROAD,Standard,DRY,PQ2MON,USMWIA,"STE THERESE,PQ/TE to PRAIRIE DU CHIEN,WI/",2020-02-26,2020-03-02,4066936,0


In [4]:
# Drop the irrelevant columns, keeping only the rows that have PQ data.
# Internal orders start and complete within the PQ region itself.
# External orders go from anywhere in PQ to another region, or vice versa.
def clean_df(df,drop_columns=None,region='PQ'):
    """Return a cleaned copy of `df` restricted to orders that touch `region`.

    A row is kept when its 'Shipper Region3' or its 'Consignee Region3' starts
    with `region`. The columns listed in `drop_columns` are removed, and any
    remaining missing region code is replaced with "UNKNOWN".

    Parameters
    ----------
    df : DataFrame with at least 'Shipper Region3', 'Consignee Region3' and
        every column named in `drop_columns`.
    drop_columns : list of column names to drop; None selects the default list.
    region : prefix of the region codes to keep.

    Returns
    -------
    A new DataFrame; `df` itself is not modified.
    """
    if drop_columns is None:
        drop_columns = ["Customer Group","Requested Mode","Order #","Avg. Weekly Frequency","Lane ID - City to City","Client Grouping1"]
    # na=False: a missing region code can never match the prefix.
    cond_out = df['Shipper Region3'].str.startswith(region, na=False)
    cond_in  = df['Consignee Region3'].str.startswith(region, na=False)
    df_clean = df[cond_out|cond_in].drop(columns=drop_columns)
    # Fill through a returned frame: an in-place fillna on a .loc slice may act on a
    # temporary object and leave df_clean unchanged.
    df_clean = df_clean.fillna({'Shipper Region3': "UNKNOWN", 'Consignee Region3': "UNKNOWN"})
    return df_clean


In [5]:
# Build the cleaned schedule from the raw export using clean_df's defaults, then display it.
DF_CLEAN = clean_df(DF)
DF_CLEAN

Unnamed: 0,Priority,Requested Trailer Class,Shipper Region3,Consignee Region3,Start Date,Completion Date
0,Standard,DRY,ON2TOR,PQ2S,2019-04-16,2019-04-18
1,Standard,DRY,ON2TOR,PQ2S,2019-05-07,2019-05-09
2,Standard,DRY,ON2TOR,PQ2S,2019-06-07,2019-06-10
3,Standard,DRY,ON2TOR,PQ2S,2019-07-09,2019-07-11
4,Standard,DRY,PQ2MON,USMWIA,2020-02-26,2020-03-02
...,...,...,...,...,...,...
23864,Standard,DRY,PQ2MON,BC2VAN,2020-06-18,2020-06-22
23865,Standard,REEFER,PQ2MON,ABNEDM,2020-02-12,2020-02-18
23866,Standard,REEFER,PQ2MON,ABNEDM,2020-02-12,2020-02-20
23867,Standard,REEFER,PQ2MON,ABNEDM,2020-02-13,2020-02-18


In [7]:
# Get the counts for a schedule, such as inbound and out bound figures. Also calculates the imbalance levels
def get_df_count(df,first_day=None,last_day=None,fill_missing=False,region='PQ'):
    """Count outbound and inbound orders per day and derive the imbalance.

    Outbound counts the 'Start Date' of orders whose shipper region starts with
    `region`; Inbound counts the 'Completion Date' of orders whose consignee
    region starts with `region`.

    Parameters
    ----------
    df : schedule with 'Shipper Region3', 'Consignee Region3', 'Start Date'
        and 'Completion Date' columns.
    first_day, last_day : inclusive bounds of the returned rows; None falls back
        to get_first_day() / get_last_day() for the same `region`.
    fill_missing : when True, days in [first_day, last_day] without any order
        are added with zero counts.
    region : prefix of the region codes to count.

    Returns
    -------
    DataFrame indexed by day with int64 columns 'Outbound', 'Inbound',
    'Imbalance' (Inbound - Outbound) and 'cImbalance' (running sum of Imbalance,
    accumulated over every day present before the frame is sliced to the bounds).
    """
    if first_day is None: first_day = get_first_day(df,region=region)
    if last_day is None:  last_day  = get_last_day(df,region=region)
    cond_out = df['Shipper Region3'].str.startswith(region, na=False)
    cond_in  = df['Consignee Region3'].str.startswith(region, na=False)

    # Name the counts explicitly: the name value_counts() gives its result differs between
    # pandas versions, so renaming by the source column name is not reliable.
    outbound = df.loc[cond_out,'Start Date'].value_counts().rename("Outbound")
    inbound  = df.loc[cond_in,'Completion Date'].value_counts().rename("Inbound")
    # Sort by day so the running sum and the label slice below are chronological.
    df_count = pd.concat([outbound, inbound],axis=1).fillna(0).sort_index()
    if fill_missing and first_day and last_day:
        df_count = df_count.join(pd.DataFrame(index=get_all_days(first_day,last_day)),how='outer').fillna(0)
    df_count["Imbalance"]  = df_count["Inbound"] - df_count["Outbound"]
    df_count["cImbalance"] = df_count["Imbalance"].cumsum()
    return df_count.loc[first_day:last_day].astype('int64')

def get_all_days(first_day,last_day):
    """List every day from first_day through last_day, both included.

    Both bounds must be exactly pd.Timestamp; for any other type an empty list
    is returned. Days are built as first_day plus a multiple of DAYS.
    """
    if type(first_day) != pd.Timestamp or type(last_day) != pd.Timestamp:
        return list()
    span = int((last_day-first_day).days+1)
    return [first_day + offset*DAYS for offset in range(span)]

def get_first_day(df,col=None,region='PQ'):
    """Return the earliest relevant date of `region` in the schedule `df`.

    col == 'Start Date'      -> earliest start among orders shipped from `region`.
    col == 'Completion Date' -> earliest completion among orders delivered to `region`.
    anything else            -> the earlier of the two.

    Returns NaT when no matching order exists.
    """
    starts = df.loc[df['Shipper Region3'].str.startswith(region, na=False),'Start Date']
    completions = df.loc[df['Consignee Region3'].str.startswith(region, na=False),'Completion Date']
    if col == 'Start Date':
        return starts.min()
    elif col == 'Completion Date':
        return completions.min()
    else:
        # Take the minimum over both series at once: the builtin min(a, b) keeps or drops a
        # NaT depending on argument order, e.g. when the region has no outbound orders.
        return pd.concat([starts, completions]).min()
    
def get_last_day(df,col=None,region='PQ'):
    """Return the latest relevant date of `region` in the schedule `df`.

    col == 'Start Date'      -> latest start among orders shipped from `region`.
    col == 'Completion Date' -> latest completion among orders delivered to `region`.
    anything else            -> the later of the two.

    Returns NaT when no matching order exists.
    """
    starts = df.loc[df['Shipper Region3'].str.startswith(region, na=False),'Start Date']
    completions = df.loc[df['Consignee Region3'].str.startswith(region, na=False),'Completion Date']
    if col == 'Start Date':
        return starts.max()
    elif col == 'Completion Date':
        return completions.max()
    else:
        # Take the maximum over both series at once: the builtin max(a, b) keeps or drops a
        # NaT depending on argument order, e.g. when the region has no outbound orders.
        return pd.concat([starts, completions]).max()

In [8]:
# Get all the orders that start and/or completes on a specific day
def get_df_day(df,day,col=None,region='PQ'):
    """Select the orders of `region` that start and/or complete on `day`.

    col == 'Start Date'      -> orders shipped from `region` that start on `day`.
    col == 'Completion Date' -> orders delivered to `region` that complete on `day`.
    anything else            -> orders matching either of the two rules.
    """
    leaves_on_day = df['Shipper Region3'].str.startswith(region) & (df['Start Date'] == day)
    arrives_on_day = df['Consignee Region3'].str.startswith(region) & (df['Completion Date'] == day)

    if col == 'Start Date':
        selected = leaves_on_day
    elif col == 'Completion Date':
        selected = arrives_on_day
    else:
        selected = leaves_on_day | arrives_on_day
    return df[selected]

# Get all orders that starts and/or completes in a specific time span
def get_df_span(df,first_day=None,last_day=None,col=None,region='PQ'):
    """Select the orders of `region` that start and/or complete within a time span.

    Parameters
    ----------
    df : schedule with 'Shipper Region3', 'Consignee Region3', 'Start Date'
        and 'Completion Date' columns.
    first_day, last_day : inclusive bounds; None falls back to
        get_first_day() / get_last_day() for the same `region`.
    col : 'Start Date' keeps orders shipped from `region` that start in the span,
        'Completion Date' keeps orders delivered to `region` that complete in
        the span, anything else keeps orders matching either rule.
    region : prefix of the region codes to select.
    """
    # Forward `region` so the default bounds describe the same region as the masks below.
    if first_day is None: first_day = get_first_day(df,region=region)
    if last_day is None:  last_day  = get_last_day(df,region=region)
    cond_start_date = (df["Start Date"] >= first_day) & (df["Start Date"] <= last_day)
    cond_compl_date = (df["Completion Date"] >= first_day) & (df["Completion Date"] <= last_day)
    # na=False: a missing region code can never match the prefix.
    cond_out = df['Shipper Region3'].str.startswith(region, na=False)
    cond_in = df['Consignee Region3'].str.startswith(region, na=False)

    if   col == 'Start Date':
        return df[cond_out & cond_start_date]
    elif col == 'Completion Date':
        return df[cond_in & cond_compl_date]
    else:
        return df[(cond_out & cond_start_date)|(cond_in & cond_compl_date)]

In [9]:
# Get orders that have the same values in columns in col
# Used mostly to determine the shortest/longest expected time for order of the same type
def get_df_similar_orders(df,index,col=None):
    """Return the rows of `df` whose values in `col` all equal those of row `index`.

    Parameters
    ----------
    df : schedule containing the columns named in `col`.
    index : index label of the reference order.
    col : column name or list-like of column names to compare; None selects
        Priority, Requested Trailer Class, Shipper Region3 and Consignee Region3.

    Returns
    -------
    The matching rows with all of their columns. A reference row with a missing
    value in one of the compared columns matches nothing, because a missing
    value never compares equal.
    """
    if col is None:
        col = ["Priority","Requested Trailer Class","Shipper Region3","Consignee Region3"]
    elif isinstance(col, str):
        col = [col]
    reference = df.loc[index,col]
    # A row is similar only when every compared column matches the reference order.
    is_similar = df.loc[:,col].eq(reference, axis="columns").all(axis=1)
    return df.loc[is_similar,:]

def get_min_duration(df,index):
    """Return the shortest Completion Date - Start Date among orders similar to `index`.

    Similar orders come from get_df_similar_orders() with its default columns.
    The pandas reduction skips missing durations; the builtin min() keeps or
    drops them depending on their position. The result is NaT when no duration
    is available.
    """
    similar = get_df_similar_orders(df,index)
    return (similar["Completion Date"]-similar["Start Date"]).min()

def get_max_duration(df,index):
    """Return the longest Completion Date - Start Date among orders similar to `index`.

    Similar orders come from get_df_similar_orders() with its default columns.
    The pandas reduction skips missing durations; the builtin max() keeps or
    drops them depending on their position. The result is NaT when no duration
    is available.
    """
    similar = get_df_similar_orders(df,index)
    return (similar["Completion Date"]-similar["Start Date"]).max()

# Get the bound (earliest/latest) dates between which an order has to start and complete.
# The start window is derived from orders of the same type: the earliest start is the completion
# date minus the longest duration seen for similar orders, and the latest start is the completion
# date minus the shortest such duration. The completion must fall within 1 day before to 1 day
# after the originally scheduled completion date.
# Because of this, we base the completion date on the date in the ORIGINAL schedule (i.e. DF_CLEAN)
def get_bounds(index):
    """Return the earliest/latest allowed start and completion dates of order `index`.

    All values are derived from the original schedule DF_CLEAN:
      e_start : completion date minus the longest duration of similar orders
      l_start : completion date minus the shortest duration of similar orders
      e_compl : completion date minus one day
      l_compl : completion date plus one day

    Returns a dict with the keys "e_start", "l_start", "e_compl", "l_compl"
    (in that order).
    """
    df = DF_CLEAN
    compl_date = df.loc[index,'Completion Date']
    # Look the similar orders up once and reuse them for both extremes.
    similar = get_df_similar_orders(df,index)
    durations = similar["Completion Date"]-similar["Start Date"]
    e_start,l_start = compl_date - durations.max(), compl_date - durations.min()
    e_compl,l_compl = compl_date - 1*DAYS, compl_date + 1*DAYS
    return {"e_start":e_start,"l_start":l_start,"e_compl":e_compl,"l_compl":l_compl}

In [10]:
# Can we move the Start/Completion Date of the item at index to this day?
# Start Date: the new day cannot be later than the latest allowed start, nor later than the completion date.
# Completion Date: the new day cannot be too early or too late compared to the original schedule
# (see get_bounds() for more)
def movable(df,index,col,day):
    """Tell whether the `col` date of order `index` may be moved to `day`.

    'Start Date'      : `day` must not be after the latest allowed start nor
                        after the order's current completion date in `df`.
    'Completion Date' : `day` must lie between the earliest and the latest
                        allowed completion.
    The limits come from get_bounds(index). Any other `col` yields None.
    """
    limits = get_bounds(index)
    compl_date = df.loc[index,"Completion Date"]
    if col == 'Start Date':
        # The earliest-start limit is not enforced here.
        return day <= limits["l_start"] and day <= compl_date
    if col == 'Completion Date':
        return day >= limits["e_compl"] and day <= limits["l_compl"]
    return None

# Get only orders from day1 that *can be moved* to day2
def get_df_movable_only(df,col,day1,day2):
    """Return the orders selected by get_df_day(df, day1, col) that movable() allows on day2."""
    candidates = get_df_day(df,day1,col)
    allowed = [label for label in candidates.index if movable(candidates,label,col,day2)]
    return candidates.loc[allowed]

# Actually reschedule an order in the scheduling table (by overwriting the date in column col)
# Returns a modified copy of the schedule; the input schedule is left untouched
def move(df,index,new_day,col):
    """Return a copy of `df` in which column `col` of order `index` is set to `new_day`."""
    rescheduled = df.copy()
    rescheduled.loc[index,col] = new_day
    return rescheduled

In [11]:
# Try to move a number of orders that starts on day1 to day2, or that completes on day1 to day2
# Return the modified the df, and the ACTUAL number of orders moved.
# This is because in reality, constraints apply and many times there are not
# enough available orders to satisfy the request
def try_move_orders(df,col,day1,day2,num_orders=None,verbose=False):
    """Move up to `num_orders` orders whose `col` date is day1 over to day2.

    Only orders returned by get_df_movable_only() are considered, so fewer
    orders than requested may be moved.

    Parameters
    ----------
    df : schedule; it is not modified.
    col : 'Start Date' or 'Completion Date', the date column to rewrite.
    day1, day2 : source and destination day.
    num_orders : number of orders requested; None or a negative value means
        every movable order.
    verbose : print what was moved and the resulting counts of both days.

    Returns
    -------
    (new schedule, number of orders actually moved)
    """
    df1 = df.copy()
    df_movable_only = get_df_movable_only(df1,col,day1,day2) # Get ONLY orders from day1 that *can be moved* to day2

    # Sort into a new frame; sorting a derived selection in place is not reliable.
    if col == "Start Date":
        if day2 > day1:
        # If we are moving the Start date forward
            # Pick furthest completion dates first (longest duration)
            df_movable_only = df_movable_only.sort_values(by="Completion Date",ascending=False)
        elif day1 > day2:
        # If we are moving the Start date back
            # Pick nearest completion dates first (shortest duration)
            df_movable_only = df_movable_only.sort_values(by="Completion Date",ascending=True)
    elif col == "Completion Date":
        if day2 > day1:
        # If we are moving the Completion date forward
            # Pick nearest start dates first (shortest duration)
            df_movable_only = df_movable_only.sort_values(by="Start Date",ascending=True)
        elif day1 > day2:
        # If we are moving the Completion date back
            # Pick furthest start dates first (longest duration)
            df_movable_only = df_movable_only.sort_values(by="Start Date",ascending=False)

    if num_orders is None or num_orders < 0:
        num_orders = len(df_movable_only)
    df_orders = df_movable_only[:int(num_orders)] # Get just enough requested orders from day1 that can be moved to day2

    if len(df_orders) > 0:
        assert (df1.loc[df_orders.index,col] == day1).all()
        # df1 is already a private copy, so all selected orders can be rewritten in one
        # assignment instead of copying the whole schedule once per order.
        df1.loc[df_orders.index,col] = day2 # Finally move such orders to day2

    if verbose:
        print("- Moved {}/{} requested {}s from Day {} to Day {}".format(len(df_orders),num_orders,col,day1,day2))
        print(get_df_count(df1).loc[[day1,day2]])
    return df1, len(df_orders)

In [12]:
# Attempt to move orders between day1 and day2 to achieve a certain balance value
# By default, it would use the mean imbalance level of both days as the goal
def balance(df,day1,day2,bal_val=None,verbose=False):
    """Move orders between day1 and day2 so day1's imbalance approaches `bal_val`.

    Imbalance is Inbound - Outbound as reported by get_df_count(). To raise
    day1's imbalance, outbound orders are deferred from day1 to day2 and/or
    inbound orders are pulled from day2 to day1; to lower it, the opposite moves
    are made. Which kind of move is tried first depends on how the inbound and
    outbound volumes of the two days compare.

    Parameters
    ----------
    df : schedule; it is not modified.
    day1, day2 : the two days to balance.
    bal_val : target imbalance for day1; None uses the mean imbalance of both
        days, truncated to an int.
    verbose : print counts before/after and the moves performed.

    Returns
    -------
    The rescheduled copy of `df`.
    """
    df1 = df.copy()
    df_count = get_df_count(df1,day1,day2,fill_missing=True)

    if verbose:
        print("\nDays: {} - {}".format(day1,day2))
        print("Before:\n",df_count.loc[[day1,day2]])

    if bal_val is None: bal_val = int(np.average(df_count.loc[[day1,day2],"Imbalance"]))
    out1,in1 = df_count.loc[day1,["Outbound","Inbound"]]
    out2,in2 = df_count.loc[day2,["Outbound","Inbound"]]
    d_bal = bal_val - df_count.loc[day1,"Imbalance"]
    d_out,d_in1,d_in2= 0,0,0

    if d_bal > 0:
        if (in1 >= in2 and out1 >= out2):
        # If traffic is heavier today, we prioritize reducing outbound traffic
            # We try to defer as many outbound orders to tomorrow as possible
            # If it's not enough then we make up by taking in tomorrow's inbound orders
            df1,d_out = try_move_orders(df1,"Start Date",day1,day2,d_bal,verbose)
            df1,d_in1 = try_move_orders(df1,"Completion Date",day2,day1,d_bal-d_out,verbose)

        elif (in1 <= in2 and out1 <= out2):
        # If traffic is less today, we prioritize increasing inbound traffic
            # We try to take in as many inbound orders from tomorrow as possible
            # If it's not enough then we make up by deferring the outbound order til tomorrow
            df1,d_in1 = try_move_orders(df1,"Completion Date",day2,day1,d_bal,verbose)
            df1,d_out = try_move_orders(df1,"Start Date",day1,day2,d_bal-d_in1,verbose)

        else:
        # If inbound and outbound traffic comparison is mixed, we pick half each type to even everything out
            # We try achieve 1/2 of d_bal by taking in tomorrow inbound orders
            # Then try achieve the rest of d_bal by deferring outbound orders
            # Go back to step 1 and try to take in more tomorrow inbound orders just in case d_bal is not achieved
            df1,d_in1 = try_move_orders(df1,"Completion Date",day2,day1,d_bal//2,verbose)
            df1,d_out = try_move_orders(df1,"Start Date",day1,day2,d_bal-d_in1,verbose)
            df1,d_in2 = try_move_orders(df1,"Completion Date",day2,day1,d_bal-(d_in1+d_out),verbose)

    elif d_bal < 0:  # If we need to reduce incoming traffic/ increase outbound traffic
        d_bal = abs(d_bal)
        if (in1 >= in2 and out1 >= out2):
        # If traffic is heavier today, we prioritize reducing inbound traffic
            # We try to defer as many inbound orders to tomorrow as possible
            # If it's not enough then we make up by taking in the outbound order from tomorrow
            df1,d_in1 = try_move_orders(df1,"Completion Date",day1,day2,d_bal,verbose)
            df1,d_out = try_move_orders(df1,"Start Date",day2,day1,d_bal-d_in1,verbose)

        elif (in1 <= in2 and out1 <= out2):
        # If traffic is less today, we prioritize increasing outbound traffic
            # We try to take in as many outbound orders from tomorrow as possible
            # If it's not enough then we make up by deferring some inbound truck to tomorrow
            df1,d_out = try_move_orders(df1,"Start Date",day2,day1,d_bal,verbose)
            df1,d_in1 = try_move_orders(df1,"Completion Date",day1,day2,d_bal-d_out,verbose)

        else:
        # If inbound and outbound traffic comparison is mixed, we pick half each type to even everything out
            # We try achieve 1/2 of d_bal by deferring inbound orders to tomorrow
            # Then try achieve the rest of d_bal by taking in tomorrow outbound orders
            # Go back to step 1 and try to defer more inbound orders to tomorrow just in case d_bal is not achieved
            df1,d_in1 = try_move_orders(df1,"Completion Date",day1,day2,d_bal//2,verbose)
            df1,d_out = try_move_orders(df1,"Start Date",day2,day1,d_bal-d_in1,verbose)
            df1,d_in2 = try_move_orders(df1,"Completion Date",day1,day2,d_bal-(d_in1+d_out),verbose)
        # Invert the number to get the right statistics
        d_bal *= -1
        d_in1 *= -1
        d_in2 *= -1
        d_out*=-1

    d_out*=-1

    if verbose:
        # The recount only feeds the report below, so it is skipped when not verbose.
        df_count = get_df_count(df1,day1,day2,fill_missing=True)
        print("d_bal =",d_bal,": d_out =", d_out,"d_in =",d_in1+d_in2,"(bal_val =",bal_val,")")
        print("After:\n",df_count.loc[[day1,day2]],'\n')

    return df1
        

In [13]:
# Demo balance()
# Demo balance(): balance two days taken from a 14-day window of the cleaned schedule.
date = pd.Timestamp(2020,12,24,0)

df = get_df_span(DF_CLEAN,date,date+14*DAYS)
df_count = get_df_count(df)
print(df_count)
day1 = df_count.index[-4]
day2 = df_count.index[-1]

# Unpack in the same order as the selected columns (previously Inbound/Outbound were swapped).
out1,in1 = df_count.loc[day1,["Outbound","Inbound"]]
df1 = balance(df,day1,day2,verbose=True)

            Outbound  Inbound  Imbalance  cImbalance
2020-12-22         2        0         -2          -2
2020-12-24        10       16          6           4
2020-12-25         3        6          3           7
2020-12-26         7       11          4          11
2020-12-27        11        6         -5           6
2020-12-28        13       20          7          13
2020-12-29        18       17         -1          12
2020-12-30        21       18         -3           9
2020-12-31        15       23          8          17
2021-01-01         1        2          1          18
2021-01-02        12       10         -2          16
2021-01-03         8        3         -5          11
2021-01-04        20       23          3          14
2021-01-05        23       26          3          17
2021-01-06        18       25          7          24
2021-01-07        15       23          8          32

Days: 2021-01-04 00:00:00 - 2021-01-07 00:00:00
Before:
             Outbound  Inbound  Imbalance 

In [14]:
def local_k_average(df,k=2,verbose=False):
    """Reschedule `df` by balancing consecutive days inside a sliding window of k days.

    For every day i, the window covers days i .. i+k-1 (shortened near the end
    of the schedule). Each consecutive pair of days in the window is passed to
    balance(), using as target the truncated mean imbalance from the pair's
    first day to the window's last day, recomputed before every pair.

    Returns the rescheduled copy; `df` is not modified.
    """
    df_resched = df.copy()
    dates = get_df_count(df_resched,fill_missing=True).index
    num_days = len(dates)
    for i in range(num_days):
        window = int(min(num_days-i,k)) # Never look past the last day of the schedule
        positions = list(range(i,i+window))
        for head,follower in zip(positions,positions[1:]):
            day1,day2,last = dates[head],dates[follower],dates[positions[-1]]
            df_count = get_df_count(df_resched,day1,last,fill_missing=True)
            bal_val = int(np.average(df_count.loc[day1:last,"Imbalance"]))
            df_resched = balance(df_resched,day1,day2,bal_val,verbose)
    return df_resched

In [15]:
# Demo local_k_average(): reschedule a 14-day window with k=3.
date = pd.Timestamp(2020,12,24,0)

df = get_df_span(DF_CLEAN,date,date+14*DAYS)

df1 = local_k_average(df,k=3,verbose=False)

In [16]:
# Daily counts of df1 (the schedule produced by the most recently run cell that assigned df1).
get_df_count(df1)

Unnamed: 0,Outbound,Inbound,Imbalance,cImbalance
2020-12-23,2,4,2,2
2020-12-24,9,12,3,5
2020-12-25,7,7,0,5
2020-12-26,7,9,2,7
2020-12-27,10,11,1,8
2020-12-28,16,16,0,8
2020-12-29,17,20,3,11
2020-12-30,15,17,2,13
2020-12-31,17,18,1,14
2021-01-01,6,5,-1,13


In [31]:
def local_k_greedy(df,k=2):
    """Reschedule `df` by balancing consecutive days toward one target per k-day window.

    For every day i, the window covers days i .. i+k-1 (shortened near the end
    of the schedule). The target is the truncated mean imbalance of the window,
    computed once per window; each consecutive pair of days in the window is
    then passed to balance() with that target.

    Returns the rescheduled copy; `df` is not modified.
    """
    df_resched = df.copy()
    dates = get_df_count(df_resched,fill_missing=True).index
    total_days = len(dates)
    for i in range(total_days):
        window = int(min(total_days-i,k)) # Never look past the last day of the schedule
        positions = list(range(i,i+window))
        first,last = dates[positions[0]],dates[positions[-1]]
        window_count = get_df_count(df_resched,first,last,fill_missing=True)
        bal_val = int(np.average(window_count.loc[first:last,"Imbalance"]))
        for head,follower in zip(positions,positions[1:]):
            df_resched = balance(df_resched,dates[head],dates[follower],bal_val)
    return df_resched

In [32]:
# Demo local_k_greedy(): reschedule a 14-day window with k=3 and show the resulting daily counts.
date = pd.Timestamp(2020,12,24,0)

df = get_df_span(DF_CLEAN,date,date+14*DAYS)

df2 = local_k_greedy(df,k=3)
get_df_count(df2)

Unnamed: 0,Outbound,Inbound,Imbalance,cImbalance
2020-12-23,2,4,2,2
2020-12-24,9,12,3,5
2020-12-25,7,7,0,5
2020-12-26,7,9,2,7
2020-12-27,9,10,1,8
2020-12-28,16,16,0,8
2020-12-29,15,18,3,11
2020-12-30,18,20,2,13
2020-12-31,17,18,1,14
2021-01-01,6,5,-1,13


In [33]:
# Time local_k_greedy() on one month of orders for several window sizes k,
# saving each rescheduled table under dfs/.
start_date = pd.Timestamp(2020,1,1,0)
end_date = pd.Timestamp(2020,1,31,0)
df = get_df_span(DF_CLEAN,start_date,end_date)
df_time = pd.DataFrame(index=[2,3,4,5])
os.makedirs("dfs",exist_ok=True) # to_csv does not create a missing output directory

for k in df_time.index:
    #    <<---- Begin timer ---->>
    start_time = time.perf_counter() # monotonic clock meant for measuring elapsed time
    #    <<--- Begin execution ->>
    
    df_resched = local_k_greedy(df,k)
    
    #    <<--- End execution --->>
    end_time = time.perf_counter()
    #    <<----- End timer ----->>
        # Post execution
    df_resched.to_csv("dfs/"+"k_greedy_"+str(k))
    df_time.loc[k,"Time (s)"] = end_time-start_time
    print("k = {} : {:.6f}".format(k,df_time.loc[k,"Time (s)"]))

k = 2 : 155.782754
k = 3 : 228.919222
k = 4 : 318.258224


KeyboardInterrupt: 

# Demos

In [17]:
# Demo get_df_count(): daily counts for one week, with order-free days filled in as zeros.
first_day = pd.Timestamp(2019,3,13,0)
last_day = pd.Timestamp(2019,3,19,0)

# df_count = get_df_count(DF_CLEAN,start_date,end_date)
df_count = get_df_count(DF_CLEAN,first_day,last_day,fill_missing=True)
df_count

Unnamed: 0,Outbound,Inbound,Imbalance,cImbalance
2019-03-13,1,0,-1,-1
2019-03-14,0,0,0,-1
2019-03-15,0,0,0,-1
2019-03-16,0,0,0,-1
2019-03-17,0,0,0,-1
2019-03-18,0,0,0,-1
2019-03-19,2,0,-2,-3


In [18]:
# Demo get_last_day(): latest start/completion date of the PQ region in the cleaned schedule.
df = DF_CLEAN
get_last_day(df)

Timestamp('2021-04-24 00:00:00')

In [19]:
# Demo get_df_day(): orders that start in or complete in a US region on 2020-01-15.

df = DF_CLEAN
# first_day / last_day are only used by the commented-out get_df_span() call below.
first_day = pd.Timestamp(2020,1,1,0)
last_day = pd.Timestamp(2020,1,7,0)

# get_df_span(df,first_day,last_day)
get_df_day(DF_CLEAN,pd.Timestamp(2020,1,15,0),region="US")

Unnamed: 0,Priority,Requested Trailer Class,Shipper Region3,Consignee Region3,Start Date,Completion Date
1048,Standard,HEATER,USSCAT,PQ2MON,2020-01-15,2020-01-17
4071,Standard,REEFER,USSWCS,PQ2MON,2020-01-15,2020-01-21
4474,Standard,REEFER,USEASE,PQ2MON,2020-01-15,2020-01-16
5119,Standard,REEFER,USSWCS,PQ2MON,2020-01-15,2020-01-19
5120,Standard,REEFER,USSWCS,PQ2MON,2020-01-15,2020-01-22
8478,Standard,DRY,USMEON,PQ2MON,2020-01-15,2020-01-17
9772,Standard,DRY,PQ2S,USMEIS,2020-01-13,2020-01-15
12556,Standard,DRY,PQ2S,USNWWA,2020-01-08,2020-01-15
13172,Standard,DRY,USEAC,PQ2MON,2020-01-15,2020-01-16
14385,Standard,HEATER,USWIGR,PQ2MON,2020-01-15,2020-01-17


In [20]:
# Demo get_bounds(): print one order's scheduled dates, then the bounds computed for it.
df = DF_CLEAN
index = df.index[5]

print(df.loc[index,'Start Date':'Completion Date'],'\n')
# print("Violation: ",check_bound_violations(df,index))
get_bounds(index)

Start Date         2020-04-08 00:00:00
Completion Date    2020-04-10 00:00:00
Name: 5, dtype: object 



{'e_start': Timestamp('2020-04-04 00:00:00'),
 'l_start': Timestamp('2020-04-08 00:00:00'),
 'e_compl': Timestamp('2020-04-09 00:00:00'),
 'l_compl': Timestamp('2020-04-11 00:00:00')}

In [21]:
# Demo get_max_duration(): longest duration among orders similar to row 55 of the raw frame DF.
get_max_duration(DF,55)

Timedelta('14 days 00:00:00')

In [22]:
# Demo get_df_movable_only(): orders starting on date1 whose start may be moved to date2.

date1 = pd.Timestamp(2020,1,4,0)
date2 = pd.Timestamp(2020,1,7,0)
get_df_movable_only(DF,'Start Date',date1,date2)

Unnamed: 0,Client Grouping1,Customer Group,Requested Mode,Priority,Requested Trailer Class,Shipper Region3,Consignee Region3,Lane ID - City to City,Start Date,Completion Date,Order #,Avg. Weekly Frequency
16970,Corporate,Canada Post Corp.,ROAD,Expedited,DRY,PQ2MON,ABSCAL,"ST LAURENT,PQ/IL to CALGARY,AB/",2020-01-04,2020-01-07,4021937,0
19505,Corporate,General Mills,ROAD,Standard,REEFER,PQ2MON,ABNEDM,"BOUCHERVILLE,PQ/ to EDMONTON,AB/",2020-01-04,2020-01-10,4038583,0
19689,Corporate,General Mills,ROAD,Standard,REEFER,PQ2MON,BC2VAN,"BOUCHERVILLE,PQ/ to SURREY,BC/",2020-01-04,2020-01-10,4038542,0


In [23]:
# # Pseudo-code
# # Our main k-local greedy algorithm that re-tunes our schedule
# def k_local_greedy(schedule,k):
#     for day in schedule.all_days:
#         k = min(schedule.num_days-day,k) # Pick the number of days left if < k
#         local_k_days = schedule[day:day+k]
#         bal_val = average_imb(local_k_day)
#         while local_k_days.size > 1:
#             curr,next,last = local_k_days[0], local_k_days[1], local_k_days[-1]
#             schedule.balance(curr,next,bal_val)
#             local_k_days.pop(curr)
#     return schedule




```
        # If traffic is heavier on current day,
            # We prioritize deferring curr’s outbound orders to next
            # If it's not enough then we also take in next’s inbound orders
            
        # If traffic is lighter on current day, 
            # We prioritize taking in next's inbound orders
            # If it's not enough then we also defer the outbound order to next day
            
        # If inbound and outbound traffic comparison is mixed, we will try to take in some and off load some
            # We try to achieve 1/2 of d_bal by taking in next's inbound orders
            # We try to achieve the rest d_bal by deferring outbound orders to next
            
            ```

In [24]:
# def balance(curr,next,bal_val):
#     d_bal = bal_val - curr.imb # The amount of imbalance by which we need to adjust for curr day
#     if d_bal > 0: # When we want to increase the balance level
#         if (curr.in >= next.in and curr.out >= next.out):  # If traffic is heavier on current day,
#             d_out = move_orders("outbound",from=curr,to=next,amount=d_bal) # We prioritize deferring outbound orders
#             d_in  = move_orders("inbound",from=next,to=curr,amount=(d_bal-d_out))            
        
#         elif (curr.in <= next.in and curr.out <= next.out): # If traffic is lighter on current day, 
#             d_in  = move_orders("inbound",next,curr,d_bal) # We prioritize taking in next's inbound orders
#             d_out = move_orders("outbound",curr,next,(d_bal-d_in))               
        
#         else:  # If traffic comparison is mixed, we will try to take in some inbound orders and defer some outbound
#             d_in  = move_orders("inbound",next,curr,d_bal * 1/2) # Take in some inbound orders
#             d_out = move_orders("outbound",curr,next,(d_bal-d_in)) # Defer some outbound orders to next
            
#     elif d_bal < 0:  # If we need to reduce incoming traffic/ increase outbound traffic
#         d_bal = abs(d_bal) # When we want to decrease the balance level
#         if (curr.in >= next.in and curr.out >= next.out): # If traffic is heavier on current day,
#             d_in  = move_orders("inbound",curr,next,d_bal) # We prioritize deferring curr’s inbound orders to next
#             d_out = move_orders("outbound",next,curr,(d_bal-d_in)) 

#         elif (curr.in <= next.in and curr.out <= next.out): # If traffic is lighter on current day
#             d_out = move_orders("outbound",next,curr,d_bal) # We prioritize taking in next's outbound orders
#             d_in  = move_orders("inbound",curr,next,d_bal-d_out)
            
#         else: # If traffic comparison is mixed, we will try to defer some inbound orders and take in some outbound ones
#             d_in  = move_orders("inbound",curr,next,d_bal *1/2) # Defer some inbound orders to next
#             d_out = move_orders("outbound",next,curr,(d_bal-d_in)) # Take in some of next's outbound orders           


In [25]:
# def move_orders(direction,from,to,amount):
#     movable_orders = get_movable_orders_only(direction,from,to) #Only get orders that can be moved from -> to
#     if direction == 
#         if day2 > day1:
#         # If we are moving the Start date forward
#             # Pick furthest completion dates first (longest duration)
#             df_movable_only.sort_values(by="Completion Date",ascending=False,inplace=True) 
#         elif day1 > day2:
#         # If we are moving the Start date back
#             # Pick nearest completion dates first (shortest duration)
#             df_movable_only.sort_values(by="Completion Date",ascending=True,inplace=True) 
#     elif col == "Completion Date":
#         if day2 > day1:
#         # If we are moving the Completion date forward
#             # Pick nearest start dates first (shortest duration)
#             df_movable_only.sort_values(by="Start Date",ascending=True,inplace=True) 
#         elif day1 > day2:
#         # If we are moving the Completion date back
#             # Pick furthest start dates first (longest duration)
#             df_movable_only.sort_values(by="Start Date",ascending=False,inplace=True) 

In [26]:
# # Try to move a number of orders that starts on day1 to day2, or that completes on day1 to day2
# # Return the modified the df, and the ACTUAL number of orders moved.
# # This is because in reality, constraints apply and many times there are not
# # enough available orders to satisfy the request
# def try_move_orders(df,col,day1,day2,num_orders=None,verbose=False):
#     df1 = df.copy()
#     df_movable_only = get_df_movable_only(df1,col,day1,day2) # Get ONLY orders from day1 that *can be moved* to day2
    
#     if col == "Start Date":
#         if day2 > day1:
#         # If we are moving the Start date forward
#             # Pick furthest completion dates first (longest duration)
#             df_movable_only.sort_values(by="Completion Date",ascending=False,inplace=True) 
#         elif day1 > day2:
#         # If we are moving the Start date back
#             # Pick nearest completion dates first (shortest duration)
#             df_movable_only.sort_values(by="Completion Date",ascending=True,inplace=True) 
#     elif col == "Completion Date":
#         if day2 > day1:
#         # If we are moving the Completion date forward
#             # Pick nearest start dates first (shortest duration)
#             df_movable_only.sort_values(by="Start Date",ascending=True,inplace=True) 
#         elif day1 > day2:
#         # If we are moving the Completion date back
#             # Pick furthest start dates first (longest duration)
#             df_movable_only.sort_values(by="Start Date",ascending=False,inplace=True) 

#     if num_orders == None or num_orders < 0: 
#         num_orders = len(df_movable_only)
#     df_orders = df_movable_only[:int(num_orders)] # Get just enough requested orders from day1 that can be moved to day2
    
#     for index in df_orders.index:
#         assert(df1.loc[index,col] == day1)
#         df1 = move(df1,index,day2,col) # Finally move such orders to day2
        
#     if verbose: 
#         print("- Moved {}/{} requested {}s from Day {} to Day {}".format(len(df_orders),num_orders,col,day1,day2))
#         print(get_df_count(df1).loc[[day1,day2]])
#     return df1, len(df_orders)