In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
import random
from random import randint
import time

In [2]:
# Read the orders workbook into DF and preview its first five rows.
# The path is relative, so the file must sit in the notebook's working directory.
FILE_NAME = "PQ2MON - Orders - Weeks -1 to -109 (1).xls.xlsx"
DF = pd.read_excel(FILE_NAME)
DF.head(5)

Unnamed: 0,Client Grouping1,Customer Group,Requested Mode,Priority,Requested Trailer Class,Shipper Region3,Consignee Region3,Lane ID - City to City,Start Date,Completion Date,Order #,Avg. Weekly Frequency
0,Corporate,3M Canada Company,ROAD,Standard,DRY,ON2TOR,PQ2S,"MILTON,ON/ to DRUMMONDVILLE,PQ/",2019-04-16,2019-04-18,3790088,0
1,Corporate,3M Canada Company,ROAD,Standard,DRY,ON2TOR,PQ2S,"MILTON,ON/ to DRUMMONDVILLE,PQ/",2019-05-07,2019-05-09,3810953,0
2,Corporate,3M Canada Company,ROAD,Standard,DRY,ON2TOR,PQ2S,"MILTON,ON/ to DRUMMONDVILLE,PQ/",2019-06-07,2019-06-10,3841144,0
3,Corporate,3M Canada Company,ROAD,Standard,DRY,ON2TOR,PQ2S,"MILTON,ON/ to DRUMMONDVILLE,PQ/",2019-07-09,2019-07-11,3869973,0
4,Corporate,3M Canada Company,ROAD,Standard,DRY,PQ2MON,USMWIA,"STE THERESE,PQ/TE to PRAIRIE DU CHIEN,WI/",2020-02-26,2020-03-02,4066936,0


In [12]:
# One-day step; get_all_days multiplies it by an integer offset to enumerate consecutive dates.
DAYS = pd.Timedelta(days=1)

In [13]:
# Drop the irrelevant columns, keeping only the rows that involve the PQ region.
# Internal orders start and complete within the PQ region itself.
# External orders go from anywhere in PQ to another region, or vice versa.
def get_df_clean(df,drop_columns=None,region='PQ'):
    """Return the orders that touch ``region``, without the unneeded columns.

    An order is kept when its 'Shipper Region3' or its 'Consignee Region3'
    value starts with ``region``. Missing values in those two columns are
    replaced by the string "UNKNOWN" in the result. The input frame is not
    modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Orders containing 'Shipper Region3', 'Consignee Region3' and every
        column listed in ``drop_columns``.
    drop_columns : list of str, optional
        Columns to remove. When omitted, the customer, mode, order-number,
        frequency, lane and client-grouping columns are dropped.
    region : str
        Region-code prefix used to select rows.

    Returns
    -------
    pandas.DataFrame
        A new frame with the selected rows and remaining columns.
    """
    if drop_columns is None:
        drop_columns = ["Customer Group","Requested Mode","Order #","Avg. Weekly Frequency","Lane ID - City to City","Client Grouping1"]

    # na=False: a missing region code never matches the prefix, so both masks
    # stay strictly boolean regardless of the column dtype.
    cond_out = df['Shipper Region3'].str.startswith(region, na=False)
    cond_in  = df['Consignee Region3'].str.startswith(region, na=False)
    df_clean = df[cond_out | cond_in].drop(columns=drop_columns)

    # Return the filled frame instead of calling fillna(inplace=True) on a
    # `.loc[:, col]` selection: that is chained assignment, which warns in
    # pandas 2.x and leaves df_clean untouched under Copy-on-Write.
    return df_clean.fillna({'Shipper Region3': "UNKNOWN", 'Consignee Region3': "UNKNOWN"})

In [14]:
# Clean the raw orders with the default drop list and region ('PQ'), then display the result.
DF_CLEAN = get_df_clean(DF)
DF_CLEAN

Unnamed: 0,Priority,Requested Trailer Class,Shipper Region3,Consignee Region3,Start Date,Completion Date
0,Standard,DRY,ON2TOR,PQ2S,2019-04-16,2019-04-18
1,Standard,DRY,ON2TOR,PQ2S,2019-05-07,2019-05-09
2,Standard,DRY,ON2TOR,PQ2S,2019-06-07,2019-06-10
3,Standard,DRY,ON2TOR,PQ2S,2019-07-09,2019-07-11
4,Standard,DRY,PQ2MON,USMWIA,2020-02-26,2020-03-02
...,...,...,...,...,...,...
23864,Standard,DRY,PQ2MON,BC2VAN,2020-06-18,2020-06-22
23865,Standard,REEFER,PQ2MON,ABNEDM,2020-02-12,2020-02-18
23866,Standard,REEFER,PQ2MON,ABNEDM,2020-02-12,2020-02-20
23867,Standard,REEFER,PQ2MON,ABNEDM,2020-02-13,2020-02-18


In [15]:
# Get the daily counts for a schedule, i.e. the inbound and outbound figures. Also calculates the imbalance levels.
def get_df_count(df,first_day=None,last_day=None,fill_missing=False,region='PQ'):
    """Count outbound and inbound orders per day for ``region``.

    Outbound orders are rows whose 'Shipper Region3' starts with ``region``,
    counted by 'Start Date'. Inbound orders are rows whose
    'Consignee Region3' starts with ``region``, counted by 'Completion Date'.

    Parameters
    ----------
    df : pandas.DataFrame
        Orders with the two region columns and the two date columns.
    first_day, last_day : optional
        Inclusive bounds of the returned rows. When omitted they are taken
        from get_first_day / get_last_day for the same ``region``.
    fill_missing : bool
        When True, every day returned by get_all_days(first_day, last_day)
        gets a row, with zeros on days that have no orders.
    region : str
        Region-code prefix.

    Returns
    -------
    pandas.DataFrame
        Integer columns 'Outbound', 'Inbound', 'Imbalance'
        (Inbound - Outbound) and 'cImbalance', indexed by date. 'cImbalance'
        is the running sum of 'Imbalance' computed before the rows are cut to
        [first_day, last_day], so it includes dates earlier than first_day.
    """
    # Forward `region` so the default bounds describe the same region as the counts.
    first_day = get_first_day(df, region=region) if first_day is None else first_day
    last_day  = get_last_day(df, region=region)  if last_day is None else last_day

    # na=False keeps the masks boolean when a region code is missing.
    cond_out = df['Shipper Region3'].str.startswith(region, na=False)
    cond_in  = df['Consignee Region3'].str.startswith(region, na=False)

    # Name the counts explicitly: value_counts() no longer keeps the source
    # column name on pandas >= 2.0, so renaming by "Start Date" /
    # "Completion Date" after the concat would silently do nothing.
    outbound = df.loc[cond_out, 'Start Date'].value_counts().rename("Outbound")
    inbound  = df.loc[cond_in, 'Completion Date'].value_counts().rename("Inbound")
    df_count = pd.concat([outbound, inbound], axis=1).fillna(0)

    if fill_missing and first_day and last_day:
        df_count = df_count.join(pd.DataFrame(index=get_all_days(first_day,last_day)),how='outer').fillna(0)

    # The running sum and the label slice below both need chronological order.
    df_count = df_count.sort_index()
    df_count["Imbalance"]  = df_count["Inbound"] - df_count["Outbound"]
    df_count["cImbalance"] = df_count["Imbalance"].cumsum()
    return df_count.loc[first_day:last_day].astype('int64')

def get_all_days(first_day,last_day):
    """List every day from ``first_day`` to ``last_day``, both included.

    Consecutive entries are one ``DAYS`` step apart. An empty list is
    returned when either bound is not exactly a ``pd.Timestamp`` or when
    ``last_day`` falls before ``first_day``.
    """
    # Guard clause: anything other than two Timestamps yields no days.
    if type(first_day) != pd.Timestamp or type(last_day) != pd.Timestamp:
        return []

    n_days = int((last_day - first_day).days + 1)
    days = []
    for offset in range(n_days):
        days.append(first_day + offset * DAYS)
    return days

def get_first_day(df,col=None,region='PQ'):
    """Return the earliest relevant date for ``region``.

    Parameters
    ----------
    df : pandas.DataFrame
        Orders with 'Shipper Region3', 'Consignee Region3', 'Start Date' and
        'Completion Date' columns.
    col : str, optional
        'Start Date' returns the earliest start among rows whose shipper
        region starts with ``region``; 'Completion Date' returns the earliest
        completion among rows whose consignee region starts with ``region``.
        Any other value returns the earlier of those two dates.
    region : str
        Region-code prefix.

    Returns
    -------
    The minimum date found, or NaT when no row matches.
    """
    # na=False: rows with a missing region code are simply not selected
    # (a NaN inside an object-dtype mask makes boolean indexing raise).
    cond_out = df['Shipper Region3'].str.startswith(region, na=False)
    cond_in  = df['Consignee Region3'].str.startswith(region, na=False)
    if col == 'Start Date':
        return df.loc[cond_out, col].min()
    if col == 'Completion Date':
        return df.loc[cond_in, col].min()

    # Built-in min() is order-dependent with NaT (every comparison is False),
    # so an empty outbound side used to hide a valid inbound date. Drop the
    # missing candidates before comparing.
    candidates = [df.loc[cond_out, 'Start Date'].min(), df.loc[cond_in, 'Completion Date'].min()]
    candidates = [day for day in candidates if not pd.isna(day)]
    return min(candidates) if candidates else pd.NaT
    
def get_last_day(df,col=None,region='PQ'):
    """Return the latest relevant date for ``region``.

    Parameters
    ----------
    df : pandas.DataFrame
        Orders with 'Shipper Region3', 'Consignee Region3', 'Start Date' and
        'Completion Date' columns.
    col : str, optional
        'Start Date' returns the latest start among rows whose shipper region
        starts with ``region``; 'Completion Date' returns the latest
        completion among rows whose consignee region starts with ``region``.
        Any other value returns the later of those two dates.
    region : str
        Region-code prefix.

    Returns
    -------
    The maximum date found, or NaT when no row matches.
    """
    # na=False: rows with a missing region code are simply not selected
    # (a NaN inside an object-dtype mask makes boolean indexing raise).
    cond_out = df['Shipper Region3'].str.startswith(region, na=False)
    cond_in  = df['Consignee Region3'].str.startswith(region, na=False)
    if col == 'Start Date':
        return df.loc[cond_out, col].max()
    if col == 'Completion Date':
        return df.loc[cond_in, col].max()

    # Built-in max() is order-dependent with NaT (every comparison is False),
    # so an empty outbound side used to hide a valid inbound date. Drop the
    # missing candidates before comparing.
    candidates = [df.loc[cond_out, 'Start Date'].max(), df.loc[cond_in, 'Completion Date'].max()]
    candidates = [day for day in candidates if not pd.isna(day)]
    return max(candidates) if candidates else pd.NaT

In [17]:
# Get all the orders that start and/or complete on a specific day
def get_df_day(df,day,col=None,region='PQ'):
    """Select the orders of ``region`` that start or complete on ``day``.

    With ``col='Start Date'`` only rows whose shipper region starts with
    ``region`` and whose 'Start Date' equals ``day`` are returned. With
    ``col='Completion Date'`` only rows whose consignee region starts with
    ``region`` and whose 'Completion Date' equals ``day`` are returned. Any
    other ``col`` returns the rows matching either rule.
    """
    from_region = df['Shipper Region3'].str.startswith(region)
    to_region   = df['Consignee Region3'].str.startswith(region)
    starts_on_day    = df['Start Date'] == day
    completes_on_day = df['Completion Date'] == day

    if col == 'Start Date':
        selected = from_region & starts_on_day
    elif col == 'Completion Date':
        selected = to_region & completes_on_day
    else:
        selected = (from_region & starts_on_day) | (to_region & completes_on_day)
    return df[selected]

# Get all orders that start and/or complete in a specific time span
def get_df_span(df,first_day=None,last_day=None,col=None,region='PQ'):
    """Select the orders of ``region`` that start or complete within a span.

    Parameters
    ----------
    df : pandas.DataFrame
        Orders with 'Shipper Region3', 'Consignee Region3', 'Start Date' and
        'Completion Date' columns.
    first_day, last_day : optional
        Inclusive bounds. When omitted they are taken from get_first_day /
        get_last_day for the same ``region``.
    col : str, optional
        'Start Date' keeps rows whose shipper region starts with ``region``
        and whose start lies in the span; 'Completion Date' keeps rows whose
        consignee region starts with ``region`` and whose completion lies in
        the span. Any other value keeps rows matching either rule.
    region : str
        Region-code prefix.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df``.
    """
    # Forward `region` so the default bounds are those of the region being
    # queried rather than always those of 'PQ'.
    first_day = get_first_day(df, region=region) if first_day is None else first_day
    last_day  = get_last_day(df, region=region)  if last_day is None else last_day

    cond_start_date = (df["Start Date"] >= first_day) & (df["Start Date"] <= last_day)
    cond_compl_date = (df["Completion Date"] >= first_day) & (df["Completion Date"] <= last_day)
    # na=False keeps the masks boolean when a region code is missing.
    cond_out = df['Shipper Region3'].str.startswith(region, na=False)
    cond_in = df['Consignee Region3'].str.startswith(region, na=False)

    if col == 'Start Date':
        return df[cond_out & cond_start_date]
    if col == 'Completion Date':
        return df[cond_in & cond_compl_date]
    return df[(cond_out & cond_start_date)|(cond_in & cond_compl_date)]

In [16]:
# Demo: get_df_count over the week 2019-03-13 .. 2019-03-19.
first_day = pd.Timestamp(2019,3,13,0)
last_day = pd.Timestamp(2019,3,19,0)


# Alternative call left commented out; start_date / end_date are not defined in this cell.
# df_count = get_df_count(DF_CLEAN,start_date,end_date)
# fill_missing=True adds a zero row for each day in the span that has no orders.
df_count = get_df_count(DF_CLEAN,first_day,last_day,fill_missing=True)
df_count

Unnamed: 0,Outbound,Inbound,Imbalance,cImbalance
2019-03-13,1,0,-1,-1
2019-03-14,0,0,0,-1
2019-03-15,0,0,0,-1
2019-03-16,0,0,0,-1
2019-03-17,0,0,0,-1
2019-03-18,0,0,0,-1
2019-03-19,2,0,-2,-3


In [18]:
# Demo: get_df_day (and, commented out, get_df_span)

df = DF_CLEAN
first_day = pd.Timestamp(2020,1,1,0)
last_day = pd.Timestamp(2020,1,7,0)

# Within this cell, df / first_day / last_day are used only by this commented-out span query.
# get_df_span(df,first_day,last_day)
# Rows whose shipper region starts with "US" and whose Start Date is 2020-01-15,
# or whose consignee region starts with "US" and whose Completion Date is that day.
get_df_day(DF_CLEAN,pd.Timestamp(2020,1,15,0),region="US")

Unnamed: 0,Priority,Requested Trailer Class,Shipper Region3,Consignee Region3,Start Date,Completion Date
1048,Standard,HEATER,USSCAT,PQ2MON,2020-01-15,2020-01-17
4071,Standard,REEFER,USSWCS,PQ2MON,2020-01-15,2020-01-21
4474,Standard,REEFER,USEASE,PQ2MON,2020-01-15,2020-01-16
5119,Standard,REEFER,USSWCS,PQ2MON,2020-01-15,2020-01-19
5120,Standard,REEFER,USSWCS,PQ2MON,2020-01-15,2020-01-22
8478,Standard,DRY,USMEON,PQ2MON,2020-01-15,2020-01-17
9772,Standard,DRY,PQ2S,USMEIS,2020-01-13,2020-01-15
12556,Standard,DRY,PQ2S,USNWWA,2020-01-08,2020-01-15
13172,Standard,DRY,USEAC,PQ2MON,2020-01-15,2020-01-16
14385,Standard,HEATER,USWIGR,PQ2MON,2020-01-15,2020-01-17
