# Adapted from
<br>[1.00 optimized logistics solution (<30sec)](https://www.kaggle.com/afenzdkyayqyydsbsuyj/1-00-optimized-logistics-solution-30sec) by [AFEnzdkyayqYYDsBSUYJ](https://www.kaggle.com/afenzdkyayqyydsbsuyj)
<br>[shopee-logistics](https://www.kaggle.com/mylee2009/shopee-logistics-1-0-script-solution) by [MyleeSG](https://www.kaggle.com/mylee2009)

In [None]:
!pip install xlrd
!pip install openpyxl
import numpy as np
import pandas as pd
from datetime import datetime, timedelta, date

import os
# Print the full path of every file under the logistics dataset directory so
# the paths used by the later cells can be checked by eye.
for folder, _subdirs, names in os.walk('/kaggle/input/shopee-code-league-20/_DA_Logistics'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)


In [None]:
# Load the March delivery orders with an explicit dtype for every column.
filepath = '/kaggle/input/shopee-code-league-20/_DA_Logistics/delivery_orders_march.csv'

# The address columns use the builtin `object` dtype. `np.object` was only an
# alias for it; the alias was deprecated in NumPy 1.20 and removed in 1.24,
# where referencing it raises AttributeError.
# '2nd_deliver_attempt' is read as float64 so that missing values can be held
# as NaN (they are filled later in the notebook).
dtype = {
    'orderid': np.int64,
    'pick': np.int64,
    '1st_deliver_attempt': np.int64,
    '2nd_deliver_attempt': np.float64,
    'buyeraddress': object,
    'selleraddress': object,
}
df = pd.read_csv(filepath, dtype=dtype)
df.head()


# convert Unix timestamps (seconds) to day numbers (whole days since the epoch, in GMT+8)

In [None]:
# Seconds in eight hours (shift applied before cutting into days, so day
# boundaries fall on GMT+8 midnight) and seconds in one day.
GMT8_OFFSET = 8 * 3600
DURATION_1DAY = 24 * 3600

dt_columns = ['pick', '1st_deliver_attempt', '2nd_deliver_attempt']

# Orders without a second attempt have NaN here; replace it with 0 so the
# column can be stored as int64 like the other two timestamp columns.
second_attempt = df['2nd_deliver_attempt'].fillna(0)
df['2nd_deliver_attempt'] = second_attempt.astype(np.int64)

# Shift every timestamp by the offset, then floor-divide by the day length to
# get an integer day number per column.
df[dt_columns] = df[dt_columns].add(GMT8_OFFSET).floordiv(DURATION_1DAY)
df.head()

# compute number of working days between time intervals using np.busday_count

In [None]:
# Week mask in numpy's Monday-first order: Monday..Saturday count as working
# days, Sunday does not. The listed dates are additionally excluded.
WORKDAYS = '1111110'
HOLIDAYS = ['2020-03-08', '2020-03-25', '2020-03-30', '2020-03-31']


def _working_days_between(start_days, end_days):
    """Count working days from start (inclusive) to end (exclusive)."""
    return np.busday_count(
        start_days,
        end_days,
        weekmask=WORKDAYS,
        holidays=HOLIDAYS,
    )


# Reinterpret the integer day numbers as numpy dates (days since the epoch).
t1, t2, t3 = (
    df[column].values.astype('datetime64[D]')
    for column in ('pick', '1st_deliver_attempt', '2nd_deliver_attempt')
)

# num_days1: pick-up -> first attempt; num_days2: first -> second attempt.
# np.busday_count returns a negative count when the end date precedes the
# start date, which is what happens for rows whose second attempt is day 0.
df['num_days1'] = _working_days_between(t1, t2)
df['num_days2'] = _working_days_between(t2, t3)
df.head()

# convert address to city

In [None]:
def _last_word_lower(addresses):
    """Return, per address, the lower-cased text after its final space."""
    # Splitting once from the right yields the same last piece as splitting
    # on every space and taking the final element.
    return addresses.str.rsplit(' ', n=1).str[-1].str.lower()


# The last space-separated token of each address is used as the join key
# for the SLA lookup.
df['destination'] = _last_word_lower(df['buyeraddress'])
df['origin'] = _last_word_lower(df['selleraddress'])
df.head()

# compute sla based on addresses

In [None]:
# Read the SLA matrix workbook and display it.
# NOTE(review): `sla` is not referenced again in the visible cells; the lookup
# table that is actually merged is the hand-written `SLA` frame.
sla_path = '/kaggle/input/shopee-code-league-20/_DA_Logistics/SLA_matrix.xlsx'
sla = pd.read_excel(sla_path)
sla

In [None]:
# Allowed working days for the first delivery attempt, keyed by
# (origin, destination) region. Rows of the matrix are origins and columns
# are destinations, both in the order given by `_REGIONS`.
_REGIONS = ["manila", "luzon", "visayas", "mindanao"]
_SLA_DAYS = [
    [3, 5, 7, 7],  # from manila
    [5, 5, 7, 7],  # from luzon
    [7, 7, 7, 7],  # from visayas
    [7, 7, 7, 7],  # from mindanao
]

# Flatten the matrix into one (origin, destination, sla1) row per pair.
SLA = pd.DataFrame(
    data=[
        [origin, destination, days]
        for origin, day_row in zip(_REGIONS, _SLA_DAYS)
        for destination, days in zip(_REGIONS, day_row)
    ],
    columns=["origin", "destination", "sla1"],
)
SLA

In [None]:
# Attach the first-attempt allowance (sla1) to each order by matching its
# destination and origin regions against the SLA table.
# NOTE(review): this is the default inner join, so any order whose
# origin/destination pair is absent from SLA is dropped from df.
df = df.merge(SLA, on=["destination", "origin"])

# Every order gets the same allowance of 3 for the second attempt.
df = df.assign(sla2=3)
df.head()

# compute whether the delivery is late

In [None]:
# An order is late when either leg exceeds its allowance: pick-up to first
# attempt against sla1, or first to second attempt against sla2.
first_leg_late = df['num_days1'].gt(df['sla1'])
second_leg_late = df['num_days2'].gt(df['sla2'])

# Store the flag as 0/1 integers rather than booleans.
df['is_late'] = (first_leg_late | second_leg_late).astype(int)
df.head()