In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import time

In [None]:
# Load the order table and ask pandas to parse the three timestamp columns as dates.
# NOTE(review): parse_time() further down calls float() on the values of these same
# columns, which suggests they actually hold numeric epoch seconds — confirm that
# parse_dates leaves them numeric (or drop it) before relying on either behaviour.
data = pd.read_csv("delivery_orders_march.csv", parse_dates=['pick', '1st_deliver_attempt', '2nd_deliver_attempt'])

In [None]:
data.head()

In [None]:
# CAUTION: despite its name, `has_2nd` is True when the 2nd-attempt timestamp is
# MISSING (isna). solve() depends on this polarity (`not has_2nd` means a second
# attempt exists), so the flag must only be renamed/inverted together with solve().
data['has_2nd'] = data['2nd_deliver_attempt'].isna()

In [None]:
data.head()

In [None]:
def parse_address(s):
    """Return the last whitespace-separated token of an address, lower-cased.

    The final token is used downstream as the region label (compared against
    "manila" / "luzon" in parse_sla).

    Raises:
        IndexError: if ``s`` is empty or contains only whitespace.
    """
    # Only the trailing token matters, so split once from the right.
    tail_tokens = s.rsplit(maxsplit=1)
    region_token = tail_tokens[-1]
    return region_token.lower()

# Derive a region label for each side of the order from the last word of its address.
data['buyer'] = data['buyeraddress'].map(parse_address)
data['seller'] = data['selleraddress'].map(parse_address)

In [None]:
data['buyer'].value_counts()

In [None]:
data['seller'].value_counts()

In [None]:
data.head()

In [None]:
def parse_sla(df_row):
    """Return the SLA allowance (in days) for one order row.

    Reads the 'buyer' and 'seller' region labels of ``df_row`` and maps the
    pair to an allowance:

    * both "manila"                      -> 3
    * both within {"manila", "luzon"}    -> 5
    * any other combination              -> 7
    """
    buyer_region = df_row['buyer']
    seller_region = df_row['seller']
    near_regions = ("manila", "luzon")

    if buyer_region == "manila" and seller_region == "manila":
        return 3
    if buyer_region in near_regions and seller_region in near_regions:
        return 5
    # At least one side is outside manila/luzon.
    return 7

In [None]:
# Row-wise SLA lookup: parse_sla reads the 'buyer' and 'seller' labels of each row.
data['sla_time'] = data.apply(parse_sla, axis=1)

In [None]:
data.head()

In [None]:
data.tail()

In [None]:
def parse_time(x):
    """Convert an epoch-seconds value into ``[weekday, month, day]`` strings.

    ``x`` is coerced with ``float()``; fractional seconds are truncated.
    A NaN input yields the sentinel ``[0, 0, 0]`` (integers, not strings).

    The parts are the first three tokens of ``time.ctime``, e.g.
    ``['Mon', 'Mar', '2']``. ``time.ctime`` formats in the machine's LOCAL
    timezone, so the resulting calendar day depends on where the notebook runs.
    NOTE(review): confirm the kernel's timezone matches the one the data is
    meant to be interpreted in.
    """
    seconds = float(x)
    if np.isnan(seconds):
        return [0, 0, 0]
    weekday, month, day = time.ctime(int(seconds)).split()[:3]
    return [weekday, month, day]

# Break each timestamp into [weekday, month, day] parts via parse_time;
# missing timestamps become the [0, 0, 0] sentinel.
data['pick_date'] = data['pick'].map(parse_time)
data['1st_date'] = data['1st_deliver_attempt'].map(parse_time)
data['2nd_date'] = data['2nd_deliver_attempt'].map(parse_time)

In [None]:
data.head()

In [None]:
# Non-working days as day indices on the scale used by count_holidays:
# the day-of-month as-is, with April days shifted by +31 (so 36 == 5 April).
# NOTE(review): 1, 8, ..., 57 are seven days apart (presumably the weekly rest
# day) and 25/30/31 look like public holidays — confirm against the calendar.
holidays = [1,8,15,22,29,25,30,31,36,43,50,57]
def count_holidays(a, b):
    """Count entries of ``holidays`` lying between two dates, both ends inclusive.

    ``a`` and ``b`` are ``[weekday, month, day]`` lists as produced by
    parse_time. The day (last element) is converted to int and shifted by 31
    when the month (element 1) is 'Apr'; no other month gets an offset.
    With the ``[0, 0, 0]`` sentinel as ``b`` the end index is 0, so the
    count is 0.

    NOTE(review): the start day itself is counted when it is a holiday —
    confirm that an inclusive lower bound is the intended rule.
    """
    first_day, last_day = int(a[-1]), int(b[-1])
    first_day += 31 if a[1] == 'Apr' else 0
    last_day += 31 if b[1] == 'Apr' else 0
    return sum(first_day <= day <= last_day for day in holidays)

In [None]:
# Holidays between pickup and the 1st delivery attempt (inclusive on both ends,
# per count_holidays); solve() subtracts this from the elapsed days.
data['pick_to_first_cnt'] = data.apply(lambda x: count_holidays(x['pick_date'], x['1st_date']), axis=1)

In [None]:
# Holidays between the 1st and 2nd delivery attempts. Rows without a 2nd attempt
# carry the [0, 0, 0] sentinel in '2nd_date', for which count_holidays returns 0.
data['first_to_second_cnt'] = data.apply(lambda x: count_holidays(x['1st_date'], x['2nd_date']), axis=1)

In [None]:
data.head()

In [None]:
data.tail()

In [None]:
def get_month(a):
    """Return element 1 of a parse_time result.

    For a real timestamp this is the month abbreviation (e.g. 'Mar'); for the
    ``[0, 0, 0]`` sentinel it is the integer 0.
    """
    month_part = a[1]
    return month_part
# Month part of the pickup date, used by solve() to apply the April (+31) offset.
data['pick_month'] = data['pick_date'].map(get_month)

In [None]:
data['pick_month'].value_counts()

In [None]:
# Month part of the 1st delivery attempt; the counts show which months occur.
data['1st_month'] = data['1st_date'].map(get_month)
data['1st_month'].value_counts()

In [None]:
# Month part of the 2nd delivery attempt. Rows with no 2nd attempt hold the
# sentinel 0 here (element 1 of [0, 0, 0]), so 0 appears among the counts.
data['2nd_month'] = data['2nd_date'].map(get_month)
data['2nd_month'].value_counts()

In [None]:
type(data['pick_month'][0])

In [None]:
def solve(df_row):
    """Return True when the order in ``df_row`` met its delivery deadlines.

    Days are compared on a single index: the day-of-month, plus 31 when the
    matching month column is 'Apr'. Holiday counts computed earlier are
    subtracted from each elapsed span.

    * Pickup -> 1st attempt must take at most ``sla_time`` counted days.
    * If a 2nd attempt exists, 1st -> 2nd attempt must additionally take at
      most 3 counted days.

    Note that ``has_2nd`` is truthy when there is NO second attempt (it is
    built from ``isna()`` on the 2nd-attempt timestamp).

    Raises:
        AssertionError: if a counted span is negative.
    """
    no_second_attempt = df_row['has_2nd']

    def day_index(date_col, month_col):
        # Day-of-month on the shared March/April scale.
        day = int(df_row[date_col][-1])
        return day + 31 if df_row[month_col] == 'Apr' else day

    if no_second_attempt:
        picked = day_index('pick_date', 'pick_month')
        delivered = day_index('1st_date', '1st_month')
        elapsed = delivered - picked - int(df_row['pick_to_first_cnt'])
        assert(elapsed >= 0)
        return elapsed <= int(df_row['sla_time'])

    picked = int(df_row['pick_date'][-1])
    attempt_one = int(df_row['1st_date'][-1])
    attempt_two = int(df_row['2nd_date'][-1])
    picked += 31 if df_row['pick_month'] == 'Apr' else 0
    attempt_one += 31 if df_row['1st_month'] == 'Apr' else 0
    attempt_two += 31 if df_row['2nd_month'] == 'Apr' else 0

    first_leg = attempt_one - picked - int(df_row['pick_to_first_cnt'])
    second_leg = attempt_two - attempt_one - int(df_row['first_to_second_cnt'])
    assert(first_leg >= 0)
    assert(second_leg >= 0)
    return first_leg <= int(df_row['sla_time']) and second_leg <= 3

In [None]:
# Per-row verdict from solve(): True means the order was delivered on time.
data['answer'] = data.apply(solve, axis=1)

In [None]:
# Build the output frame; 'is_late' is the negation of the on-time flag in 'answer'.
answer = pd.DataFrame({'orderid': data['orderid'].values, 'is_late': ~data['answer'].values})

In [None]:
answer.head()

In [None]:
# Cast the late flag to integers (0/1) for the output file.
answer['is_late'] = answer['is_late'].astype(int)

In [None]:
answer.head()

In [None]:
answer['is_late'].value_counts()

In [None]:
# Write the predictions to disk without the DataFrame index column.
answer.to_csv('predictions3.csv', index=False)