In [None]:
import pandas as pd
import numpy as np
from ggv.utils.context import create_default_context
import warnings
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import plotly
from plotly.subplots import make_subplots
import datetime
import matplotlib.pyplot as plt

# Silence every Python warning raised for the rest of the session.
warnings.filterwarnings(action='ignore')

# Build the default context and fetch the data-source handle used to run SQL.
context = create_default_context()
bq_db = context.get_ds('YOUR CONTEXT')

### Cancel Time


In [None]:
def get_cancel_time(country,start_date,end_date,product_name): 
       """Fetch cancelled orders and the time elapsed between creation and cancellation.

       The query reads from `table_{country}`, keeps rows with status 'cancelled',
       a non-null `cancelled_at`, the given `product_name`, and a `created_at`
       between `start_date` and `end_date`. `cancel_time` is the difference
       between `cancelled_at` and `created_at` in milliseconds.

       Parameters
       ----------
       country : suffix interpolated into the table name.
       start_date, end_date : bounds passed to DATETIME(...) in the BETWEEN filter.
       product_name : value matched against the `product_name` column.

       Returns
       -------
       Whatever `bq_db.df_from_sql` returns for the query
       (presumably a pandas DataFrame -- TODO confirm against the helper).

       Notes
       -----
       All arguments are interpolated directly into the SQL text, so only pass
       trusted values.
       """

       # GROUP BY on every selected column (no aggregates) de-duplicates identical rows.
       query= f""" 
       SELECT
          system_order_request_id,
          booking_type,
          DATE_DIFF(cancelled_at, created_at, millisecond) AS cancel_time,
          created_at,
          cancelled_at
        FROM
          `table_{country}`
        WHERE
          status= 'cancelled'
          AND DATETIME(created_at) BETWEEN DATETIME('{start_date}')
          AND DATETIME('{end_date}')
          AND product_name= '{product_name}'
          AND cancelled_at is not null
        GROUP BY
          1,
          2,
          3,
          4,
          5
       """ 
       return bq_db.df_from_sql(query)


In [None]:
def vehicle_cancel(df):
    """Return cancelled orders sorted by cancel time with a running cumulative percentage.

    Parameters
    ----------
    df : DataFrame with a `cancel_time` column holding milliseconds.

    Returns
    -------
    A new DataFrame (the input is left untouched) sorted by `cancel_minute`
    with a fresh 0..n-1 index and three added columns:
    `cancel_minute` (cancel_time / 60000, rounded to 4 decimals),
    `count` (constant 1) and `cum_perc` (running share of rows, 0-100).
    """
    # `assign` works on a copy, so the caller's frame does not silently gain a
    # `cancel_minute` column and re-running the calling cell stays idempotent.
    df_cancel_time = (
        df.assign(cancel_minute=(df['cancel_time'] / 60000).round(4))
        .sort_values(by=['cancel_minute'])
        .reset_index(drop=True)
    )
    df_cancel_time['count'] = 1
    df_cancel_time['cum_perc'] = (
        100 * df_cancel_time['count'].cumsum() / df_cancel_time['count'].sum()
    )
    return df_cancel_time

In [None]:
def order_type_count_cancel(df, start_time, end_time=0):
    """Count cancelled orders per booking type inside one cancel-minute bin.

    The bin is chosen from `start_time`:
      * start_time < 1           -> 0 < cancel_minute < 1
      * 1 <= start_time <= 120   -> start_time <= cancel_minute < end_time
      * 120 < start_time < 240   -> 60 <= cancel_minute < 240 (arguments ignored)
      * start_time >= 240        -> cancel_minute >= 240

    Returns a list `[next_day, same_day, on_demand]` of non-null
    `system_order_request_id` counts for the rows in the bin.
    """
    minutes = df['cancel_minute']

    if start_time < 1:
        in_bin = (minutes < 1) & (minutes > 0)
    elif start_time <= 120:
        in_bin = (minutes >= start_time) & (minutes < end_time)
    elif start_time < 240:
        in_bin = (minutes >= 60) & (minutes < 240)
    elif start_time >= 240:
        in_bin = minutes >= 240

    selected = df[in_bin]
    return [
        selected[selected['booking_type'] == booking]['system_order_request_id'].count()
        for booking in ('Next Day', 'Same Day', 'On Demand')
    ]

In [None]:
def get_y_axis(df):
    """Build the bar-chart bins for the cancel-time distribution.

    Returns
    -------
    (x_axis, y_axis) where `x_axis` is the list of 26 bin labels and `y_axis`
    is a list of 26 `[next_day, same_day, on_demand]` counts, one per label,
    obtained from `order_type_count_cancel`.
    """
    x_axis = (
        ['<1']
        + [str(minute) for minute in range(1, 20)]
        + ['20-29.99', '30-39.99', '40-49.99', '50-59.99', '60-239.99', '>=240']
    )

    # Consecutive edges give the half-open bins [1,2) ... [19,20), [20,30),
    # [30,40), [40,50), [50,60) and [60,240).  The last pair used to be
    # (60, 120), which under-counted the bar labelled '60-239.99'.
    edges = list(range(1, 21)) + [30, 40, 50, 60, 240]

    y_axis = [order_type_count_cancel(df, 0.9)]  # any start < 1 selects the '<1' bin
    for start, end in zip(edges[:-1], edges[1:]):
        y_axis.append(order_type_count_cancel(df, start, end))
    y_axis.append(order_type_count_cancel(df, 240))  # open-ended '>=240' bin

    return x_axis, y_axis

In [None]:
def y_axis_split(y_axis):
    """Split a list of 3-item rows into three parallel lists.

    Returns a tuple `(firsts, seconds, thirds)` holding element 0, 1 and 2 of
    every row, in row order.
    """
    rows = list(y_axis)
    return tuple([row[position] for row in rows] for position in range(3))

In [None]:
def vehicle_cancel_list(df):
    """Return the cumulative-percentage value reached at the end of each cancel-minute bin.

    Parameters
    ----------
    df : DataFrame with `cancel_minute` and `cum_perc` columns.

    Returns
    -------
    (x_vehicle_cancel, y_vehicle_cancel): the 26 bin labels and, for each bin,
    the maximum `cum_perc` of the rows falling in it (NaN when the bin is empty).
    """
    x_vehicle_cancel = (
        ['<1']
        + [str(minute) for minute in range(1, 20)]
        + ['20-29.99', '30-39.99', '40-49.99', '50-59.99', '60-239.99', '>=240']
    )

    minutes = df['cancel_minute']
    # Half-open bins [lower, upper).  The lower bound is inclusive for every
    # bin so that values sitting exactly on a whole minute (1.0, 2.0, ...) are
    # not dropped, matching the bar bins and the bins from 20 upwards.
    edges = list(range(1, 21)) + [30, 40, 50, 60, 240]

    y_vehicle_cancel = [df[minutes < 1]['cum_perc'].max()]
    for lower, upper in zip(edges[:-1], edges[1:]):
        in_bin = (minutes >= lower) & (minutes < upper)
        y_vehicle_cancel.append(df[in_bin]['cum_perc'].max())
    y_vehicle_cancel.append(df[minutes >= 240]['cum_perc'].max())

    return x_vehicle_cancel, y_vehicle_cancel

In [None]:
def vehicle_order_cancel(df, order):
    """Cumulative cancel-time percentages for a single booking type.

    Parameters
    ----------
    df : DataFrame with `system_order_request_id`, `booking_type`,
        `cancel_time` and `cancel_minute` columns.
    order : booking type to keep (compared against `booking_type` as a string).

    Returns
    -------
    (x_vehicle_order_cancel, y_vehicle_order_cancel): the 26 bin labels and,
    for each bin, the maximum cumulative percentage (within this booking type)
    of the rows falling in it (NaN when the bin is empty).
    """
    columns = ['system_order_request_id', 'booking_type', 'cancel_time', 'cancel_minute']
    subset = df.loc[df['booking_type'] == f'{order}', columns]
    # The running percentage is only meaningful in cancel-time order; sort here
    # instead of relying on the caller having pre-sorted the frame.
    subset = subset.sort_values('cancel_minute', kind='stable').assign(count=1)
    subset['cum_perc'] = 100 * subset['count'].cumsum() / subset['count'].sum()

    x_vehicle_order_cancel = (
        ['<1']
        + [str(minute) for minute in range(1, 20)]
        + ['20-29.99', '30-39.99', '40-49.99', '50-59.99', '60-239.99', '>=240']
    )

    minutes = subset['cancel_minute']
    # Half-open bins [lower, upper) with an inclusive lower bound everywhere,
    # so exact whole-minute values are not lost between bins.
    edges = list(range(1, 21)) + [30, 40, 50, 60, 240]

    y_vehicle_order_cancel = [subset[minutes < 1]['cum_perc'].max()]
    for lower, upper in zip(edges[:-1], edges[1:]):
        in_bin = (minutes >= lower) & (minutes < upper)
        y_vehicle_order_cancel.append(subset[in_bin]['cum_perc'].max())
    y_vehicle_order_cancel.append(subset[minutes >= 240]['cum_perc'].max())

    return x_vehicle_order_cancel, y_vehicle_order_cancel

In [None]:
# Load cancelled TRANSPORT orders from the 'vn' table for the analysis window.
cancel = get_cancel_time(
    country='vn',
    start_date='2021-10-11',
    end_date='2022-10-10',
    product_name='TRANSPORT',
)

### VAN

In [None]:
# Sorted cancel times (in minutes) with a running cumulative percentage.
van_cancel = vehicle_cancel(df=cancel)

In [None]:
# Bin labels and per-booking-type counts for the stacked bars.
(x_axis_van, y_axis_van) = get_y_axis(df=van_cancel)

In [None]:
# One series per booking type: Next Day, Same Day, On Demand.
(y1_van, y2_van, y3_van) = y_axis_split(y_axis=y_axis_van)

In [None]:
# Overall cumulative-percentage line.
(x_van_cancel, y_van_cancel) = vehicle_cancel_list(df=van_cancel)

In [None]:
# Cumulative-percentage line restricted to 'On Demand' orders.
(x_van_ondemand, y_van_ondemand) = vehicle_order_cancel(df=van_cancel, order='On Demand')

In [None]:
# Cumulative-percentage line restricted to 'Same Day' orders.
(x_van_sameday, y_van_sameday) = vehicle_order_cancel(df=van_cancel, order='Same Day')

In [None]:
# Cumulative-percentage line restricted to 'Next Day' orders.
(x_van_nextday, y_van_nextday) = vehicle_order_cancel(df=van_cancel, order='Next Day')

In [None]:
# Stacked bars (order counts per booking type) on the primary y-axis and
# cumulative-percentage lines on the secondary y-axis, with a dropdown that
# switches between all orders and a single booking type.
fig = make_subplots(specs=[[{'secondary_y': True}]])

# Trace indices 0-2: one bar series per booking type.
bar_specs = [
    ('Next Day', y1_van, '#636EFA'),
    ('Same Day', y2_van, '#EF553B'),
    ('On Demand', y3_van, '#00CC96'),
]
for bar_name, bar_counts, bar_color in bar_specs:
    fig.add_trace(go.Bar(name=bar_name, x=x_axis_van, y=bar_counts, marker_color=bar_color))

# Trace index 3: overall cumulative line (visible by default).
fig.add_trace(
    go.Scatter(x=x_van_cancel, y=y_van_cancel, mode='lines', name='Cumulative %'),
    secondary_y=True,
)

# Trace indices 4-6: per-booking-type cumulative lines, hidden until selected.
line_specs = [
    (x_van_sameday, y_van_sameday, 'Cumulative % (Same Day)', '#AB63FA'),
    (x_van_nextday, y_van_nextday, 'Cumulative % (Next Day)', '#EF553B'),
    (x_van_ondemand, y_van_ondemand, 'Cumulative %(On Demand)', '#AB63FA'),
]
for line_x, line_y, line_name, line_color in line_specs:
    fig.add_trace(
        go.Scatter(x=line_x, y=line_y, mode='lines', name=line_name,
                   visible='legendonly', line_color=line_color),
        secondary_y=True,
    )

fig.update_layout(
    title='Distribution of cancel time',
    xaxis_title='Cancel Time (minute)',
    yaxis_title='Number of Orders',
    barmode='stack',
)

# Each dropdown entry: (label, visibility mask over the 7 traces above, title).
dropdown_views = [
    ('All',
     [True, True, True, True, False, False, False],
     'Distribution of cancel time'),
    ('on demand',
     [False, False, True, False, False, False, True],
     'Distribution of on demand order cancel time'),
    ('booking same day',
     [False, True, False, False, True, False, False],
     'Distribution of booking same day order cancel time'),
    ('booking next day',
     [True, False, False, False, False, True, False],
     'Distribution of booking next day order cancel time'),
]
fig.update_layout(
    updatemenus=[
        go.layout.Updatemenu(
            active=0,
            buttons=[
                dict(
                    label=view_label,
                    method='update',
                    args=[{'visible': view_mask},
                          {'title': view_title, 'showlegend': True}],
                )
                for view_label, view_mask, view_title in dropdown_views
            ],
        )
    ]
)

fig.update_xaxes(tickangle=50)
fig.update_yaxes(rangemode='tozero')
fig.show()

In [None]:
# Derive time-of-day fields from the order creation timestamp.
created_at_col = van_cancel['created_at']
van_cancel['date_hour'] = created_at_col.dt.strftime('%H:%M:%S').str.slice(stop=13)
van_cancel['hour'] = created_at_col.dt.hour
van_cancel

In [None]:
def group_cancel_minutes(minutes):
    """Return the bucket label for a cancel time expressed in minutes.

    The first bucket whose upper bound is >= `minutes` wins; anything that
    matches no bound falls into "55+".
    """
    upper_bounds = (
        (5, "0-5"),
        (15, "6-15"),
        (25, "16-25"),
        (40, "26-40"),
        (55, "41-55"),
    )
    for upper, label in upper_bounds:
        if minutes <= upper:
            return label
    return "55+"
    

In [None]:
# Label every order with its cancel-minute bucket.
van_cancel['cancel_group'] = van_cancel['cancel_minute'].map(group_cancel_minutes)

In [None]:
# Number of cancelled orders per creation hour, cancel bucket and booking type.
grouped_data = (
    van_cancel
    .groupby(['hour', 'cancel_group', 'booking_type'])
    .size()
    .reset_index(name='count')
)

In [None]:
def _minute_group_bar(data, title):
    """Stacked bar of order counts per creation hour, coloured by cancel-minute bucket."""
    return px.bar(
        data,
        x='hour',
        y='count',
        color='cancel_group',
        barmode='stack',
        labels={"cancel_group": "cancel_group_minutes"},
        title=title,
    )


# One chart over every booking type, then one chart per booking type.
fig = _minute_group_bar(grouped_data, 'All Booking Types (Cancel time)')
fig_next_day = _minute_group_bar(
    grouped_data[grouped_data['booking_type'] == 'Next Day'],
    'Next Day Booking Type (Cancel time)',
)
fig_same_day = _minute_group_bar(
    grouped_data[grouped_data['booking_type'] == 'Same Day'],
    'Same Day Booking Type (Cancel time)',
)
fig_on_demand = _minute_group_bar(
    grouped_data[grouped_data['booking_type'] == 'On Demand'],
    'On Demand Booking Type (Cancel time)',
)

# Show the charts
for chart in (fig, fig_next_day, fig_same_day, fig_on_demand):
    chart.show()

### Cancel after Pickup

In [None]:
def get_pickup_cancel(country,start_date,end_date,product_name): 
       """Fetch cancelled orders with the lead time between cancellation and requested pickup.

       The query reads from `table_{country}`, keeps rows with status 'cancelled',
       the given `product_name`, a `created_at` between `start_date` and
       `end_date`, a non-null `first_response_time`, and a `cancelled_at` later
       than `first_assigned_at`. `cancel_time` is the difference between
       `request_pickup_time` and `cancelled_at` in minutes (positive when the
       cancellation happened before the requested pickup time).

       Parameters
       ----------
       country : suffix interpolated into the table name.
       start_date, end_date : bounds passed to DATETIME(...) in the BETWEEN filter.
       product_name : value matched against the `product_name` column.

       Returns
       -------
       Whatever `bq_db.df_from_sql` returns for the query
       (presumably a pandas DataFrame -- TODO confirm against the helper).

       Notes
       -----
       All arguments are interpolated directly into the SQL text, so only pass
       trusted values.
       """

    
       # GROUP BY on every selected column (no aggregates) de-duplicates identical rows.
       query= f""" 
       SELECT
          system_order_request_id,
          booking_type,
          DATE_DIFF(request_pickup_time, cancelled_at, minute) AS cancel_time,
          created_at,
          cancelled_at,
          request_pickup_time
        FROM
          `table_{country}`
        WHERE
          status= 'cancelled'
          AND DATETIME(created_at) BETWEEN DATETIME('{start_date}')
          AND DATETIME('{end_date}')
          AND product_name= '{product_name}'
          AND first_response_time IS NOT NULL
          AND DATE_DIFF(cancelled_at, first_assigned_at, millisecond) > 0
        GROUP BY
          1,
          2,
          3,
          4,
          5,6
       """ 
       return bq_db.df_from_sql(query)

In [None]:
# Preview the raw query result (this runs the query; nothing is stored).
get_pickup_cancel(
    country='vn',
    start_date='2021-10-11',
    end_date='2022-10-10',
    product_name='TRANSPORT',
)

In [None]:
def vehicle_pickup_cancel(df):
    """Return orders cancelled at most 48 hours before pickup, with a cumulative percentage.

    Parameters
    ----------
    df : DataFrame with a `cancel_time` column holding minutes.

    Returns
    -------
    A new DataFrame (the input is left untouched) sorted by `cancel_hour`,
    restricted to `cancel_hour <= 48`, with added columns
    `cancel_hour` (cancel_time / 60, rounded to 4 decimals),
    `count` (constant 1) and `cum_perc` (running share of the kept rows, 0-100).
    """
    # `assign` works on a copy, so the caller's (often filtered) frame is not
    # mutated and no chained-assignment warning is triggered.
    sorted_orders = (
        df.assign(cancel_hour=(df['cancel_time'] / 60).round(4))
        .sort_values(by=['cancel_hour'])
        .reset_index(drop=True)
    )
    # Explicit copy: the columns below are written to an independent frame,
    # not to a slice of `sorted_orders`.
    df_cancel_time = sorted_orders.loc[sorted_orders['cancel_hour'] <= 48].copy()
    df_cancel_time['count'] = 1
    df_cancel_time['cum_perc'] = (
        100 * df_cancel_time['count'].cumsum() / df_cancel_time['count'].sum()
    )
    return df_cancel_time

In [None]:
def cancel_hour_count(df, start_time, end_time=0):
    """Count cancelled orders per booking type inside one lead-time bin.

    The bin is chosen from `start_time`:
      * start_time == 0.5        -> 1 <= cancel_time < 30   (minutes)
      * start_time == 0.99       -> 30 <= cancel_time < 60  (minutes)
      * 1 <= start_time <= 120   -> start_time <= cancel_hour < end_time
      * 120 < start_time < 240   -> 60 <= cancel_hour < 240 (arguments ignored)
      * start_time >= 240        -> cancel_hour >= 240

    Returns a list `[next_day, same_day, on_demand]` of non-null
    `system_order_request_id` counts for the rows in the bin.
    """
    if start_time == 0.5:
        in_bin = (df['cancel_time'] >= 1) & (df['cancel_time'] < 30)
    elif start_time == 0.99:
        in_bin = (df['cancel_time'] >= 30) & (df['cancel_time'] < 60)
    elif 1 <= start_time <= 120:
        in_bin = (df['cancel_hour'] >= start_time) & (df['cancel_hour'] < end_time)
    elif 120 < start_time < 240:
        in_bin = (df['cancel_hour'] >= 60) & (df['cancel_hour'] < 240)
    elif start_time >= 240:
        in_bin = df['cancel_hour'] >= 240

    selected = df[in_bin]
    return [
        selected[selected['booking_type'] == booking]['system_order_request_id'].count()
        for booking in ('Next Day', 'Same Day', 'On Demand')
    ]

In [None]:
def get_axis_hour(df):
    """Build the bar-chart bins for the cancel-before-pickup distribution.

    Returns `(x_axis, y_axis)`: the 24 bin labels and, for each label, the
    `[next_day, same_day, on_demand]` counts produced by `cancel_hour_count`.
    """
    whole_hours = list(range(1, 20))

    x_axis = (
        ['<0.5', '0.5-0.99']
        + [str(hour) for hour in whole_hours]
        + ['20-29.99', '30-39.99', '40-48']
    )

    # Positional arguments passed to cancel_hour_count after `df`, one tuple per label.
    bin_args = [(0.5,), (0.99,)]
    bin_args += [(hour, hour + 1) for hour in whole_hours]
    bin_args += [(20, 30), (30, 40), (40, 49)]

    y_axis = [cancel_hour_count(df, *bounds) for bounds in bin_args]
    return x_axis, y_axis

In [None]:
def vehicle_pickup_list(df):
    """Return the cumulative-percentage value reached at the end of each lead-time bin.

    Parameters
    ----------
    df : DataFrame with `cancel_hour` and `cum_perc` columns.

    Returns
    -------
    (x_vehicle_pickup, y_vehicle_pickup): the 24 bin labels and, for each bin,
    the maximum `cum_perc` of the rows falling in it (NaN when the bin is empty).
    Unpack as `x, y = vehicle_pickup_list(df)` to get the two lists.
    """
    x_vehicle_pickup = (
        ['<0.5', '0.5-0.99']
        + [str(hour) for hour in range(1, 20)]
        + ['20-29.99', '30-39.99', '40-48']
    )

    hours = df['cancel_hour']
    # Half-open bins [lower, upper) between consecutive edges.
    edges = list(range(1, 21)) + [30, 40]

    # 0.5 h belongs to '0.5-0.99' (as the '<0.5' label and the bar bins say),
    # not to the first bin.
    y_vehicle_pickup = [
        df[hours < 0.5]['cum_perc'].max(),
        df[(hours >= 0.5) & (hours < 1)]['cum_perc'].max(),
    ]
    for lower, upper in zip(edges[:-1], edges[1:]):
        in_bin = (hours >= lower) & (hours < upper)
        y_vehicle_pickup.append(df[in_bin]['cum_perc'].max())
    # The '40-48' bin is closed on the right so rows at exactly 48 h are included.
    y_vehicle_pickup.append(df[(hours >= 40) & (hours <= 48)]['cum_perc'].max())

    return x_vehicle_pickup, y_vehicle_pickup

In [None]:
def vehicle_order_pickup(df, order):
    """Cumulative cancel-before-pickup percentages for a single booking type.

    Parameters
    ----------
    df : DataFrame with `system_order_request_id`, `booking_type`,
        `cancel_time` and `cancel_hour` columns.
    order : booking type to keep (compared against `booking_type` as a string).

    Returns
    -------
    (x_vehicle_order_pickup, y_vehicle_order_pickup): the 24 bin labels and,
    for each bin, the maximum cumulative percentage (within this booking type)
    of the rows falling in it (NaN when the bin is empty).
    """
    columns = ['system_order_request_id', 'booking_type', 'cancel_time', 'cancel_hour']
    subset = df.loc[df['booking_type'] == f'{order}', columns]
    # The running percentage is only meaningful in lead-time order; sort here
    # instead of relying on the caller having pre-sorted the frame.
    subset = subset.sort_values('cancel_hour', kind='stable').assign(count=1)
    subset['cum_perc'] = 100 * (subset['count'].cumsum() / subset['count'].sum())

    x_vehicle_order_pickup = (
        ['<0.5', '0.5-0.99']
        + [str(hour) for hour in range(1, 20)]
        + ['20-29.99', '30-39.99', '40-48']
    )

    hours = subset['cancel_hour']
    # Half-open bins [lower, upper) between consecutive edges.
    edges = list(range(1, 21)) + [30, 40]

    # 0.5 h belongs to '0.5-0.99' (as the '<0.5' label and the bar bins say).
    y_vehicle_order_pickup = [
        subset[hours < 0.5]['cum_perc'].max(),
        subset[(hours >= 0.5) & (hours < 1)]['cum_perc'].max(),
    ]
    for lower, upper in zip(edges[:-1], edges[1:]):
        in_bin = (hours >= lower) & (hours < upper)
        y_vehicle_order_pickup.append(subset[in_bin]['cum_perc'].max())
    # The '40-48' bin is closed on the right so rows at exactly 48 h are included.
    y_vehicle_order_pickup.append(subset[(hours >= 40) & (hours <= 48)]['cum_perc'].max())

    return x_vehicle_order_pickup, y_vehicle_order_pickup

In [None]:
# Load cancelled TRANSPORT orders (with pickup lead time) from the 'vn' table.
cancel_pickup = get_pickup_cancel(
    country='vn',
    start_date='2021-10-11',
    end_date='2022-10-10',
    product_name='TRANSPORT',
)

In [None]:
# Keep only orders cancelled before the requested pickup time (positive lead time).
cancel_pickup = cancel_pickup.loc[cancel_pickup['cancel_time'].gt(0)]

In [None]:
# Sorted lead times (in hours, capped at 48) with a running cumulative percentage.
van_pickup_cancel = vehicle_pickup_cancel(df=cancel_pickup)

In [None]:
# Bin labels and per-booking-type counts for the stacked bars.
(x_axis_van_pickup, y_axis_van_pickup) = get_axis_hour(df=van_pickup_cancel)

In [None]:
# One series per booking type: Next Day, Same Day, On Demand.
(y1_van_pickup, y2_van_pickup, y3_van_pickup) = y_axis_split(y_axis=y_axis_van_pickup)

In [None]:
# Overall cumulative-percentage line.
(x_van_pickup, y_van_pickup) = vehicle_pickup_list(df=van_pickup_cancel)

In [None]:
# Cumulative-percentage line restricted to 'Same Day' orders.
(x_van_sameday_pickup, y_van_sameday_pickup) = vehicle_order_pickup(
    df=van_pickup_cancel, order='Same Day'
)

In [None]:
# Cumulative-percentage line restricted to 'Next Day' orders.
(x_van_nextday_pickup, y_van_nextday_pickup) = vehicle_order_pickup(
    df=van_pickup_cancel, order='Next Day'
)

In [None]:
# Cumulative-percentage line restricted to 'On Demand' orders.
(x_van_ondemand_pickup, y_van_ondemand_pickup) = vehicle_order_pickup(
    df=van_pickup_cancel, order='On Demand'
)

In [None]:
# Cancel-time-before-pickup distribution of Van orders (with dropdown list).
# Bars use the primary y-axis; cumulative-percentage lines use the secondary one.
fig = make_subplots(specs=[[{'secondary_y': True}]])

# Trace indices 0-1: one bar series per booking type.
fig.add_trace(go.Bar(name='Next Day', x=x_axis_van_pickup, y=y1_van_pickup, marker_color='#1664B7'))
fig.add_trace(go.Bar(name='Same Day', x=x_axis_van_pickup, y=y2_van_pickup, marker_color='#F8E015'))

# Trace indices 2-4: cumulative % lines (overall, then per booking type).
fig.add_trace(go.Scatter(x=x_van_pickup, y=y_van_pickup, mode='lines', name='Cumulative %',
                         line_color='#00CC96'), secondary_y=True)
fig.add_trace(go.Scatter(x=x_van_sameday_pickup, y=y_van_sameday_pickup, mode='lines',
                         name='Cumulative % (booking same day)',
                         visible='legendonly', line_color='#00CC96'), secondary_y=True)
fig.add_trace(go.Scatter(x=x_van_nextday_pickup, y=y_van_nextday_pickup, mode='lines',
                         name='Cumulative % (booking next day)',
                         visible='legendonly', line_color='#00CC96'), secondary_y=True)

# The x-axis bins ('<0.5', '0.5-0.99', '1', ..., '40-48') are expressed in
# hours, so the axis title states hours rather than minutes.
fig.update_layout(title='Distribution of cancel time before pickup time',
                  xaxis_title='cancel time before pickup time (hour)',
                  yaxis_title='Number of Orders', barmode='stack')

# Each 'visible' mask lines up with the five traces added above, in order.
fig.update_layout(updatemenus=[go.layout.Updatemenu(
    active=0,
    buttons=[
        dict(label='All',
             method='update',
             args=[{'visible': [True, True, True, False, False]},
                   {'title': 'Distribution of cancel time before pickup time',
                    'showlegend': True}]),
        dict(label='booking same day',
             method='update',
             args=[{'visible': [False, True, False, True, False]},
                   {'title': 'Distribution of booking same day order cancel time before pickup time',
                    'showlegend': True}]),
        dict(label='booking next day',
             method='update',
             args=[{'visible': [True, False, False, False, True]},
                   {'title': 'Distribution of booking next day order cancel time before pickup time',
                    'showlegend': True}]),
    ],
)])

fig.update_xaxes(tickangle=50)
fig.update_yaxes(rangemode='tozero')
fig.show()

In [None]:
def group_cancel_hours(hours):
    """Return the bucket label for a lead time expressed in hours.

    The first bucket whose upper bound is >= `hours` wins; anything that
    matches no bound falls into "40+".
    """
    upper_bounds = (
        (5, "0-5"),
        (15, "6-15"),
        (30, "16-30"),
        (40, "31-40"),
    )
    for upper, label in upper_bounds:
        if hours <= upper:
            return label
    return "40+"

In [None]:
# Derive time-of-day fields from the creation timestamp, then bucket the lead time (hours).
pickup_created_at = van_pickup_cancel['created_at']
van_pickup_cancel['date_hour'] = pickup_created_at.dt.strftime('%H:%M:%S').str.slice(stop=13)
van_pickup_cancel['hour'] = pickup_created_at.dt.hour
van_pickup_cancel['cancel_group'] = van_pickup_cancel['cancel_hour'].map(group_cancel_hours)

In [None]:
# Number of cancelled orders per creation hour, lead-time bucket and booking type.
grouped_data = (
    van_pickup_cancel
    .groupby(['hour', 'cancel_group', 'booking_type'])
    .size()
    .reset_index(name='count')
)

In [None]:
def _hour_group_bar(data, title):
    """Stacked bar of order counts per creation hour, coloured by lead-time (hours) bucket."""
    return px.bar(
        data,
        x='hour',
        y='count',
        color='cancel_group',  # Color based on 'cancel_group'
        barmode='stack',
        labels={"cancel_group": "cancel_group_hours"},
        title=title,
    )


# One chart over every booking type, then one each for 'Next Day' and 'Same Day'.
fig = _hour_group_bar(
    grouped_data,
    'Van All Booking Types (Cancel time before pickup time)',
)
fig_next_day = _hour_group_bar(
    grouped_data[grouped_data['booking_type'] == 'Next Day'],
    'Van Next Day Booking Type (Cancel time before pickup time)',
)
fig_same_day = _hour_group_bar(
    grouped_data[grouped_data['booking_type'] == 'Same Day'],
    'Van Same Day Booking Type (Cancel time before pickup time)',
)

# Show the charts
for chart in (fig, fig_next_day, fig_same_day):
    chart.show()

In [None]:
# Express the lead time in minutes and re-bucket with the minute groups.
# Note: this overwrites the hour-based 'cancel_group' column and `grouped_data`.
van_pickup_cancel['cancel_minute'] = (van_pickup_cancel['cancel_hour'] * 60).round(4)
van_pickup_cancel['cancel_group'] = van_pickup_cancel['cancel_minute'].map(group_cancel_minutes)
grouped_data = (
    van_pickup_cancel
    .groupby(['hour', 'cancel_group', 'booking_type'])
    .size()
    .reset_index(name='count')
)

In [None]:
# 'On Demand' orders only, coloured by the minute-based lead-time bucket.
on_demand_rows = grouped_data[grouped_data['booking_type'] == 'On Demand']
fig_on_demand = px.bar(
    on_demand_rows,
    x='hour',
    y='count',
    color='cancel_group',
    barmode='stack',
    labels={"cancel_group": "cancel_group_minutes"},
    title='On Demand Booking Type (Cancel time before pickup time)',
)

fig_on_demand.show()