# 1. Import libraries

In [27]:
from module.distance_calculator import DistanceCalculator

import numpy as np
import pandas as pd

from math import sqrt, cos

import networkx as nx

import datetime
from datetime import time, date, datetime

# 2. Read raw data, drop unused column

In [28]:
raw_df = pd.read_excel('Capstone SampleData 14days.xlsx', sheet_name='Sheet1')

In [29]:
raw_df.head(3)

Unnamed: 0,JobAcceptedDate,jobAcceptedTime,calledMerchantTime,arrivedAtMerchantTime,mealPickedUpTime,arrivedAtCustLocationTime,foodDeliveredTime,riderInitial.lat,riderInitial.long,Merchant.Lat,Merchant.Lng,Customer.lat,Customer.lng,MerchantName,NationFoodCategory,FoodCategories
0,2020-10-03,20:39:15.840000,20:40:51.840000,20:48:19.840000,20:49:18.840000,21:14:09.840000,21:15:12.840000,13.825853,100.551958,13.825703,100.559309,13.756065,100.562375,3K SEA ZABB,Thai,อาหารทะเล
1,2020-10-01,12:54:34.560000,12:55:38.560000,13:10:26.560000,13:24:56.560000,14:01:00.560000,14:01:39.560000,13.819283,100.561874,13.831056,100.570731,13.774685,100.573913,ร้านอาหารอีสานแม่อลิส,Thai,อาหารอีสาน
2,2020-10-01,12:32:06.720000,12:32:21.720000,12:41:08.720000,12:46:41.720000,13:22:01.720000,13:22:53.720000,13.827041,100.554284,13.827205,100.536048,13.743898,100.571659,ไก่ย่างห้าดาว กรุงเทพ-นนท์44,Thai,Fast Food


In [30]:
raw_df.shape

(10000, 16)

In [31]:
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   JobAcceptedDate            10000 non-null  datetime64[ns]
 1   jobAcceptedTime            10000 non-null  object        
 2   calledMerchantTime         10000 non-null  object        
 3   arrivedAtMerchantTime      10000 non-null  object        
 4   mealPickedUpTime           10000 non-null  object        
 5   arrivedAtCustLocationTime  10000 non-null  object        
 6   foodDeliveredTime          10000 non-null  object        
 7   riderInitial.lat           10000 non-null  float64       
 8   riderInitial.long          10000 non-null  float64       
 9   Merchant.Lat               10000 non-null  float64       
 10  Merchant.Lng               10000 non-null  float64       
 11  Customer.lat               10000 non-null  float64       
 12  Custo

In [32]:
# We define delivery time 
# Remove the columns that are not needed
drop_list = ['jobAcceptedTime',
             'MerchantName',
             'NationFoodCategory',
             'FoodCategories']
raw_df.drop(drop_list, axis=1, inplace=True)

raw_df.head(3)

Unnamed: 0,JobAcceptedDate,calledMerchantTime,arrivedAtMerchantTime,mealPickedUpTime,arrivedAtCustLocationTime,foodDeliveredTime,riderInitial.lat,riderInitial.long,Merchant.Lat,Merchant.Lng,Customer.lat,Customer.lng
0,2020-10-03,20:40:51.840000,20:48:19.840000,20:49:18.840000,21:14:09.840000,21:15:12.840000,13.825853,100.551958,13.825703,100.559309,13.756065,100.562375
1,2020-10-01,12:55:38.560000,13:10:26.560000,13:24:56.560000,14:01:00.560000,14:01:39.560000,13.819283,100.561874,13.831056,100.570731,13.774685,100.573913
2,2020-10-01,12:32:21.720000,12:41:08.720000,12:46:41.720000,13:22:01.720000,13:22:53.720000,13.827041,100.554284,13.827205,100.536048,13.743898,100.571659


In [33]:
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   JobAcceptedDate            10000 non-null  datetime64[ns]
 1   calledMerchantTime         10000 non-null  object        
 2   arrivedAtMerchantTime      10000 non-null  object        
 3   mealPickedUpTime           10000 non-null  object        
 4   arrivedAtCustLocationTime  10000 non-null  object        
 5   foodDeliveredTime          10000 non-null  object        
 6   riderInitial.lat           10000 non-null  float64       
 7   riderInitial.long          10000 non-null  float64       
 8   Merchant.Lat               10000 non-null  float64       
 9   Merchant.Lng               10000 non-null  float64       
 10  Customer.lat               10000 non-null  float64       
 11  Customer.lng               10000 non-null  float64       
dtypes: da

# 3. Extract features

## 3.1 Euclidian distance

In [8]:
def get_euc(coords_1, coords_2):
    R = 6371000; conversion_const = 0.0174533
    c_1 = (coords_1[0]*conversion_const, coords_1[1]*conversion_const)
    c_2 = (coords_2[0]*conversion_const, coords_2[1]*conversion_const)
    delta_phi = abs(c_1[1]-c_2[1])
    theta = c_1[0]
    delta_theta = abs(c_1[0]-c_2[0])
    del_x = R*cos(theta)*delta_phi 
    del_y = R*delta_theta
    return sqrt(del_x**2 + del_y**2)

In [9]:
distance_feats = [('riderInitial.lat', 'riderInitial.long', 'Merchant.Lat', 'Merchant.Lng'),
                  ('Merchant.Lat', 'Merchant.Lng', 'Customer.lat', 'Customer.lng')]
for feat_data in distance_feats:
    u_name = feat_data[0].split('.')[0]
    v_name = feat_data[2].split('.')[0]
    feat_name = u_name + '_to_' + v_name + '_EucDistance'
    # raw_df[feat_name] = raw_df.apply(lambda x: get_euc((x[feat_data[0]],x[feat_data[1]]), (x[feat_data[2]],x[feat_data[3]])), axis=1)

In [10]:
# raw_df.to_csv('Sample 14days_food_delivery_All.csv', index=False)

## 3.2 Shortest distance (from OSMNX)

In [11]:
raw_df.head(3)

Unnamed: 0,JobAcceptedDate,calledMerchantTime,arrivedAtMerchantTime,mealPickedUpTime,arrivedAtCustLocationTime,foodDeliveredTime,riderInitial.lat,riderInitial.long,Merchant.Lat,Merchant.Lng,Customer.lat,Customer.lng
0,2020-10-03,20:40:51.840000,20:48:19.840000,20:49:18.840000,21:14:09.840000,21:15:12.840000,13.825853,100.551958,13.825703,100.559309,13.756065,100.562375
1,2020-10-01,12:55:38.560000,13:10:26.560000,13:24:56.560000,14:01:00.560000,14:01:39.560000,13.819283,100.561874,13.831056,100.570731,13.774685,100.573913
2,2020-10-01,12:32:21.720000,12:41:08.720000,12:46:41.720000,13:22:01.720000,13:22:53.720000,13.827041,100.554284,13.827205,100.536048,13.743898,100.571659


In [12]:
tmp_df = pd.read_csv('Sample 14days (cleaned).csv')
tmp_df.head(3)

Unnamed: 0,JobAcceptedDate,jobAcceptedTime,Merchant.Lat,Merchant.Lng,Customer.lat,Customer.lng,DeliveryTime (s),DeliveryTime (m),EucDist,day_of_week,...,day_of_week_name_Fri,day_of_week_name_Mon,day_of_week_name_Sat,day_of_week_name_Sun,day_of_week_name_Thu,day_of_week_name_Tue,day_of_week_name_Wed,day_of_week_sin,day_of_week_cos,ShortestDist
0,2020-10-03,20:39:15.840000,13.825703,100.559309,13.756065,100.562375,1491.0,25,7750.51144,5,...,0,0,1,0,0,0,0,-0.974928,-0.222521,9679.275
1,2020-10-01,12:54:34.560000,13.831056,100.570731,13.774685,100.573913,2164.0,36,6277.603705,3,...,0,0,0,0,1,0,0,0.433884,-0.900969,7514.353
2,2020-10-01,12:32:06.720000,13.827205,100.536048,13.743898,100.571659,2120.0,35,10029.576224,3,...,0,0,0,0,1,0,0,0.433884,-0.900969,15891.237


In [13]:
raw_df['riderInitial_to_Merchant_ShortestDistance'] = 0
raw_df['Merchant_to_Customer_ShortestDistance'] = tmp_df['ShortestDist']
raw_df.head(3)

Unnamed: 0,JobAcceptedDate,calledMerchantTime,arrivedAtMerchantTime,mealPickedUpTime,arrivedAtCustLocationTime,foodDeliveredTime,riderInitial.lat,riderInitial.long,Merchant.Lat,Merchant.Lng,Customer.lat,Customer.lng,riderInitial_to_Merchant_ShortestDistance,Merchant_to_Customer_ShortestDistance
0,2020-10-03,20:40:51.840000,20:48:19.840000,20:49:18.840000,21:14:09.840000,21:15:12.840000,13.825853,100.551958,13.825703,100.559309,13.756065,100.562375,0,9679.275
1,2020-10-01,12:55:38.560000,13:10:26.560000,13:24:56.560000,14:01:00.560000,14:01:39.560000,13.819283,100.561874,13.831056,100.570731,13.774685,100.573913,0,7514.353
2,2020-10-01,12:32:21.720000,12:41:08.720000,12:46:41.720000,13:22:01.720000,13:22:53.720000,13.827041,100.554284,13.827205,100.536048,13.743898,100.571659,0,15891.237


In [14]:
raw_df = pd.read_csv('Sample 14days_food_delivery_All.csv')

In [15]:
raw_df.head(3)

Unnamed: 0,JobAcceptedDate,calledMerchantTime,arrivedAtMerchantTime,mealPickedUpTime,arrivedAtCustLocationTime,foodDeliveredTime,riderInitial.lat,riderInitial.long,Merchant.Lat,Merchant.Lng,...,day_of_week_name,day_of_week_sin,day_of_week_cos,isHoliday,calledMerchantTime_to_arrivedAtMerchantTime,calledMerchantTime_to_arrivedAtMerchantTime (s),calledMerchantTime_to_arrivedAtMerchantTime (m),mealPickedUpTime_to_arrivedAtCustLocationTime,mealPickedUpTime_to_arrivedAtCustLocationTime (s),mealPickedUpTime_to_arrivedAtCustLocationTime (m)
0,2020-10-03,2020-10-03 20:40:51.840000-03:00,2020-10-03 20:48:19.840000-03:00,2020-10-03 20:49:18.840000-03:00,2020-10-03 21:14:09.840000-03:00,2020-10-03 21:15:12.840000-03:00,13.825853,100.551958,13.825703,100.559309,...,Sat,-0.974928,-0.222521,1,0 days 00:07:28,448.0,7.466667,0 days 00:24:51,1491.0,24.85
1,2020-10-01,2020-10-01 12:55:38.560000-01:00,2020-10-01 13:10:26.560000-01:00,2020-10-01 13:24:56.560000-01:00,2020-10-01 14:01:00.560000-01:00,2020-10-01 14:01:39.560000-01:00,13.819283,100.561874,13.831056,100.570731,...,Thu,0.433884,-0.900969,0,0 days 00:14:48,888.0,14.8,0 days 00:36:04,2164.0,36.066667
2,2020-10-01,2020-10-01 12:32:21.720000-01:00,2020-10-01 12:41:08.720000-01:00,2020-10-01 12:46:41.720000-01:00,2020-10-01 13:22:01.720000-01:00,2020-10-01 13:22:53.720000-01:00,13.827041,100.554284,13.827205,100.536048,...,Thu,0.433884,-0.900969,0,0 days 00:08:47,527.0,8.783333,0 days 00:35:20,2120.0,35.333333


In [16]:
# raw_df = pd.read_csv('Sample 14days_food_delivery_All.csv')
# D = DistanceCalculator()
# batch_size = 20
# start_idx = 220
# l = 0
# for i in range(start_idx, raw_df.shape[0], batch_size):
#     print(i, end=" ")
#     if l>0 and l%10==0:
#         print()
#     raw_df.loc[i:i+batch_size, "riderInitial_to_Merchant_ShortestDistance"] = \
#         raw_df.loc[i:i+batch_size].apply(lambda x: D.shortestDistance((x['riderInitial.lat'], x['riderInitial.long']), (x['Merchant.Lat'], x['Merchant.Lng'])), axis=1)
#     raw_df.to_csv("Sample 14days_food_delivery_All.csv", index=False)
#     l+=1

In [17]:
raw_df[raw_df['riderInitial_to_Merchant_ShortestDistance'] < raw_df['riderInitial_to_Merchant_EucDistance']]

Unnamed: 0,JobAcceptedDate,calledMerchantTime,arrivedAtMerchantTime,mealPickedUpTime,arrivedAtCustLocationTime,foodDeliveredTime,riderInitial.lat,riderInitial.long,Merchant.Lat,Merchant.Lng,...,day_of_week_name,day_of_week_sin,day_of_week_cos,isHoliday,calledMerchantTime_to_arrivedAtMerchantTime,calledMerchantTime_to_arrivedAtMerchantTime (s),calledMerchantTime_to_arrivedAtMerchantTime (m),mealPickedUpTime_to_arrivedAtCustLocationTime,mealPickedUpTime_to_arrivedAtCustLocationTime (s),mealPickedUpTime_to_arrivedAtCustLocationTime (m)
4,2020-10-13,2020-10-13 11:09:03.880000-13:00,2020-10-13 11:13:43.880000-13:00,2020-10-13 11:15:32.880000-13:00,2020-10-13 11:40:26.880000-13:00,2020-10-13 11:41:23.880000-13:00,13.825773,100.567170,13.827034,100.564534,...,Tue,0.781831,0.623490,1,0 days 00:04:40,280.0,4.666667,0 days 00:24:54,1494.0,24.900000
22,2020-10-07,2020-10-07 09:20:01.560000-07:00,2020-10-07 09:25:03.560000-07:00,2020-10-07 09:38:40.560000-07:00,2020-10-07 10:08:33.560000-07:00,2020-10-07 10:09:32.560000-07:00,13.828253,100.564004,13.829590,100.562126,...,Wed,0.974928,-0.222521,0,0 days 00:05:02,302.0,5.033333,0 days 00:29:53,1793.0,29.883333
36,2020-10-08,2020-10-08 12:54:52.920000-08:00,2020-10-08 13:01:15.920000-08:00,2020-10-08 13:15:09.920000-08:00,2020-10-08 13:40:51.920000-08:00,2020-10-08 13:42:03.920000-08:00,13.822379,100.552412,13.823379,100.550629,...,Thu,0.433884,-0.900969,0,0 days 00:06:23,383.0,6.383333,0 days 00:25:42,1542.0,25.700000
52,2020-10-01,2020-10-01 18:02:52.320000-01:00,2020-10-01 18:08:39.320000-01:00,2020-10-01 18:10:39.320000-01:00,2020-10-01 18:29:30.320000-01:00,2020-10-01 18:30:52.320000-01:00,13.826040,100.567431,13.827034,100.564534,...,Thu,0.433884,-0.900969,0,0 days 00:05:47,347.0,5.783333,0 days 00:18:51,1131.0,18.850000
60,2020-10-11,2020-10-11 17:02:12.400000-11:00,2020-10-11 17:09:19.400000-11:00,2020-10-11 17:10:36.400000-11:00,2020-10-11 17:46:32.400000-11:00,2020-10-11 17:46:57.400000-11:00,13.825652,100.563115,13.827034,100.564534,...,Sun,-0.781831,0.623490,1,0 days 00:07:07,427.0,7.116667,0 days 00:35:56,2156.0,35.933333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9587,2020-10-12,2020-10-12 08:37:49.880000-12:00,2020-10-12 08:45:47.880000-12:00,2020-10-12 08:47:26.880000-12:00,2020-10-12 09:33:35.880000-12:00,2020-10-12 09:34:23.880000-12:00,13.827574,100.569035,13.823723,100.566473,...,Mon,0.000000,1.000000,0,0 days 00:07:58,478.0,7.966667,0 days 00:46:09,2769.0,46.150000
9632,2020-10-08,2020-10-08 15:32:35.800000-08:00,2020-10-08 15:36:31.800000-08:00,2020-10-08 15:42:58.800000-08:00,2020-10-08 16:06:37.800000-08:00,2020-10-08 16:07:17.800000-08:00,13.826353,100.561604,13.825457,100.559054,...,Thu,0.433884,-0.900969,0,0 days 00:03:56,236.0,3.933333,0 days 00:23:39,1419.0,23.650000
9633,2020-10-12,2020-10-12 15:53:24.800000-12:00,2020-10-12 15:59:33.800000-12:00,2020-10-12 16:06:00.800000-12:00,2020-10-12 16:21:30.800000-12:00,2020-10-12 16:22:02.800000-12:00,13.825768,100.568114,13.827034,100.564534,...,Mon,0.000000,1.000000,0,0 days 00:06:09,369.0,6.150000,0 days 00:15:30,930.0,15.500000
9694,2020-10-02,2020-10-02 08:48:39.040000-02:00,2020-10-02 08:52:32.040000-02:00,2020-10-02 08:56:56.040000-02:00,2020-10-02 09:18:23.040000-02:00,2020-10-02 09:18:49.040000-02:00,13.826428,100.567509,13.827034,100.564534,...,Fri,-0.433884,-0.900969,0,0 days 00:03:53,233.0,3.883333,0 days 00:21:27,1287.0,21.450000


## 3.3 day_of_week

In [None]:
raw_df['JobAcceptedDate'] = raw_df['JobAcceptedDate'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d').date())

In [52]:
mapper = {0:"Mon", 1:"Tue", 2:"Wed", 3:"Thu", 4:"Fri", 5:"Sat", 6:"Sun"}
raw_df["day_of_week"] = raw_df["JobAcceptedDate"].apply(lambda x: x.weekday())
raw_df["day_of_week_name"] = raw_df["JobAcceptedDate"].apply(lambda x: mapper[x.weekday()])


## 3.4 day_of_week angular distance

In [70]:
raw_df["day_of_week_sin"] = np.sin(raw_df["day_of_week"]*(2.*np.pi/7))
raw_df["day_of_week_cos"] = np.cos(raw_df["day_of_week"]*(2.*np.pi/7))

## 3.5 isHoliday

In [74]:
raw_df["isHoliday"] = raw_df["JobAcceptedDate"].apply(lambda x: int(x == datetime.strptime('2020-10-13', '%Y-%m-%d').date() ))
raw_df["isHoliday"] = ( raw_df["isHoliday"] | ( (raw_df["day_of_week"] == 5) | (raw_df["day_of_week"] == 6) ) ).astype(int)

## 3.6 datetime

In [76]:
def to_datetime(df, date_col, time_col):
    return pd.to_datetime(df[date_col].astype(str) + " " + df[time_col].astype(str))

In [77]:
to_datetime_feats = ['calledMerchantTime', 'arrivedAtMerchantTime', 'mealPickedUpTime', 'arrivedAtCustLocationTime', 'foodDeliveredTime']
for feat in to_datetime_feats:
    raw_df[feat] = to_datetime(raw_df, 'JobAcceptedDate', feat)

## 3.7 duration

In [79]:
def to_duration(df, start_col, end_col):
    return df[end_col] - df[start_col]

In [80]:
duration_feats = [('calledMerchantTime', 'arrivedAtMerchantTime'),
                  ('mealPickedUpTime', 'arrivedAtCustLocationTime')]

for feat_pairs in duration_feats:
    raw_df[feat_pairs[0] + '_to_' + feat_pairs[1]] = to_duration(raw_df, feat_pairs[0], feat_pairs[1])
    raw_df[feat_pairs[0] + '_to_' + feat_pairs[1] + ' (s)'] = raw_df[feat_pairs[0] + '_to_' + feat_pairs[1]].dt.total_seconds()
    raw_df[feat_pairs[0] + '_to_' + feat_pairs[1] + ' (m)'] = raw_df[feat_pairs[0] + '_to_' + feat_pairs[1]].dt.total_seconds() / 60

In [82]:
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 27 columns):
 #   Column                                             Non-Null Count  Dtype          
---  ------                                             --------------  -----          
 0   JobAcceptedDate                                    10000 non-null  object         
 1   calledMerchantTime                                 10000 non-null  object         
 2   arrivedAtMerchantTime                              10000 non-null  object         
 3   mealPickedUpTime                                   10000 non-null  object         
 4   arrivedAtCustLocationTime                          10000 non-null  object         
 5   foodDeliveredTime                                  10000 non-null  object         
 6   riderInitial.lat                                   10000 non-null  float64        
 7   riderInitial.long                                  10000 non-null  float64        
 8   Merchan

In [85]:
raw_df.to_csv('Sample 14days_food_delivery_All.csv', index=False)

# 4. Prepare Data for ML

## 4.1 load extracted feature data

In [20]:
raw_df = pd.read_csv('Sample 14days_food_delivery_All.csv')

In [21]:
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 27 columns):
 #   Column                                             Non-Null Count  Dtype  
---  ------                                             --------------  -----  
 0   JobAcceptedDate                                    10000 non-null  object 
 1   calledMerchantTime                                 10000 non-null  object 
 2   arrivedAtMerchantTime                              10000 non-null  object 
 3   mealPickedUpTime                                   10000 non-null  object 
 4   arrivedAtCustLocationTime                          10000 non-null  object 
 5   foodDeliveredTime                                  10000 non-null  object 
 6   riderInitial.lat                                   10000 non-null  float64
 7   riderInitial.long                                  10000 non-null  float64
 8   Merchant.Lat                                       10000 non-null  float64
 9   Merchan

## 4.2 construct new dataframe

In [22]:
data_dict = {
    'u_lat':[],
    'u_lon':[],
    'v_lat':[],
    'v_lon':[],
    'euc_dist':[],
    'shortest_dist':[],
    'day_of_week_sin':[],
    'day_of_week_cos':[],
    'duration (s)':[],
    'duration (m)':[]}

## 4.3 fill data from raw_df to df

In [23]:
data_dict['u_lat'] += raw_df['riderInitial.lat'].tolist()
data_dict['u_lon'] += raw_df['riderInitial.long'].tolist()
data_dict['v_lat'] += raw_df['Merchant.Lat'].tolist()
data_dict['v_lon'] += raw_df['Merchant.Lng'].tolist()
data_dict['euc_dist'] += raw_df['riderInitial_to_Merchant_EucDistance'].tolist()
data_dict['shortest_dist'] += raw_df['riderInitial_to_Merchant_ShortestDistance'].tolist()
data_dict['day_of_week_sin'] += raw_df['day_of_week_sin'].tolist()
data_dict['day_of_week_cos'] += raw_df['day_of_week_cos'].tolist()
data_dict['duration (s)'] += raw_df['calledMerchantTime_to_arrivedAtMerchantTime (s)'].tolist()
data_dict['duration (m)'] += raw_df['calledMerchantTime_to_arrivedAtMerchantTime (m)'].tolist()

In [24]:
data_dict['u_lat'] += raw_df['Merchant.Lat'].tolist()
data_dict['u_lon'] += raw_df['Merchant.Lng'].tolist()
data_dict['v_lat'] += raw_df['Customer.lat'].tolist()
data_dict['v_lon'] += raw_df['Customer.lng'].tolist()
data_dict['euc_dist'] += raw_df['Merchant_to_Customer_EucDistance'].tolist()
data_dict['shortest_dist'] += raw_df['Merchant_to_Customer_ShortestDistance'].tolist()
data_dict['day_of_week_sin'] += raw_df['day_of_week_sin'].tolist()
data_dict['day_of_week_cos'] += raw_df['day_of_week_cos'].tolist()
data_dict['duration (s)'] += raw_df['mealPickedUpTime_to_arrivedAtCustLocationTime (s)'].tolist()
data_dict['duration (m)'] += raw_df['mealPickedUpTime_to_arrivedAtCustLocationTime (m)'].tolist()

In [25]:
df = pd.DataFrame(data_dict)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   u_lat            20000 non-null  float64
 1   u_lon            20000 non-null  float64
 2   v_lat            20000 non-null  float64
 3   v_lon            20000 non-null  float64
 4   euc_dist         20000 non-null  float64
 5   shortest_dist    20000 non-null  float64
 6   day_of_week_sin  20000 non-null  float64
 7   day_of_week_cos  20000 non-null  float64
 8   duration (s)     20000 non-null  float64
 9   duration (m)     20000 non-null  float64
dtypes: float64(10)
memory usage: 1.5 MB


## 4.4 save data

In [26]:
df.to_csv('Sample 14days_food_delivery_Cleaned.csv', index=False)