In [1]:
import datetime
import json
import os
import sys
import time
from os.path import join
from pathlib import Path

import haversine as hs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.neighbors import BallTree

In [2]:
# Directory that holds the road-graph CSVs read below
# (nodes_mod_connected.csv and segments_mod_connected.csv).
inp_graph = "../graphfiles/"
# Order-level CSV that is loaded with pd.read_csv further down.
inp_data = "./../Delhi/Delhi.csv"
# Directory every generated file is written under (created if missing).
out_dir = "data2"
# Output toggles. Only `isgraph` is consulted by the cells visible in this notebook.
# NOTE(review): the other three flags appear to be unused - confirm before relying on them.
isgraph = True
isrestaurants = True
isvehicle = True
isorders = True

In [3]:
# Make sure the output directory exists before any cell writes into it.
Path(out_dir).mkdir(parents=True, exist_ok=True)

# Graph nodes: keep both the DataFrame and a list of per-row dicts.
df = pd.read_csv(join(inp_graph, 'nodes_mod_connected.csv'))
l = df.to_dict(orient='records')

# The haversine BallTree works on (lat, lon) pairs expressed in radians.
nodes = df[['lat', 'lon']].values * np.pi/180.0
tree = BallTree(nodes, metric='haversine', leaf_size=1)
# Row position inside the tree -> original node id from the CSV.
idx_to_id = df['id'].values.tolist()

# Graph segments, ordered by (u, v) so the renumbering below is deterministic.
df2 = pd.read_csv(join(inp_graph, 'segments_mod_connected.csv')).sort_values(by=['u', 'v'])
l2 = df2.to_dict(orient='records')

In [4]:
# Map every original node id to a compact 1-based id.
# Ids are handed out in order of first appearance: all segment sources ('u')
# first, then any segment targets ('v') that were not already numbered.
dic = {}
cnt = 1
for endpoint in ('u', 'v'):
    for seg in l2:
        old_id = seg[endpoint]
        if old_id not in dic:
            dic[old_id] = cnt
            cnt += 1

# new node id -> (lat, lon)
nodeidToLatLong = {dic[rec['id']]: (rec['lat'], rec['lon']) for rec in l}
# [new u, new v, dist] for every segment, in the sorted segment order
graphList = [[dic[seg['u']], dic[seg['v']], seg['dist']] for seg in l2]
len(dic), len(nodeidToLatLong), len(graphList)

(182471, 182471, 460584)

In [5]:
if isgraph:
    # Each arc weight is the segment's `dist` divided by this constant and rounded,
    # with a floor of 1 so that no arc ends up with a zero weight.
    # NOTE(review): presumably metres and metres/second, giving seconds - confirm units.
    speed = 8
    # `with` guarantees the file is closed even if a write fails.
    with open(join(out_dir, 'graph.txt'), 'w') as f:
        # Header line: "p sp <number of nodes> <number of arcs>"
        # (looks like the DIMACS shortest-path text format - confirm with the consumer).
        f.write(f"p sp {len(dic)} {len(l2)}\n")
        # Unpack into fresh names: the previous loop variable `l` overwrote the
        # global node-record list `l`, breaking any re-run of the renumbering cell.
        for u, v, dist in graphList:
            f.write(f"a {u} {v} {max(1, round(dist/speed))}\n")

In [6]:
# Load the raw order records.
# NOTE: this rebinds `df`, which until here held the graph-node table.
df = pd.read_csv(inp_data)

In [7]:
# One row per distinct (store, lat, lng) combination, with how many orders it has.
dfr = df.groupby(['store_id', 'store_lat', 'store_lng']).size().reset_index(name='count')

# Store coordinates as a float array in radians, matching how the tree was built.
rest_latlong = np.array(dfr[['store_lat', 'store_lng']].values.tolist()).astype('float')
rest_latlong = rest_latlong * np.pi / 180.0

# Nearest graph node for every store: ans[0] = angular distance, ans[1] = tree row index.
ans = tree.query(rest_latlong, k=1, return_distance=True, dualtree=True, breadth_first=False)
# Angular distance scaled by 6378137 (Earth radius in metres) -> distance to the snapped node.
dfr["distance"] = ans[0]*6378137
dfr["node_id"] = ans[1]
# tree row index -> original node id -> compact node id
dfr['node_id'] = dfr['node_id'].map(lambda row_idx: dic[idx_to_id[row_idx]])

# store_id -> compact node id
storetonodemap = dict(dfr[['store_id', 'node_id']].to_dict(orient='split')['data'])

In [8]:
# One row per (rider, store) pair that occurs in the orders, with its order count.
dfd = df.groupby(['rider_id', 'store_id']).size().reset_index(name='count')
# Each pair gets the graph node of its store.
dfd['node_id'] = dfd['store_id'].map(lambda store: storetonodemap[store])
# rider_id -> node id (for a rider listed with several stores, the last row wins).
drivertonodemap = dict(dfd[['rider_id', 'node_id']].to_dict(orient='split')['data'])

In [9]:
# Restaurant node ids in ascending order, written one per line without header or index.
restList = dfr['node_id'].tolist()
restList.sort()
restListDF = pd.DataFrame(restList)
restListDF.to_csv(join(out_dir, 'restaurants.csv'), header=False, index=False, sep=' ')

In [10]:
# Start node of every (rider, store) row in ascending order, written one per line
# without header or index.
listDrivers = dfd['node_id'].tolist()
listDrivers.sort()
listDriversDF = pd.DataFrame(listDrivers)
listDriversDF.to_csv(join(out_dir, 'vehicles.csv'), header=False, index=False, sep=' ')

In [11]:
# Orders start from here
df = df.sort_values(by=['order_date', 'order_received_time'])
order_latlong = df[['customer_lat','customer_lng']].values.tolist()
order_latlong = np.array(order_latlong).astype('float')
order_latlong = order_latlong * np.pi / 180.0
ans = tree.query(order_latlong, k=1, return_distance=True, dualtree=True, breadth_first=False)
df["distance"] = ans[0]*6378137
df["node_id"] = ans[1]
df['node_id'] = df['node_id'].apply(lambda x: dic[idx_to_id[x]])

ordersDelhi = df.to_dict('records')
df

Unnamed: 0.1,Unnamed: 0,order_date,order_number,order_items,store_id,store_lat,store_lng,customer_lat,customer_lng,score,...,actual_delivery_distance_km,actual_return_hub_distance_km,suspicious_reachedgate_flag,suspicious_delivered_flag,startbike_lat_lng,reachedgate_lat_lng,delivered_lat_lng,backtostore_lat_lng,distance,node_id
123019,123994,2021-12-17,1,"[{""quantity"": 1, ""menu_code"": ""PIZ0119"", ""volu...",DPI65910,28.411150,77.043050,28.413235,77.043337,20.0,...,,,False,False,"28.4124386000000015,77.0440605999999946","28.4127675000000011,77.0436480999999986","28.4124982999999993,77.0435899999999947","28.4113867000000013,77.0431308000000001",4817.360160,77261
125032,126007,2021-12-17,1,"[{""quantity"": 1, ""menu_code"": ""PIZ0130"", ""volu...",DPI63828,28.586381,77.341360,28.597110,77.345800,16.0,...,,,False,False,"28.5866007999999994,77.341100499999996","28.5865785000000017,77.3410039999999981","28.5969100000000012,77.3453149999999994","28.5950580000000016,77.3426216000000011",11.205839,9313
15850,16001,2021-12-17,2,"[{""quantity"": 1, ""menu_code"": ""BEV0126"", ""volu...",DPI66526,28.363052,76.934594,28.363935,76.933845,0.0,...,,,False,False,"28.3631126999999985,76.9345665999999966","28.3600999999999992,76.9254066999999964","28.3600871000000012,76.925297999999998","28.3628828000000013,76.934087599999998",11860.493553,74796
13801,13951,2021-12-17,1,"[{""quantity"": 1, ""menu_code"": ""PIZ0170"", ""volu...",DPI66952,28.340222,77.323567,28.341432,77.324693,0.0,...,,,False,True,"28.3416093999999994,77.3246747000000028","28.3416031000000004,77.3246742999999981","28.3448416999999999,77.3012066999999945","28.3414785000000009,77.3246270000000067",12800.407395,156696
15824,15975,2021-12-17,2,"[{""quantity"": 1, ""menu_code"": ""PIZ0117"", ""volu...",DPI66243,28.646740,77.367560,28.649262,77.371382,18.0,...,,,False,False,"28.6462937000000011,77.3677116000000069","28.6462937000000011,77.3677116000000069","28.6462937000000011,77.3677116000000069","28.6462937000000011,77.3677116000000069",71.169941,12195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
400694,185844,2021-12-30,286,"[{""quantity"": 1, ""menu_code"": ""PIZ0131"", ""volu...",DPI66237,28.558875,77.165066,28.545130,77.167420,18.0,...,,,True,False,"28.5578010999999989,77.1643241000000017","28.5577661999999997,77.1643427000000059","28.5448614999999997,77.1668334000000016","28.5578031999999986,77.164367900000002",50.113399,3565
456813,242384,2021-12-30,287,"[{""quantity"": 1, ""menu_code"": ""PIZ0129"", ""volu...",DPI66237,28.558875,77.165066,28.545685,77.166745,16.0,...,,,False,False,"28.5578010999999989,77.1643241000000017","28.5447665000000015,77.1667860999999959","28.5451753000000004,77.167435299999994","28.5578031999999986,77.164367900000002",39.136411,67058
240576,24489,2021-12-30,330,"[{""quantity"": 1, ""menu_code"": ""DIP0005"", ""volu...",DPI63941,28.618232,77.416957,28.620339,77.425278,20.0,...,,,False,False,"28.6183154000000002,77.4183941999999945","28.6193880999999983,77.4253683000000024","28.620558599999999,77.4246413000000047","28.6182146000000017,77.4184675999999996",46.082825,177698
337748,122420,2021-12-30,201,"[{""quantity"": 2, ""menu_code"": ""DIP0005"", ""volu...",DPI66218,28.471830,77.104010,28.469224,77.104199,18.0,...,,,False,False,"28.4719100000000012,77.1024732999999998","28.4718851999999991,77.1024994999999933","28.4693235000000016,77.1043539000000067","28.4693703000000014,77.1042587999999967",37.193487,27567


In [12]:
def TimeToEpoch(t):
    """Return the whole seconds elapsed since midnight for timestamp string ``t``.

    Despite the name, this is a time-of-day offset, not a Unix epoch value.

    Parameters
    ----------
    t : str
        Timestamp formatted as ``YYYY-MM-DDTHH:MM:SS`` with an optional
        ``.ffffff`` fractional part. The fraction is ignored in the result.

    Returns
    -------
    float
        ``hour*3600 + minute*60 + second`` of ``t``.

    Raises
    ------
    ValueError
        If ``t`` matches neither accepted format.
    """
    try:
        dt1 = datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S.%f")
    except ValueError:
        # Only a format mismatch triggers the fallback; a bare `except:` would
        # also have swallowed unrelated errors such as KeyboardInterrupt.
        dt1 = datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S")
    # Same value the datetime subtraction produced before: the midnight reference
    # kept the microseconds, so the difference was always a whole number of seconds.
    return float(dt1.hour * 3600 + dt1.minute * 60 + dt1.second)

In [13]:
def getItemsList(oi, itemsDict):
    """Expand an order's JSON item list into a flat list of integer item indices.

    Parameters
    ----------
    oi : str
        JSON array of item objects; each object must provide ``menu_code`` and
        an integer ``quantity``.
    itemsDict : dict
        Mapping menu_code -> integer index. Updated in place: a code seen for
        the first time gets the next free index (the current dict size).

    Returns
    -------
    list of int
        The index of every item, repeated ``quantity`` times, in input order.
    """
    ret = []
    for item in json.loads(oi):
        code = item['menu_code']
        # setdefault evaluates len(itemsDict) before inserting, so a new code
        # receives the next unused index.
        idx = itemsDict.setdefault(code, len(itemsDict))
        # The former unused `l1` list also read 'menu_description' and
        # 'order_line_total', raising KeyError on records without them.
        ret.extend([idx] * item['quantity'])
    return ret

In [14]:
# Number of one-hour slots a day is divided into.
total_slots = 24
def getTimeIndex(t):
    """Return the index of the one-hour slot that timestamp ``t`` falls into.

    Slot ``i`` covers the time of day from ``i:00`` up to (but excluding)
    ``(i+1):00``; with ``total_slots = 24`` the result equals the hour of ``t``.
    Times not matched by any slot boundary fall into the last slot.

    Parameters
    ----------
    t : str
        Timestamp formatted as ``YYYY-MM-DDTHH:MM:SS`` with an optional
        ``.ffffff`` fractional part. Only the time of day is used.

    Returns
    -------
    int
        Slot index in ``range(total_slots)``.

    Raises
    ------
    ValueError
        If ``t`` matches neither accepted format.
    """
    try:
        dt = datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S.%f")
    except ValueError:
        # Fall back only on a format mismatch instead of using a bare `except:`.
        dt = datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S")
    t2 = dt.time()
    # Arbitrary midnight anchor; only its time-of-day component matters.
    current = datetime.datetime(2011, 1, 1)
    for i in range(0, total_slots):
        current = current + datetime.timedelta(minutes = 60)
        # `current` is now the exclusive upper boundary of slot i.
        if current.time() > t2:
            return i
    # The last boundary wraps to 00:00 and never compares greater, so the final
    # hour of the day lands here.
    return total_slots-1

In [15]:
# Root folder for all per-date order files.
tpath = join(out_dir, 'orders')
Path(tpath).mkdir(parents=True, exist_ok=True)
# One sub-folder per distinct order date. Collecting the dates into a set first
# avoids issuing a mkdir call for every single order.
for date in {order['order_date'] for order in ordersDelhi}:
    Path(join(tpath, date)).mkdir(parents=True, exist_ok=True)
tpath

'data2/orders'

In [19]:
# Write every order as one line of <tpath>/<order_date>/<slot>.orders:
#   "<received seconds> <customer node> <restaurant node> <item idx> <item idx> ... \n"
# (each field, including the last one, is followed by a single space).
itemsDict = {}          # menu_code -> item index, filled by getItemsList
fileHandlersMap = {}    # (date, slot) -> open file handle
try:
    for order in ordersDelhi:
        receivedTime = int(TimeToEpoch(order['order_received_time']))
        cust_node_id = order['node_id']
        rest_node_id = storetonodemap[order['store_id']]
        date = order['order_date']
        itemsList = getItemsList(order['order_items'], itemsDict)
        slot = getTimeIndex(order['order_received_time'])

        # Open the target file the first time its (date, slot) bucket shows up;
        # the line-writing code below is then shared by new and known buckets.
        key = (date, slot)
        if key not in fileHandlersMap:
            fileHandlersMap[key] = open(join(tpath, date, str(slot)+'.orders'), 'w')

        fields = [receivedTime, cust_node_id, rest_node_id, *itemsList]
        fileHandlersMap[key].write(" ".join(str(x) for x in fields) + " \n")
finally:
    # Close every handle even when an order fails half-way through the loop.
    for k in fileHandlersMap:
        fileHandlersMap[k].close()

In [22]:
# Write one CSV per (order_date, slot) bucket to <tpath>/<order_date>/<slot>.csv.
# Each row is: received seconds, customer node, restaurant node, and a single
# string holding the space-separated item indices.
# Assumes `ordersDelhi` is ordered so that every bucket is one contiguous run;
# a bucket that reappears later would overwrite its earlier file.
itemsDict = {}      # menu_code -> item index, filled by getItemsList
old = (0,0)         # sentinel meaning "no bucket started yet"
output = []         # rows buffered for the bucket currently being read

for order in ordersDelhi:
    receivedTime = int(TimeToEpoch(order['order_received_time']))
    date = order['order_date']
    slot = getTimeIndex(order['order_received_time'])
    if (date, slot) != old:
        if old != (0, 0):
            # Progress log. Note the printed time belongs to the first order of
            # the NEW bucket, while date/slot name the bucket being flushed.
            print(receivedTime, old[0], old[1])
            dftmp = pd.DataFrame(output)
            dftmp.to_csv(join(tpath, old[0], str(old[1])+'.csv'), sep=',', index=False, header=False)
            output = []
        old = (date, slot)

    cust_node_id = order['node_id']
    rest_node_id = storetonodemap[order['store_id']]
    itemsList = getItemsList(order['order_items'], itemsDict)

    tmp = [receivedTime, cust_node_id, rest_node_id]
    tmp.append("".join(str(i)+" " for i in itemsList))
    output.append(tmp)

# Flush the last bucket. Guarded so an empty order list no longer fails on
# undefined loop variables, and keyed on `old` rather than leaked loop state.
# The former trailing print(t1,t2,t3,t4) referenced names that are never
# defined and has been removed.
if old != (0, 0):
    print(receivedTime, old[0], old[1])
    dftmp = pd.DataFrame(output)
    dftmp.to_csv(join(tpath, old[0], str(old[1])+'.csv'), sep=',', index=False, header=False)



39612 2021-12-17 10
43200 2021-12-17 11
46800 2021-12-17 12
50401 2021-12-17 13
54004 2021-12-17 14
57601 2021-12-17 15
61200 2021-12-17 16
64802 2021-12-17 17
68400 2021-12-17 18
72001 2021-12-17 19
75600 2021-12-17 20
79200 2021-12-17 21
82800 2021-12-17 22
0 2021-12-17 23
3614 2021-12-17 0
7212 2021-12-17 1
10812 2021-12-17 2
39497 2021-12-17 3
39677 2021-12-18 10
43201 2021-12-18 11
46801 2021-12-18 12
50400 2021-12-18 13
54001 2021-12-18 14
57607 2021-12-18 15
61200 2021-12-18 16
64800 2021-12-18 17
68400 2021-12-18 18
72000 2021-12-18 19
75601 2021-12-18 20
79200 2021-12-18 21
82801 2021-12-18 22
2 2021-12-18 23
3602 2021-12-18 0
7202 2021-12-18 1
10804 2021-12-18 2
39604 2021-12-18 3
43219 2021-12-19 11
46802 2021-12-19 12
50401 2021-12-19 13
54000 2021-12-19 14
57601 2021-12-19 15
61200 2021-12-19 16
64801 2021-12-19 17
68400 2021-12-19 18
72001 2021-12-19 19
75600 2021-12-19 20
79201 2021-12-19 21
82807 2021-12-19 22
4 2021-12-19 23
3602 2021-12-19 0
7200 2021-12-19 1
10802 20

In [2]:
# Timing/debug variant of the per-slot CSV cell: it walks the orders, builds the
# rows of the first two (date, slot) buckets and prints wall-clock timestamps
# around the DataFrame construction. CSV writing is intentionally disabled here.
# Must run after the cells that define ordersDelhi, storetonodemap,
# TimeToEpoch, getTimeIndex and getItemsList.
itemsDict = {}      # menu_code -> item index, filled by getItemsList
old = (0,0)         # sentinel meaning "no bucket started yet"
output = []         # rows buffered for the bucket currently being read
j = 0               # number of completed buckets so far
print(1,time.time())
for order in ordersDelhi:
    receivedTime = int(TimeToEpoch(order['order_received_time']))
    date = order['order_date']
    slot = getTimeIndex(order['order_received_time'])
    if (date, slot) != old:
        if(old != (0, 0)):
            j+=1
            print(receivedTime, old[0], old[1])
            print(1,time.time())
            dftmp = pd.DataFrame(output)
            print(1,time.time())
            print(1,time.time())
            output = []
        old = (date, slot)
        # Stop once two buckets have been completed.
        if j>=2:
            break

    # Use the precomputed node id and the store map that the other order cells
    # use; latLongToNodeId / nodeToLatLongMap / storeToNodeMap are not defined
    # anywhere in the visible notebook.
    cust_node_id = order['node_id']
    rest_node_id = storetonodemap[order['store_id']]
    itemsList = getItemsList(order['order_items'], itemsDict)

    # Unlike the CSV cell, every item index becomes its own column here.
    tmp = [receivedTime, cust_node_id, rest_node_id]
    for i in itemsList:
        tmp.append(i)
    output.append(tmp)

print(1,time.time())
print(receivedTime, old[0], old[1])
print(1,time.time())
dftmp = pd.DataFrame(output)
print(1,time.time())
print(1,time.time())

1 1663707457.4801888


NameError: name 'ordersDelhi' is not defined

In [36]:
# Scratch cell: display the rows currently buffered in `output`.
output

[]

In [None]:
# Scratch cell: display the current wall-clock time as seconds since the Unix epoch.
import time
time.time()

In [None]:
### screen starts at 6:50 am