In [1]:
import pandas as pd
import pyproj
import time

# I. Acquire Data
Load rectangular grids generated by QGIS

In [2]:
grid_fp = "data/penang_grid_EPSG3857_WGS84_v3.csv"
grid_df = pd.read_csv(grid_fp)
grid_df["id"] = grid_df["id"].apply(lambda grid_id: str(grid_id))
grid_df = grid_df.set_index("id")
grid_df = grid_df.dropna()
print(grid_df.shape)
grid_df.head()

(1199, 5)


Unnamed: 0_level_0,left,top,right,bottom,district
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13,11151054.94,610925.2518,11152054.94,609925.2518,Barat Daya
14,11151054.94,609925.2518,11152054.94,608925.2518,Barat Daya
16,11151054.94,607925.2518,11152054.94,606925.2518,Barat Daya
17,11151054.94,606925.2518,11152054.94,605925.2518,Barat Daya
18,11151054.94,605925.2518,11152054.94,604925.2518,Barat Daya


Convert WGS84 coordinate system to latitude/longitude

In [3]:
def convert_utm_coords(coords, inProj, outProj):
    lng, lat = pyproj.transform(inProj, outProj, coords[0], coords[1])
    return pd.Series([lng, lat])

In [4]:
inProj = pyproj.Proj(init='epsg:3857')
outProj = pyproj.Proj(init='epsg:4326')
start_time = time.time()
print("Converting UTM coordinates to latitude/longitude ...")
grid_df[["left_lng", "top_lat"]] = grid_df.apply(lambda row: convert_utm_coords(row[["left", "top"]], inProj, outProj), axis=1)
grid_df[["right_lng", "bottom_lat"]] = grid_df.apply(lambda row: convert_utm_coords(row[["right", "bottom"]], inProj, outProj), axis=1)
print("Elapsed time: %s seconds ..." %round(time.time() - start_time, 4))
grid_df.head()

Converting UTM coordinates to latitude/longitude ...
Elapsed time: 43.0936 seconds ...


Unnamed: 0_level_0,left,top,right,bottom,district,left_lng,top_lat,right_lng,bottom_lat
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
13,11151054.94,610925.2518,11152054.94,609925.2518,Barat Daya,100.171631,5.479662,100.180614,5.47072
14,11151054.94,609925.2518,11152054.94,608925.2518,Barat Daya,100.171631,5.47072,100.180614,5.461778
16,11151054.94,607925.2518,11152054.94,606925.2518,Barat Daya,100.171631,5.452835,100.180614,5.443893
17,11151054.94,606925.2518,11152054.94,605925.2518,Barat Daya,100.171631,5.443893,100.180614,5.43495
18,11151054.94,605925.2518,11152054.94,604925.2518,Barat Daya,100.171631,5.43495,100.180614,5.426007


# II. Assign Supermarkets to Grids

Load existing Penang supermarkets obtained via Google Places API

In [5]:
suppliers_fp = "data/suppliers_penang.csv"
suppliers_df = pd.read_csv(suppliers_fp)
suppliers_df.head()

Unnamed: 0,addr,lat,lng,name,type
0,"Penang Plaza, 126, Jalan Burmah, 10050 George ...",5.421493,100.325147,Giant Supermarket Penang Plaza,supermarkets
1,"Axis Complex, Pulau Tikus, 10350 George Town, ...",5.431337,100.31129,Bandar Baru (Pulau Tikus) Supermarket,supermarkets
2,"B1-15, Gurney Plaza, 170, Pesiaran Gurney, 102...",5.437011,100.308799,Cold Storage,supermarkets
3,"2 A, Jalan Angsana, Bandar Baru Ayer Itam, 115...",5.38769,100.283356,New City Supermarket,supermarkets
4,"1, Jalan Dato Keramat, 10000 George Town, Pula...",5.413531,100.328162,Gama Supermaket & Departmental Store,supermarkets


In [6]:
def assign_grid(coords, grid_dict):
    for grid_id, boundaries in grid_dict.items():
        if coords[0] > boundaries["left_lng"] and \
           coords[0] < boundaries["right_lng"] and \
           coords[1] > boundaries["bottom_lat"] and \
           coords[1] < boundaries["top_lat"]:
            return str(grid_id)
    return None

In [8]:
grid_dict = grid_df.to_dict('index')
print("Range of longitude: ", suppliers_df["lng"].min(), suppliers_df["lng"].max())
print("Range of latitude: ", suppliers_df["lat"].min(), suppliers_df["lat"].max())
start_time = time.time()
print("Assign exisiting supermarket to grids ...")
suppliers_df["grid"] = suppliers_df.apply(lambda row: assign_grid(row[["lng", "lat"]], grid_dict), axis=1)
print("Elapsed time: %s seconds ..." %round(time.time() - start_time, 4))
suppliers_df.head()

Range of longitude:  100.2781947 100.3295986
Range of latitude:  5.387589 5.4502578
Assign exisiting supermarket to grids ...
Elapsed time: 0.1813 seconds ...


Unnamed: 0,addr,lat,lng,name,type,grid
0,"Penang Plaza, 126, Jalan Burmah, 10050 George ...",5.421493,100.325147,Giant Supermarket Penang Plaza,supermarkets,920
1,"Axis Complex, Pulau Tikus, 10350 George Town, ...",5.431337,100.31129,Bandar Baru (Pulau Tikus) Supermarket,supermarkets,813
2,"B1-15, Gurney Plaza, 170, Pesiaran Gurney, 102...",5.437011,100.308799,Cold Storage,supermarkets,812
3,"2 A, Jalan Angsana, Bandar Baru Ayer Itam, 115...",5.38769,100.283356,New City Supermarket,supermarkets,659
4,"1, Jalan Dato Keramat, 10000 George Town, Pula...",5.413531,100.328162,Gama Supermaket & Departmental Store,supermarkets,921


In [None]:
### III. 