In [4]:
import pandas as pd
import pyproj

# I. Load Grids
Load rectangular grids generated by QGIS

In [5]:
# Read the grid rectangles exported from QGIS and key every row by its "id" column.
# NOTE(review): this is an absolute, machine-specific path — consider reading it
# from a configurable data directory so the notebook runs on other machines.
grid_fp = "/home/swang/Desktop/shenghao-repos/asiatique/data/penang_grid_EPSG3857_WGS84.csv"
grid_df = pd.read_csv(grid_fp).set_index("id")

# Preview the first rows (left / top / right / bottom bounds of each grid cell).
grid_df.head()

Unnamed: 0_level_0,left,top,right,bottom
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,11151270.0,623329.070863,11152270.0,622329.070863
2,11151270.0,622329.070863,11152270.0,621329.070863
3,11151270.0,621329.070863,11152270.0,620329.070863
4,11151270.0,620329.070863,11152270.0,619329.070863
5,11151270.0,619329.070863,11152270.0,618329.070863


Convert the grid corner coordinates from Web Mercator (EPSG:3857) to WGS84 longitude/latitude (EPSG:4326)

In [7]:
def convert_utm_coords(coords, inProj, outProj):
    """Reproject one (x, y) point and return the result as a two-element Series.

    Parameters
    ----------
    coords : sequence or pd.Series
        The point to convert. The first value is used as x and the second as y.
        Values are read by position, so a Series carrying string labels
        (for example ``row[["left", "top"]]``) is handled correctly.
    inProj, outProj
        Source and target projections; both are forwarded unchanged to
        ``pyproj.transform``.

    Returns
    -------
    pd.Series
        ``[lng, lat]`` — the two values returned by ``pyproj.transform``, in
        that order, under the default integer index.
    """
    # Unpack by position instead of using coords[0] / coords[1]: integer keys on
    # a label-indexed Series depend on a positional fallback that newer pandas
    # versions deprecate.
    x, y = tuple(coords)[:2]
    # NOTE(review): pyproj.transform is reported as deprecated in recent pyproj
    # releases in favour of pyproj.Transformer — confirm before upgrading pyproj.
    lng, lat = pyproj.transform(inProj, outProj, x, y)
    return pd.Series([lng, lat])

In [8]:
# Source and target projections used for every corner conversion below.
# NOTE(review): the `init=` keyword is reported as deprecated in recent pyproj
# releases; it is kept as-is because switching syntax may change the axis order
# that convert_utm_coords relies on — confirm before modernising.
inProj = pyproj.Proj(init='epsg:3857')
outProj = pyproj.Proj(init='epsg:4326')

# Each entry pairs the two projected corner columns with the two columns that
# receive the converted longitude / latitude values.
corner_columns = (
    (["left", "top"], ["left_lng", "top_lat"]),
    (["right", "bottom"], ["right_lng", "bottom_lat"]),
)
for source_cols, target_cols in corner_columns:
    grid_df[target_cols] = grid_df.apply(
        lambda row: convert_utm_coords(row[source_cols], inProj, outProj),
        axis=1,
    )

grid_df.head()

Unnamed: 0_level_0,left,top,right,bottom,left_lng,top_lat,right_lng,bottom_lat
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,11151270.0,623329.070863,11152270.0,622329.070863,100.17356,5.590568,100.182544,5.581628
2,11151270.0,622329.070863,11152270.0,621329.070863,100.17356,5.581628,100.182544,5.572687
3,11151270.0,621329.070863,11152270.0,620329.070863,100.17356,5.572687,100.182544,5.563746
4,11151270.0,620329.070863,11152270.0,619329.070863,100.17356,5.563746,100.182544,5.554805
5,11151270.0,619329.070863,11152270.0,618329.070863,100.17356,5.554805,100.182544,5.545864


# II. Assign Residential Buildings to Grids

In [25]:
def assign_grid(coords, grid_dict):
    """Return the id of the first grid cell that contains a point.

    Parameters
    ----------
    coords : sequence or pd.Series
        The point to locate; the first value is the longitude and the second
        the latitude. Values are read by position, so a Series with string
        labels (for example ``row[["center_lng", "center_lat"]]``) works.
    grid_dict : dict
        Maps each grid id to a mapping that provides the keys ``"left_lng"``,
        ``"right_lng"``, ``"bottom_lat"`` and ``"top_lat"``.

    Returns
    -------
    The matching grid id, or ``None`` when no cell contains the point
    (including when ``grid_dict`` is empty).

    Notes
    -----
    Cells are treated as half-open rectangles: the left and bottom edges belong
    to the cell, the right and top edges do not. A point lying exactly on an
    edge shared by two adjacent cells is therefore assigned to exactly one of
    them instead of being left unassigned.
    """
    # Unpack by position rather than coords[0] / coords[1]: integer keys on a
    # label-indexed Series depend on a positional fallback that newer pandas
    # versions deprecate.
    lng, lat = tuple(coords)[:2]
    # Linear scan over every cell; the first match wins.
    for grid_id, boundaries in grid_dict.items():
        inside_lng = boundaries["left_lng"] <= lng < boundaries["right_lng"]
        inside_lat = boundaries["bottom_lat"] <= lat < boundaries["top_lat"]
        if inside_lng and inside_lat:
            return grid_id
    return None

In [26]:
# Load the residential-building table.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
buildings_fp = "/home/swang/Desktop/shenghao-repos/asiatique/data/penang_residential_buildings.csv"
buildings_df = pd.read_csv(buildings_fp)

# Sanity check: print the min / max of the building-centre coordinates.
for range_label, centre_col in (
    ("Range of longitude: ", "center_lng"),
    ("Range of latitude: ", "center_lat"),
):
    print(range_label, buildings_df[centre_col].min(), buildings_df[centre_col].max())

# {grid id: {column name: value}} lookup consumed by assign_grid.
grid_dict = grid_df.to_dict('index')

# Tag every building with the grid cell containing its centre point, then key
# the table by the building "id" column.
buildings_df["grid"] = buildings_df.apply(
    lambda row: assign_grid(row[["center_lng", "center_lat"]], grid_dict),
    axis=1,
)
buildings_df = buildings_df.set_index("id")
buildings_df.head()

Range of longitude:  100.19315481666666 100.534263925
Range of latitude:  5.1504309 5.531056700000001


Unnamed: 0,id,name,type,area,center_lng,center_lat,grid
0,0,Forest Field,apartments,941.85125,100.294707,5.435854,707
1,1,Park Avenue,apartments,1198.2022,100.29523,5.434696,707
2,2,,apartments,1298.948694,100.286958,5.392957,659
3,3,,apartments,2041.100327,100.28602,5.393071,659
4,4,,apartments,1298.708829,100.286121,5.393432,659


# III. Compute Gridwise Total Floor Area
Check out available building types.

In [28]:
# List the distinct values of the "type" column.
building_types = buildings_df["type"].unique()
print("All building types: ", building_types)

# Total "area" per building type.
buildings_df.groupby("type")["area"].sum()

All building types:  ['apartments' 'residential' 'bungalow' 'dormitory' 'detached']


type
apartments     3.437263e+06
bungalow       1.605638e+05
detached       1.305754e+04
dormitory      2.046372e+03
residential    6.333936e+04
Name: area, dtype: float64

In [30]:
def check_bungalow(building_type, area):
    """Split an area into a (non-bungalow, bungalow) pair.

    Parameters
    ----------
    building_type
        Value compared against the string ``'bungalow'``.
    area
        The area value to place in one of the two slots.

    Returns
    -------
    pd.Series
        ``[0, area]`` when ``building_type == 'bungalow'``, otherwise
        ``[area, 0]``.
    """
    if building_type == 'bungalow':
        split = [0, area]
    else:
        split = [area, 0]
    return pd.Series(split)

In [31]:
# Split the floor area into two columns: "area_bungalow" receives the area of
# rows whose type is 'bungalow' (0 elsewhere), and "area" keeps the area of all
# other rows (0 for bungalows). This is done with vectorised column operations
# rather than building one pd.Series per row through DataFrame.apply.
# NOTE: "area" is overwritten in place, so re-running this cell without first
# reloading buildings_df would zero out the bungalow areas.
is_bungalow = buildings_df["type"] == "bungalow"
buildings_df["area_bungalow"] = buildings_df["area"].where(is_bungalow, 0)
buildings_df["area"] = buildings_df["area"].where(~is_bungalow, 0)
buildings_df.head()

Unnamed: 0_level_0,name,type,area,center_lng,center_lat,grid,area_bungalow
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,Forest Field,apartments,941.85125,100.294707,5.435854,707,0.0
1,Park Avenue,apartments,1198.2022,100.29523,5.434696,707,0.0
2,,apartments,1298.948694,100.286958,5.392957,659,0.0
3,,apartments,2041.100327,100.28602,5.393071,659,0.0
4,,apartments,1298.708829,100.286121,5.393432,659,0.0


In [34]:
# Sum both floor-area columns per grid cell.
# The columns are selected with a list: selecting several GroupBy columns with a
# bare tuple (['area', 'area_bungalow'] without the inner brackets) is
# deprecated and raises an error in pandas 2.x.
area_df = buildings_df.groupby('grid')[['area', 'area_bungalow']].sum()
area_df.head()

Unnamed: 0_level_0,area,area_bungalow
grid,Unnamed: 1_level_1,Unnamed: 2_level_1
141,4000.365329,0.0
174,5421.557252,0.0
186,6577.785922,0.0
227,4596.24725,0.0
228,2773.381859,0.0
