In [2]:
import pandas as pd
from geojson import Feature, Point, MultiPoint, Polygon, FeatureCollection
import numpy as np
from scipy.spatial import ConvexHull
from scipy import spatial
import geopandas as gpd

In [178]:
buildings_all = gpd.read_file('all_buildings_SF_footprint_elev_2.shp')
business = gpd.read_file('business_SF_WGS.shp')
residential_buildings = gpd.read_file('residential_buildings_footprint_2.shp')

# Create Tree

In [179]:
buildings_all['OBJECTID'] = buildings_all.index
X_lat_lon = list(zip(buildings_all['y_lat'],buildings_all['x_lon']))
tree = spatial.KDTree(X_lat_lon)
X_lat_lon_emp = list(zip(business['lat'],business['lng']))

# Find closest building

In [180]:
location = 0
for item in X_lat_lon_emp:
    dist, index_select = tree.query(item, k=1)
    close_to = int(buildings_all.iloc[int(index_select)]['OBJECTID_1'])
    business.loc[int(location),'close_id'] = close_to
    location+=1
    

# Dissolve by closest building and add employee count

In [181]:
new_business = business[['geometry','close_id', 'EMPNUM']]
employee_count = new_business.dissolve(by='close_id', aggfunc='sum')

In [182]:
employee_buildings = buildings_all.join(employee_count, on='OBJECTID_1', how = 'inner', rsuffix='_business')

In [183]:
employee_buildings = employee_buildings.drop('geometry_business',1)

In [184]:
employee_buildings['index_vals'] = employee_buildings['OBJECTID']
employee_buildings['OBJECTID'] = employee_buildings['OBJECTID_1']

# Merge with residential buildings

In [185]:
frames = [residential_buildings, employee_buildings]
result = pd.concat(frames)

In [186]:
subset_result = result[['OBJECTID', 'geometry' ,'SUM_pop', 'EMPNUM']]

# Dissolve based on OBJECTID and sum population and employee count

In [187]:
combined_buildings_sum = subset_result.dissolve(by='OBJECTID', aggfunc='sum')

In [188]:
combined_buildings = buildings_all.join(combined_buildings_sum, on='OBJECTID_1', how = 'inner', rsuffix='_combined')

In [189]:
combined_buildings['residential_pop'] = combined_buildings['SUM_pop']

In [190]:
combined_buildings['SUM_pop'] = combined_buildings.residential_pop.fillna(0) + combined_buildings.EMPNUM.fillna(0)

In [191]:
new_combined_buildings = combined_buildings[['num_floor', 'Area_m2', 'x_lon', 'y_lat', 'SUM_pop', 'residential_pop', 'EMPNUM', 'ELEV_treat']]

In [192]:
new_combined_buildings['lat_lon'] = new_combined_buildings.apply(lambda row: (row['y_lat'], row['x_lon']), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [193]:
new_combined_buildings.to_csv('combined_buildings_2.csv')