In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import json

# Add node demand information to the data

In [2]:
# Load the per-node table (displayed columns: node_id, longitude, latitude).
# NOTE(review): later cells write their results back to this same path, so a
# second run reads a file that already carries the added columns.
node_path = '../data/processed_data/node_information.csv'
node = pd.read_csv(node_path)

In [3]:
# Preview the first rows to confirm the expected columns were loaded.
node.head()

Unnamed: 0,node_id,longitude,latitude
0,1,-122.019001,37.814343
1,2,-122.018928,37.814268
2,3,-122.02131,37.817351
3,4,-122.021319,37.817339
4,5,-122.020071,37.816446


In [4]:
# Load the EBMUD demand records from GeoJSON; each record carries a point
# geometry plus several demand columns (see the preview below).
demand_path = '../data/EBMUD-DATA/ebmud_demand.geojson'
demand = gpd.read_file(demand_path)

In [5]:
# Preview the raw demand records.
demand.head()

Unnamed: 0,OBJECTID,OBJ_ID,COORD_NO_1,COORD_EA_1,ID_New,PZ,MM_2018_gp,ASD_2018_g,AWD_2018_g,AAD_2018_g,HX_PIPE,geometry
0,1,124668.0,2207007.042,6058094.429,232762RODEOCrockett Aqueduct,A0A,2932.413672,3005.966249,3090.113885,3141.383633,,POINT (-122.24875 38.04316)
1,2,388537.0,2169405.652,6019891.334,275000RICHMONDCastro St,G0A1,2750.143026,2705.053616,2776.053519,2591.204475,,POINT (-122.37876 37.93788)
2,3,12489.0,2211597.852,6065224.328,009318CROCKETTHighway 40,A0B,1268.422152,1249.929965,1054.59354,1172.101328,,POINT (-122.22429 38.05613)
3,4,35459.0,2169328.434,6019860.118,236705RICHMONDCastro St,G0A1,638.579507,831.96931,780.1017,721.593392,,POINT (-122.37886 37.93767)
4,5,296811.0,2146007.317,6054800.24,304920BERKELEYBancroft Way,A5A,501.36273,510.17268,423.518779,474.730692,,POINT (-122.25621 37.87550)


In [6]:
# Pull the x (longitude) and y (latitude) coordinate out of every demand point.
demand_lon = [point.x for point in demand['geometry']]
demand_lat = [point.y for point in demand['geometry']]

In [7]:
# Store the extracted coordinates as plain numeric columns next to the geometry.
demand['longitude'] = demand_lon
demand['latitude'] = demand_lat

In [8]:
# Columns kept for the demand-to-node mapping: record id, coordinates, and the
# MM_2018_gp demand figure (units are not shown in this notebook — TODO confirm).
valid_column = ['OBJECTID','longitude','latitude','MM_2018_gp']

In [9]:
# Keep only the columns needed downstream. The explicit .copy() makes
# demand_clean an independent frame, so adding columns to it later does not
# trigger pandas' SettingWithCopyWarning.
demand_clean = demand[valid_column].copy()
demand_clean.head()

Unnamed: 0,OBJECTID,longitude,latitude,MM_2018_gp
0,1,-122.24875,38.043159,2932.413672
1,2,-122.378762,37.937884,2750.143026
2,3,-122.224289,38.056127,1268.422152
3,4,-122.378865,37.93767,638.579507
4,5,-122.256212,37.875499,501.36273


In [10]:
# demand_id_map maps each demand record to a pipe-network node id. The file
# demand_on_nodes.csv was generated by a separate program (cityscape, an
# abandoned project); an equivalent mapping can be rebuilt with shapely.
demand_id_map_path ='../data/EBMUD-DATA/demand_on_nodes.csv'
demand_id_map = pd.read_csv(demand_id_map_path)

In [11]:
# Attach the mapped node id to every demand record.
# NOTE(review): values are matched by row position, which presumes
# demand_on_nodes.csv lists its rows in the same order as the demand file —
# confirm against how that file was generated.
node_ids = list(demand_id_map['pipe_node_id'])
# .assign() returns a new frame instead of writing into what may be a slice of
# `demand`, which avoids the SettingWithCopyWarning. A length mismatch still
# raises ValueError, exactly as direct column assignment would.
demand_clean = demand_clean.assign(node_id=node_ids)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [12]:
# Total the demand per node. Only MM_2018_gp is aggregated: summing OBJECTID,
# longitude and latitude across records has no meaning. The result has exactly
# two columns, node_id and MM_2018_gp.
mapped_demand = demand_clean.groupby('node_id', as_index=False)['MM_2018_gp'].sum()

In [13]:
# Narrow the aggregated frame to the join key and the demand total.
valid_columns = ['node_id', 'MM_2018_gp']
mapped_demand = mapped_demand.loc[:, valid_columns]

In [14]:
# Left-join so every node is kept; nodes with no mapped demand get NaN here.
# Any MM_2018_gp column already present in `node` is dropped first: this
# notebook saves its result over the CSV that `node` is read from, so on a
# re-run the merge would otherwise emit MM_2018_gp_x / MM_2018_gp_y and the
# later MM_2018_gp lookup would fail with a KeyError.
node_with_demand = node.drop(columns=['MM_2018_gp'], errors='ignore').merge(
    mapped_demand, on='node_id', how='left'
)

In [15]:
# Nodes that received no demand record from the join carry zero demand.
node_with_demand = node_with_demand.fillna({'MM_2018_gp': 0})
node_with_demand.head()

Unnamed: 0,node_id,longitude,latitude,MM_2018_gp
0,1,-122.019001,37.814343,0.0
1,2,-122.018928,37.814268,0.0
2,3,-122.02131,37.817351,0.0
3,4,-122.021319,37.817339,0.0
4,5,-122.020071,37.816446,0.0


In [16]:
# Persist the demand-augmented node table without the DataFrame index.
# NOTE(review): this is the same path the node table was loaded from, so the
# input file is overwritten in place.
node_save_path = '../data/processed_data/node_information.csv'
node_with_demand.to_csv(node_save_path,index=False)

# Add node elevation information from USGS database

In [17]:
import requests
import urllib
import pandas as pd
import time

# USGS Elevation Point Query Service
# NOTE(review): this looks like the legacy EPQS endpoint; USGS may have moved the
# service to a different URL and response schema — confirm before re-running.
url = r'https://nationalmap.gov/epqs/pqs.php?'

def get_elevation(loc, timeout=30, retries=1):
    """Query the USGS Elevation Point Query Service for a single point.

    Parameters
    ----------
    loc : tuple
        ``(latitude, longitude)`` pair; sent to the service as ``y`` and ``x``.
    timeout : float, optional
        Seconds to wait for the server before an attempt counts as failed.
        Without a timeout a stalled connection would block its worker forever.
    retries : int, optional
        Extra attempts made after the first one fails, each preceded by a
        one-second pause. The default of 1 matches the original single retry.

    Returns
    -------
    The ``Elevation`` field of the JSON reply (requested in feet).

    Raises
    ------
    requests.exceptions.RequestException
        If every attempt fails (connection error, timeout, HTTP error status).
    KeyError, ValueError
        If the reply is not the expected JSON document.
    """
    lat, lon = loc
    params = {
        'output': 'json',
        'x': lon,
        'y': lat,
        'units': 'Feet'
    }
    query_url = url + urllib.parse.urlencode(params)

    last_attempt = max(retries, 0)
    for attempt in range(last_attempt + 1):
        try:
            result = requests.get(query_url, timeout=timeout)
            # Surface 4xx/5xx replies as errors so they are retried too,
            # instead of failing later while decoding a non-JSON error page.
            result.raise_for_status()
            break
        except requests.exceptions.RequestException:
            if attempt == last_attempt:
                raise
            time.sleep(1)

    elevation = result.json()['USGS_Elevation_Point_Query_Service']['Elevation_Query']['Elevation']
    return elevation
        
    

In [18]:
from concurrent.futures import ThreadPoolExecutor, as_completed

# Fan the elevation lookups out over a thread pool, one task per node. Leaving
# the `with` block waits for every submitted task, so the printed time covers
# the whole batch.
start = time.time()

lats = node_with_demand['latitude']
lons = node_with_demand['longitude']

elevations = []
with ThreadPoolExecutor(max_workers=200) as executor:
    processes = [executor.submit(get_elevation, loc) for loc in zip(lats, lons)]

print(f'Time taken: {time.time() - start}')



Time taken: 1579.3614809513092


In [19]:
# Gather the results in submission order so they line up with the node rows.
elevations = []
failed_indices = []  # positions whose lookup raised; kept for a later re-query
for i, task in enumerate(processes):
    try:
        elevations.append(task.result())
    except Exception:
        # Record a missing value for a failed lookup. Previously the elevation
        # from the preceding node was appended again, silently giving failed
        # nodes a wrong value (or raising NameError if the first task failed).
        print (i)
        failed_indices.append(i)
        elevations.append(np.nan)
node_with_demand['elevation'] = elevations

34930
34987
45220
45226
45236
45244
45250
45252
45257
45261
45269
45275
45280
45967
46122
46200
46215
46222
46234
46247
46260
46262
46264
46276
46284
46291
46342
46382
46388
46397
46418
46429
46452
46466
46514
46534
46551
46560
46571
46595
46618
46619
46627
46713
46743
46755
46773
46781
46801
46830
46848
46873
46875
46895
46935
46949
46957
46973
47030
47053
47061
47076
47082
47115
47123
47124
47129
47144
47150
47169
47180
47214
47231
47251
47288
47312
47349
47405
47787
48510
48513
48710
73232


In [20]:
# Preview the node table now that the elevation column has been added.
node_with_demand.head()

Unnamed: 0,node_id,longitude,latitude,MM_2018_gp,elevation
0,1,-122.019001,37.814343,0.0,998.47
1,2,-122.018928,37.814268,0.0,996.08
2,3,-122.02131,37.817351,0.0,859.29
3,4,-122.021319,37.817339,0.0,859.29
4,5,-122.020071,37.816446,0.0,900.12


In [21]:
# Write the node table (now with demand and elevation) without the index.
# NOTE(review): same file the notebook loads its node table from, so the input
# is overwritten in place.
save_path = '../data/processed_data/node_information.csv'
node_with_demand.to_csv(save_path,index=False)