In [151]:
import requests
import urllib
import pandas as pd
from geopy.geocoders import Nominatim
from tqdm import tqdm
import folium 
from geopy.geocoders import GoogleV3, Bing
from yaml import safe_load
from scipy.spatial import distance_matrix
import numpy as np
from geopy.distance import geodesic

In [70]:
# Read the geocoder key from a local YAML file so the secret is not hardcoded
# in the notebook.
with open('api_keys.yaml', 'r') as key_file:
    _api_keys = safe_load(key_file)
API_KEY = _api_keys['bing']

In [26]:
# CSV file holding the cities to process.
CITIES_DATA_PATH = 'data/Cities.csv'

# Base URL of the elevation point-query service; the query string is appended to it.
# NOTE(review): confirm this endpoint is still served before relying on it.
url = r'https://nationalmap.gov/epqs/pqs.php?'

# Bing-backed geocoder, authenticated with the key loaded above.
geolocator = Bing(api_key=API_KEY)

In [27]:
def get_coordinates_elevation(df, lat_column, lon_column, timeout=10):
    """Look up the elevation of every (lat, lon) row and store it in ``df``.

    One GET request per row is sent to the elevation point-query service at
    the module-level ``url``. The results are written in place to a new
    ``elev_meters`` column; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the coordinates; modified in place.
    lat_column, lon_column : str
        Names of the columns holding latitude and longitude.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    requests.Timeout
        If a request exceeds ``timeout``.
    """
    elevations = []
    for lat, lon in zip(df[lat_column], df[lon_column]):
        # The service takes longitude as ``x`` and latitude as ``y``.
        params = {
            'output': 'json',
            'x': lon,
            'y': lat,
            'units': 'Meters',
        }

        # Let requests build the query string; the timeout keeps one stalled
        # request from hanging the whole notebook.
        response = requests.get(url, params=params, timeout=timeout)
        # Fail loudly on an HTTP error instead of with an opaque JSON/KeyError.
        response.raise_for_status()

        payload = response.json()
        elevations.append(
            payload['USGS_Elevation_Point_Query_Service']['Elevation_Query']['Elevation']
        )

    df['elev_meters'] = elevations

In [28]:
# Load the city table and build a "City, State" label for each row.
cities_df = pd.read_csv(CITIES_DATA_PATH)
cities_df['full_name'] = cities_df['City'] + ', ' + cities_df['State']

# Preview goes last: Jupyter only renders a cell's final expression.
cities_df.head(5)

In [71]:
cities_df

Unnamed: 0,CityID,City,State,full_name,coordinates
0,4001,Abilene,TX,"Abilene, TX","(32.45357132, -99.73264313)"
1,4002,Akron,OH,"Akron, OH","(41.08481979, -81.51560974)"
2,4003,Albany,NY,"Albany, NY","(42.65172577, -73.75509644)"
3,4004,Albuquerque,NM,"Albuquerque, NM","(35.08424759, -106.64923859)"
4,4005,Allentown,PA,"Allentown, PA","(40.60275269, -75.46975708)"
...,...,...,...,...,...
467,4490,Easton,MA,"Easton, MA","(42.02978897, -71.1314621)"
468,4491,Laval,QC,"Laval, QC","(45.5837326, -73.75006866)"
469,4492,London,GB,"London, GB","(51.50015259, -0.12623601)"
470,4493,Union,NJ,"Union, NJ","(40.69643784, -74.26988983)"


In [29]:
coordinates = []
for city in tqdm(cities_df['full_name'].values):
    try:
        location = geolocator.geocode(city)
    except Exception as exc:
        # Best effort: report the failure and keep going, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare ``except`` would.
        tqdm.write(f'Geocoding failed for {city!r}: {exc!r}')
        location = None

    # No usable result (error, or the geocoder found no match): keep a None
    # placeholder so the list stays aligned with the rows of cities_df.
    if location is None:
        coordinates.append(None)
    else:
        coordinates.append((location.latitude, location.longitude))



100%|██████████| 472/472 [01:22<00:00,  5.75it/s]


In [30]:
# Attach the geocoding results, then tally missing vs. present coordinates.
cities_df['coordinates'] = coordinates
cities_df['coordinates'].isnull().value_counts()

False    472
Name: coordinates, dtype: int64

In [31]:
cities_df.full_name.iloc[0]

'Abilene, TX'

In [32]:
# Iterating a DataFrame yields its column labels, not its rows.
for column_name in cities_df.columns:
    print(column_name)

CityID
City
State
full_name
coordinates


In [35]:
us_map = folium.Map(location=[48, -102], zoom_start=3)

# Visual sanity check: drop one marker per city to see whether the geocoded
# points land where expected.
for city_name, city_coords in zip(cities_df['full_name'].values, cities_df['coordinates'].values):
    try:
        latitude, longitude = city_coords
        folium.Marker([latitude, longitude], popup=city_name).add_to(us_map)
    except (TypeError, ValueError):
        # Missing or malformed coordinates (e.g. None from a failed lookup):
        # report which city it was and carry on with the rest.
        print(f'{city_name}: {city_coords}')



In [36]:
us_map

In [45]:
cities_df

Unnamed: 0,CityID,City,State,full_name,coordinates
0,4001,Abilene,TX,"Abilene, TX","(32.45357132, -99.73264313)"
1,4002,Akron,OH,"Akron, OH","(41.08481979, -81.51560974)"
2,4003,Albany,NY,"Albany, NY","(42.65172577, -73.75509644)"
3,4004,Albuquerque,NM,"Albuquerque, NM","(35.08424759, -106.64923859)"
4,4005,Allentown,PA,"Allentown, PA","(40.60275269, -75.46975708)"
...,...,...,...,...,...
467,4490,Easton,MA,"Easton, MA","(42.02978897, -71.1314621)"
468,4491,Laval,QC,"Laval, QC","(45.5837326, -73.75006866)"
469,4492,London,GB,"London, GB","(51.50015259, -0.12623601)"
470,4493,Union,NJ,"Union, NJ","(40.69643784, -74.26988983)"


In [59]:
# Split the (latitude, longitude) pairs into two parallel lists.
latitudes_ = [lat for lat, _ in cities_df.coordinates]
longitudes_ = [lon for _, lon in cities_df.coordinates]

In [104]:
# Slim table indexed by CityID: separate lat/long columns, the original
# coordinate pair, and the display name.
simplified_cities_df = pd.DataFrame(
    {
        'lat': latitudes_,
        'long': longitudes_,
        'coordinates': cities_df['coordinates'].values,
        'full_name': cities_df['full_name'].values,
    },
    index=cities_df['CityID'],
)

In [128]:
def _as_float32_pair(pair):
    """Return one coordinate pair as a float32 NumPy array."""
    return np.array(pair, dtype=np.float32)


# Convert every coordinate pair so the column can later be stacked into a matrix.
simplified_cities_df['coordinates'] = simplified_cities_df['coordinates'].map(_as_float32_pair)

In [129]:
simplified_cities_df.head(10)

Unnamed: 0_level_0,lat,long,coordinates,full_name
CityID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4001,32.453571,-99.732643,"[32.45357, -99.73264]","Abilene, TX"
4002,41.08482,-81.51561,"[41.08482, -81.51561]","Akron, OH"
4003,42.651726,-73.755096,"[42.651726, -73.7551]","Albany, NY"
4004,35.084248,-106.649239,"[35.084248, -106.64924]","Albuquerque, NM"
4005,40.602753,-75.469757,"[40.602753, -75.46976]","Allentown, PA"
4006,42.026802,-93.620178,"[42.026802, -93.62018]","Ames, IA"
4007,42.375805,-72.519875,"[42.375805, -72.519875]","Amherst, MA"
4008,33.834492,-117.915642,"[33.83449, -117.91564]","Anaheim, CA"
4009,61.216583,-149.899597,"[61.216583, -149.8996]","Anchorage, AK"
4010,42.281418,-83.748474,"[42.281418, -83.748474]","Ann Arbor, MI"


In [130]:
# Persist the coordinate table. to_csv keeps the index by default, so the
# CityID index is saved alongside the columns.
simplified_cities_df.to_csv('data/Cities_coordinates.csv')

In [166]:
def get_distances(locations_1, locations_2):
    """Return the geodesic distance, in kilometres, between every pair of points.

    The result is a list of lists in which ``result[i][j]`` is the distance
    from ``locations_1[i]`` to ``locations_2[j]``. Each location is handed
    unchanged to ``geodesic``; a progress bar tracks the outer iteration.
    """
    return [
        [geodesic(origin, target).km for target in locations_2]
        for origin in tqdm(locations_1)
    ]

In [167]:
# Stack the per-city coordinate arrays into one two-column matrix; the second
# operand is an independent copy with the same contents.
locations_1 = np.vstack(simplified_cities_df['coordinates'].to_numpy())
locations_2 = locations_1.copy()

In [168]:
locations_1.shape

(472, 2)

In [169]:
# Pairwise distances of every city against every city (both inputs hold the
# same coordinates).
distances = get_distances(locations_1, locations_2)

100%|██████████| 472/472 [00:18<00:00, 24.99it/s]


In [170]:
np.array(distances)

array([[    0.        ,  1881.81915622,  2545.12843977, ...,
         7867.40552102,  2444.62265087,  5729.7412425 ],
       [ 1881.81915622,     0.        ,   667.10254474, ...,
         6046.35778965,   611.97004285,  7283.84836171],
       [ 2545.12843977,   667.10254474,     0.        , ...,
         5436.34949805,   221.35707773,  7892.93286588],
       ...,
       [ 7867.40552102,  6046.35778965,  5436.34949805, ...,
            0.        ,  5604.11185583, 11672.26098802],
       [ 2444.62265087,   611.97004285,   221.35707773, ...,
         5604.11185583,     0.        ,  7893.3614977 ],
       [ 5729.7412425 ,  7283.84836171,  7892.93286588, ...,
        11672.26098802,  7893.3614977 ,     0.        ]])

In [172]:
# Label both axes with the city names and write the distance matrix to disk.
city_labels = simplified_cities_df['full_name'].values
distances_df = pd.DataFrame(distances, index=city_labels, columns=city_labels)
distances_df.to_csv('data/Cities_geodisic_distances.csv')