# Opencage Geocoding

In [None]:
# if you dont already have the required libraries, you can install them by running this codeblock
!pip install opencage shapely tqdm ipywidgets geopandas



In [3]:
import os
# Creating paths
in_folderpath = os.path.join("..", "in")

os.makedirs(in_folderpath, exist_ok=True)

In [5]:
import geopandas as gpd

# Loading the data into A geopandas dataframe
gdf = gpd.read_file(os.path.join(in_folderpath, "datacentermap.csv"))

# Filtering the dataframe to only include locations within the US
gdf_usa = gdf[gdf['country'] == 'USA']

print(len(gdf_usa))

2526


We use opencage for our geocoding. If you want to run the script yourself, you'll need your own opencage API key, which can be obtained by signing up here: https://opencagedata.com/users/sign_up

In [1]:
from opencage.geocoder import OpenCageGeocode
from shapely.geometry import Point
from tqdm import tqdm

In [8]:
api_key = 'INSERT API KEY HERE'

geolocator = OpenCageGeocode(api_key)

# Iterate through each row to check and update geometry
for i, row in tqdm(gdf_usa.iterrows(), desc="Geocoding Addresses"):

    if row['geometry'] is None or row['geometry'].is_empty:
        # Creates an address for the geolocator
        address = f"{row['address']}, {row['postal']}, {row['city']}, {row['state']}, {row['country']}"
        print(f'Looking up: {address}')
        # Returns a list of dictionaries with spatial information about the data center 
        location = geolocator.geocode(address)
        print(f'Geocoding result: {location}')
        
        if location:

            # Retrieves the geometry from the list of dictionaries
            geometry = location[0]['geometry']

            # Adds the retrived latitude and longditude to each row in data frame
            gdf_usa.at[i, 'geometry'] = Point(geometry['lng'], geometry['lat'])
            gdf_usa.at[i, 'latitude'] = geometry['lat']
            gdf_usa.at[i, 'longitude'] = geometry['lng']
        else:
            print("Geometry not found")


gdf_usa_filtered = gdf_usa[gdf_usa['geometry'].notna()]

print(len(gdf_usa_filtered))

csv_path = os.path.join(in_folderpath, "datacenters_usa.csv")

gdf_usa_filtered.to_csv(csv_path, index=False)

print("GeoJSON file has been formatted and saved.")

Geocoding Addresses: 0it [00:00, ?it/s]

Lokking up: 1 Ramland Rd, 10962, Orangeburg, New York, USA


Geocoding Addresses: 0it [00:00, ?it/s]

Geocoding result: [{'annotations': {'DMS': {'lat': "41° 2' 3.65172'' N", 'lng': "73° 58' 32.73348'' W"}, 'FIPS': {'county': '36087', 'state': '36'}, 'MGRS': '18TWL8609643075', 'Maidenhead': 'FN31aa28vf', 'Mercator': {'x': -8234943.856, 'y': 4989350.749}, 'OSM': {'edit_url': 'https://www.openstreetmap.org/edit?node=8788963998#map=17/41.03435/-73.97576', 'note_url': 'https://www.openstreetmap.org/note/new#map=17/41.03435/-73.97576&layers=N', 'url': 'https://www.openstreetmap.org/?mlat=41.03435&mlon=-73.97576#map=17/41.03435/-73.97576'}, 'UN_M49': {'regions': {'AMERICAS': '019', 'NORTHERN_AMERICA': '021', 'US': '840', 'WORLD': '001'}, 'statistical_groupings': ['MEDC']}, 'callingcode': 1, 'currency': {'alternate_symbols': ['US$'], 'decimal_mark': '.', 'disambiguate_symbol': 'US$', 'html_entity': '$', 'iso_code': 'USD', 'iso_numeric': '840', 'name': 'United States Dollar', 'smallest_denomination': 1, 'subunit': 'Cent', 'subunit_to_unit': 100, 'symbol': '$', 'symbol_first': 1, 'thousands_sep




## References 

### Nominatim

https://medium.com/@gopesh3652/geocoding-with-python-using-nominatim-a-beginners-guide-220b250ca48d 

https://geopy.readthedocs.io/en/stable/ 
