In [None]:
! pip3 install boto3
! pip3 install python-dotenv
! pip3 install requests
! pip3 install pandas

In [3]:
from dotenv import load_dotenv
import os
import boto3

# Read the .env file so its entries are available as environment variables
load_dotenv()

# Amazon Location Service client, configured from the environment:
# AWS_REGION selects the region, AWS_ACCESS_KEY / AWS_SECRET_ACCESS_KEY supply
# the credentials (os.environ.get yields None for any variable that is not set)
amazon_location_client = boto3.client(
    service_name="location",
    region_name=os.environ.get("AWS_REGION"),
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)


In [4]:
def amazon_location_geocoder(geocoder, address):
    """Geocode a single address through an Amazon Location Service place index.

    Args:
        geocoder: Name of the place index to query (sent as ``IndexName``).
        address: Mapping with an ``"id"`` entry, copied into the output, and a
            ``"text"`` entry holding the free-form address to search for.

    Returns:
        A one-element list holding a dict with the keys ``id``,
        ``street_number``, ``street_name``, ``city``, ``province``,
        ``postal_code``, ``country``, ``label``, ``longitude`` and
        ``latitude``. Every field except ``id`` is ``None`` when the service
        returns no match or the matched place lacks that attribute.

    Raises:
        KeyError: If ``address`` lacks ``"id"``/``"text"`` or the response has
            no ``"Results"`` entry.
    """
    # Search the Amazon Location Service place index for the given address text
    response = amazon_location_client.search_place_index_for_text(
        IndexName=geocoder, MaxResults=1, Text=address["text"]
    )

    # Start from an empty record so that an address without any match still
    # produces a row instead of failing on unassigned variables
    record = {
        "id": address["id"],
        "street_number": None,
        "street_name": None,
        "city": None,
        "province": None,
        "postal_code": None,
        "country": None,
        "label": None,
        "longitude": None,
        "latitude": None,
    }

    matches = response["Results"]
    if matches:
        # With MaxResults=1 at most one entry is expected; if several come
        # back, the last one wins
        place = matches[-1]["Place"]

        # Point is read as [longitude, latitude]; tolerate a missing geometry
        point = (place.get("Geometry") or {}).get("Point") or (None, None)

        record.update(
            {
                "street_number": place.get("AddressNumber"),
                "street_name": place.get("Street"),
                "city": place.get("Municipality"),
                "province": place.get("Region"),
                "postal_code": place.get("PostalCode"),
                "country": place.get("Country"),
                "label": place.get("Label"),
                "longitude": point[0],
                "latitude": point[1],
            }
        )

    # Return geocoding results in a standardized format
    return [record]


In [7]:
import requests


def canada_post_geocoder(address, timeout=30):
    """Look up a single address with the Canada Post AddressComplete API.

    Two requests are made: a Find call that turns the free-form text into a
    suggestion ``Id``, then a Retrieve call that returns the address details
    for that ``Id``. The API key is read from the ``CANADA_POST_API_KEY``
    environment variable.

    Args:
        address: Mapping with an ``"id"`` entry, copied into the output, and a
            ``"text"`` entry holding the free-form address to search for.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A one-element list holding a dict with the keys ``id``,
        ``street_number``, ``street_name``, ``city``, ``province``,
        ``postal_code``, ``country``, ``label``, ``longitude`` and
        ``latitude``. ``longitude`` and ``latitude`` are always ``None``. The
        other fields except ``id`` are ``None`` when Find returns no
        suggestion or Retrieve returns no entry whose ``Language`` is ``ENG``.

    Raises:
        KeyError: If the first Find entry has no ``"Id"``.
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
    """
    api_key = os.getenv("CANADA_POST_API_KEY")

    # Start from an empty record so that a lookup without a usable answer
    # still produces a row instead of failing on unassigned variables
    record = {
        "id": address["id"],
        "street_number": None,
        "street_name": None,
        "city": None,
        "province": None,
        "postal_code": None,
        "country": None,
        "label": None,
        "longitude": None,
        "latitude": None,
    }

    # Find request: the query is passed through ``params`` so that characters
    # such as "#", "&" or spaces in the address are URL-encoded correctly
    find_url = "https://ws1.postescanada-canadapost.ca/addresscomplete/interactive/find/v2.10/json.ws"
    find_params = {
        "key": api_key,
        "provider": "AddressComplete",
        "package": "Interactive",
        "service": "Find",
        "version": "2.1",
        "endpoint": "json.ws",
        "SearchTerm": address["text"],
        "MaxSuggestions": 1,
    }
    response = requests.request(
        "GET", find_url, params=find_params, timeout=timeout
    )
    response.raise_for_status()

    # Parse the response as JSON format
    suggestions = response.json()
    if not suggestions:
        # Nothing was suggested for this text, so there is nothing to retrieve
        return [record]

    # Extract Id from the response (an entry without "Id" raises KeyError)
    suggestion_id = suggestions[0]["Id"]

    # Retrieve request: fetch the full address details for the suggestion
    retrieve_url = "https://ws1.postescanada-canadapost.ca/addresscomplete/interactive/retrieve/v2.11/json.ws"
    retrieve_params = {
        "key": api_key,
        "provider": "AddressComplete",
        "package": "Interactive",
        "service": "Retrieve",
        "version": "2.11",
        "endpoint": "json.ws",
        "Id": suggestion_id,
    }
    response = requests.request(
        "GET", retrieve_url, params=retrieve_params, timeout=timeout
    )
    response.raise_for_status()

    # Extract geocoding results from the API response; only entries flagged
    # as English are used, and the last such entry wins
    for entry in response.json():
        if entry.get("Language") == "ENG":
            record.update(
                {
                    "street_number": entry.get("BuildingNumber"),
                    "street_name": entry.get("Street"),
                    "city": entry.get("City"),
                    "province": entry.get("ProvinceName"),
                    "postal_code": entry.get("PostalCode"),
                    "country": entry.get("CountryName"),
                    "label": entry.get("Label"),
                }
            )

    # Return geocoding results in a standardized format
    return [record]


In [10]:
import pandas as pd


def save_csv(data, file_name):
    """Write geocoding records to a CSV file, one row per record.

    Args:
        data: Records to save. A list whose items are dicts, or lists/tuples
            of dicts (one list per geocoder call); nested lists are flattened
            one level so every dict becomes its own row. Any other input is
            handed to ``pd.json_normalize`` unchanged.
        file_name: Destination of the CSV. When it is a path, missing parent
            folders are created first.
    """
    import os

    # Flatten one level: collecting geocoder return values with append() gives
    # a list of one-element lists, which would not expand into columns
    if isinstance(data, list):
        records = []
        for item in data:
            if isinstance(item, (list, tuple)):
                records.extend(item)
            else:
                records.append(item)
    else:
        records = data

    # Make sure the destination folder exists so the results are not lost
    # after all lookups have already been performed
    if isinstance(file_name, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(file_name))
        if parent:
            os.makedirs(parent, exist_ok=True)

    # Normalize JSON data into a flat table
    df = pd.json_normalize(records)
    # Save the data as a CSV file
    df.to_csv(file_name, encoding="utf-8", index=False)

In [None]:
# Scenario 1: address as is (incomplete)

import csv
import re

with open("input/input-addresses.csv", newline="") as csvfile:
    amazon_location_esri_result = []
    amazon_location_here_result = []
    canada_post_result = []

    # Read CSV file and iterate over rows
    # NOTE(review): with quotechar="|" a double quote is treated as ordinary
    # data, not as a field quote - confirm this matches the input file
    reader = csv.reader(csvfile, delimiter=",", quotechar="|")

    # Skip header row
    next(reader, None)
    for row in reader:
        print(row[0])

        # Create a dictionary containing the address information
        address = {"id": row[0], "text": row[1]}

        # Call geocoding functions for Amazon Location Service with Esri and HERE
        # providers, and Canada Post. Each provider has its own try/except so that
        # one failing lookup does not stop the others from being queried for this row.
        for provider_results, geocode, geocode_args in (
            (amazon_location_esri_result, amazon_location_geocoder,
             ("esri-geocoder", address)),
            (amazon_location_here_result, amazon_location_geocoder,
             ("here-geocoder", address)),
            (canada_post_result, canada_post_geocoder, (address,)),
        ):
            try:
                # Geocoders return a list of records; extend() keeps the
                # collected results a flat list of dicts (one CSV row each)
                provider_results.extend(geocode(*geocode_args))
            except Exception as e:
                print(e, row)

    # Save the geocoding results to CSV files
    save_csv(amazon_location_esri_result,
             "output/esri-geocoding-result-scenario-1.csv")
    save_csv(amazon_location_here_result,
             "output/here-geocoding-result-scenario-1.csv")
    save_csv(canada_post_result,
             "output/canada-post-geocoding-result-scenario-1.csv")


In [None]:
# Scenario 2: complete address

with open("input/input-addresses.csv", newline="") as csvfile:
    amazon_location_esri_result = []
    amazon_location_here_result = []
    canada_post_result = []

    # Read CSV file and iterate over rows
    # NOTE(review): with quotechar="|" a double quote is treated as ordinary
    # data, not as a field quote - confirm this matches the input file
    reader = csv.reader(csvfile, delimiter=",", quotechar="|")

    # Skip header row
    next(reader, None)
    for row in reader:
        print(row[0])

        # Complete the address with city, province and country
        address = {"id": row[0], "text": f"{row[1]}, Calgary, AB, Canada"}

        # Query every provider independently: each has its own try/except so that
        # one failing lookup does not stop the others from being queried for this row
        for provider_results, geocode, geocode_args in (
            (amazon_location_esri_result, amazon_location_geocoder,
             ("esri-geocoder", address)),
            (amazon_location_here_result, amazon_location_geocoder,
             ("here-geocoder", address)),
            (canada_post_result, canada_post_geocoder, (address,)),
        ):
            try:
                # Geocoders return a list of records; extend() keeps the
                # collected results a flat list of dicts (one CSV row each)
                provider_results.extend(geocode(*geocode_args))
            except Exception as e:
                print(e, row)

    # Save the geocoding results to CSV files
    save_csv(amazon_location_esri_result,
             "output/esri-geocoding-result-scenario-2.csv")
    save_csv(amazon_location_here_result,
             "output/here-geocoding-result-scenario-2.csv")
    save_csv(canada_post_result,
             "output/canada-post-geocoding-result-scenario-2.csv")


In [None]:
# Scenario 3: misspelled address

with open("input/input-addresses.csv", newline="") as csvfile:
    amazon_location_esri_result = []
    amazon_location_here_result = []
    canada_post_result = []

    # Define a regular expression pattern to match the letters "e" or "w", ignoring case
    pattern = re.compile("[ew]", re.IGNORECASE)

    # Read CSV file and iterate over rows
    # NOTE(review): with quotechar="|" a double quote is treated as ordinary
    # data, not as a field quote - confirm this matches the input file
    reader = csv.reader(csvfile, delimiter=",", quotechar="|")

    # Skip header row
    next(reader, None)
    for row in reader:
        print(row[0])

        # Split the address in the current row by whitespace
        tokens = row[1].split()
        # Misspell the address by removing every "e"/"w" from its last word;
        # an empty address has no last word and is left untouched
        if tokens:
            tokens[-1] = pattern.sub("", tokens[-1])
        # Rejoin the words into a single string separated by single spaces
        misspelled_address = " ".join(tokens)

        address = {
            "id": row[0], "text": f"{misspelled_address}, Calgary, AB, Canada"}

        # Query every provider independently: each has its own try/except so that
        # one failing lookup does not stop the others from being queried for this row
        for provider_results, geocode, geocode_args in (
            (amazon_location_esri_result, amazon_location_geocoder,
             ("esri-geocoder", address)),
            (amazon_location_here_result, amazon_location_geocoder,
             ("here-geocoder", address)),
            (canada_post_result, canada_post_geocoder, (address,)),
        ):
            try:
                # Geocoders return a list of records; extend() keeps the
                # collected results a flat list of dicts (one CSV row each)
                provider_results.extend(geocode(*geocode_args))
            except Exception as e:
                print(e, row)

    # Save the geocoding results to CSV files
    save_csv(amazon_location_esri_result,
             "output/esri-geocoding-result-scenario-3.csv")
    save_csv(amazon_location_here_result,
             "output/here-geocoding-result-scenario-3.csv")
    save_csv(canada_post_result,
             "output/canada-post-geocoding-result-scenario-3.csv")
