In [1]:
import csv
import logging
import math
from pathlib import Path
from typing import Optional
from warnings import warn

import h3

In [2]:
# --- tunable parameters -------------------------------------------------------
h3_res = 8
log_level = 'DEBUG'

# --- project paths ------------------------------------------------------------
# the project root is taken to be the parent of the current working directory
dir_prj = Path.cwd().parent
dir_data = dir_prj.joinpath('data')
dir_raw = dir_data.joinpath('raw')
dir_out = dir_data.joinpath('processed')

# input table and the resolution-stamped output table
csv_raw = dir_raw.joinpath("need_h3_level8_records.csv")
csv_out = dir_out.joinpath(f"poi_h3_{h3_res:02d}.csv")

# --- logging ------------------------------------------------------------------
# configure the root logger at the same level so notebook messages bubble up
logging.basicConfig(level=log_level)

# dedicated logger for this notebook
logger = logging.getLogger('coordinates-to-h3')
logger.setLevel(log_level)

# stop here if the raw input is not where it is expected to be
assert csv_raw.exists()

# show the output path as the cell result
csv_out

WindowsPath('D:/projects/coordinates-to-h3/data/processed/poi_h3_08.csv')

In [3]:
def _parse_coordinate(value: Optional[str]) -> Optional[float]:
    """
    Convert a raw CSV cell into a finite float, or return None when it cannot be used as a coordinate.

    Args:
        value: raw cell value from ``csv.DictReader``; ``None`` when the column or cell is absent
    """
    if value is None:
        return None

    try:
        coordinate = float(value)
    except (TypeError, ValueError):
        # blank cells and non-numeric text end up here
        return None

    # "nan" and "inf" parse as floats but do not describe a location
    return coordinate if math.isfinite(coordinate) else None


def add_h3_field(
    input_file: Path, 
    output_file: Path, 
    h3_resolution: int = 8,
    longitude_field: str = 'longitude', 
    latitude_field: str = 'latitude',
    output_h3_field_name: Optional[str] = None
) -> Path:
    """
    Read a CSV file, calculate an H3 index from the coordinate columns and save the result to a new file.

    Rows whose longitude or latitude is missing, blank, non-numeric or non-finite are still written to
    the output, with an empty H3 value, and a warning is issued for each of them.

    Args:
        input_file: path to the input CSV file (UTF-8, with a header row)
        output_file: path to the output CSV file; parent directories are created if needed
        h3_resolution: H3 resolution to use for calculating the H3 index; a string of digits is
            also accepted and converted to an integer
        longitude_field: field in input CSV with longitude (X) coordinate values
        latitude_field: field in input CSV with latitude (Y) coordinate values
        output_h3_field_name: field to add to output CSV with H3 indices; defaults to
            ``h3_<resolution>`` with the resolution zero-padded to two digits. If the input already
            has a field with this name, its values are overwritten instead of adding a duplicate.

    Returns:
        The ``output_file`` path that was passed in.

    Raises:
        ValueError: if ``h3_resolution`` is a non-numeric string, if ``output_file`` points to the
            same file as ``input_file``, or if the input file has no header row.
    """
    # make sure the resolution is, in fact, a numeric integer
    if isinstance(h3_resolution, str):
        if not h3_resolution.isdigit():
            raise ValueError('h3_resolution must be an integer')
        h3_resolution = int(h3_resolution)

    # opening the output for writing truncates it, which would destroy the input mid-read
    if Path(input_file).resolve() == Path(output_file).resolve():
        raise ValueError('output_file must be a different file than input_file')

    logger.debug(f'Using "{longitude_field}" for X coordinates and "{latitude_field}" for Y '
                 f"coordinates to calculate H3 indices at H3 level {h3_resolution}.")

    # if an output field name is not explicitly provided, create one
    if output_h3_field_name is None:
        output_h3_field_name = f"h3_{h3_resolution:02d}"

    logger.debug(f'Writing H3 indices to new field named "{output_h3_field_name}"')

    # tracked separately from the loop variable so a header-only input reports zero rows
    row_count = 0

    # open the source CSV and use a reader to load values as dictionaries
    with open(input_file, mode='r', newline='', encoding='utf-8') as infile:
        reader = csv.DictReader(infile)

        # fieldnames is None when the file has no header row at all
        if reader.fieldnames is None:
            raise ValueError(f'{input_file} is empty; a header row is required')

        # output fields are the input fields plus the H3 field, without duplicating an existing one
        fieldnames = list(reader.fieldnames)
        if output_h3_field_name in fieldnames:
            logger.warning(f'Field "{output_h3_field_name}" already exists in {input_file}; '
                           f'its values will be overwritten.')
        else:
            fieldnames.append(output_h3_field_name)

        # make sure there is somewhere to write to
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)

        # open the file to output to using a dictionary writer
        with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)

            # write the field names in the first row, the header
            writer.writeheader()

            # iterate rows in the reader
            for idx, row in enumerate(reader):

                # retrieve the coordinate values as floats, None if unusable
                lon_val = _parse_coordinate(row.get(longitude_field))
                lat_val = _parse_coordinate(row.get(latitude_field))

                # both coordinates are needed to locate the row
                if lon_val is None or lat_val is None:
                    warn(f'Cannot get H3 index for row {idx:,} because valid longitude and latitude '
                         f'values were not retrieved.')
                    h3_idx = None

                else:
                    h3_idx = h3.latlng_to_cell(lat_val, lon_val, h3_resolution)

                # set the H3 index in the row dictionary
                row[output_h3_field_name] = h3_idx

                # write the row to the output table
                writer.writerow(row)
                row_count = idx + 1

    logger.info(f"{row_count:,} rows written to {output_file}")

    return output_file

In [4]:
%%time
add_h3_field(csv_raw, csv_out, h3_resolution=h3_res)

DEBUG:coordinates-to-h3:Using "longitude" for X coordinates and "latitude" for coordinates to calculate H3 indices at H3 level 8.
DEBUG:coordinates-to-h3:Writing H3 indices to new field named "h3_08"
INFO:coordinates-to-h3:5,635 rows written to D:\projects\coordinates-to-h3\data\processed\poi_h3_08.csv


CPU times: total: 46.9 ms
Wall time: 46.1 ms


WindowsPath('D:/projects/coordinates-to-h3/data/processed/poi_h3_08.csv')