String handling for netcdf variable
Adding a new function `netcdf_extract_string` that takes care of the possible ways
strings can be stored in netcdf. This fixes test failures seen with version 1.3.8,
which switched to returning unicode from the `chartostring` method.
Kirill888 committed May 29, 2017
1 parent 0ce2711 commit bb7c6f8
Showing 2 changed files with 17 additions and 12 deletions.
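For orientation, here is a minimal usage sketch of reading string data through the helper touched by this change. It is not part of the commit: the file name 'example.nc' is hypothetical, while the 'dataset' variable name is taken from the integration test below.

# Hypothetical usage sketch; 'example.nc' stands in for a real storage unit.
from datacube.utils import read_strings_from_netcdf

for text in read_strings_from_netcdf('example.nc', variable='dataset'):
    # Each item is a plain Python str, regardless of whether the file stores
    # strings as byte or unicode character arrays, and regardless of which
    # netCDF4 version is installed.
    print(text)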
21 changes: 15 additions & 6 deletions datacube/utils/__init__.py
@@ -283,6 +283,20 @@ def read_documents(*paths):
                          .format(path.name, _ALL_SUPPORTED_EXTENSIONS))


+def netcdf_extract_string(chars):
+    """
+    Convert netcdf S|U chars to Unicode string.
+    """
+    if isinstance(chars, str):
+        return chars
+
+    chars = netCDF4.chartostring(chars)
+    if chars.dtype.kind == 'U':
+        return str(chars)
+    else:
+        return str(numpy.char.decode(chars))
+
+
 def read_strings_from_netcdf(path, variable):
     """Load all of the string encoded data from a variable in a NetCDF file.
@@ -292,12 +306,7 @@ def read_strings_from_netcdf(path, variable):
     """
     with netCDF4.Dataset(str(path)) as ds:
         for chars in ds[variable]:
-            chars = netCDF4.chartostring(chars)
-            if chars.dtype.kind == 'U':
-                yield str(chars)
-            else:
-                yield str(numpy.char.decode(chars))
-
+            yield netcdf_extract_string(chars)

 def validate_document(document, schema, schema_folder=None):
     try:
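As a rough illustration of the inputs the new helper is meant to accept, based on its docstring rather than on code in this commit, the argument can be a plain str, a byte character array, or a unicode character array:

import numpy
from datacube.utils import netcdf_extract_string

# Already a Python string: returned unchanged.
assert netcdf_extract_string('abc') == 'abc'

# Fixed-width byte characters (dtype kind 'S'), the classic NetCDF layout;
# older netCDF4 releases leave chartostring output as bytes, which the helper
# decodes, while newer releases already return unicode.
byte_chars = numpy.array([b'a', b'b', b'c'], dtype='S1')
assert netcdf_extract_string(byte_chars) == 'abc'

# Unicode characters (dtype kind 'U'); assumes a netCDF4 version whose
# chartostring accepts 'U1' input.
unicode_chars = numpy.array(['a', 'b', 'c'], dtype='U1')
assert netcdf_extract_string(unicode_chars) == 'abc'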
8 changes: 2 additions & 6 deletions integration_tests/test_full_ingestion.py
@@ -4,7 +4,6 @@
 from pathlib import Path

 import netCDF4
-import numpy as np
 import pytest

 import yaml
@@ -13,7 +12,7 @@
 from datacube.api.query import query_group_by

 import datacube.scripts.cli_app
-from datacube.utils import geometry, read_documents
+from datacube.utils import geometry, read_documents, netcdf_extract_string
 from .conftest import EXAMPLE_LS5_DATASET_ID

 PROJECT_ROOT = Path(__file__).parents[1]
@@ -147,10 +146,7 @@ def check_attributes(obj, attrs):

 def check_dataset_metadata_in_storage_unit(nco, dataset_dir):
     assert len(nco.variables['dataset']) == 1  # 1 time slice
-    stored_metadata = nco.variables['dataset'][0]
-    if not isinstance(stored_metadata, str):
-        stored_metadata = netCDF4.chartostring(stored_metadata)
-        stored_metadata = str(np.char.decode(stored_metadata))
+    stored_metadata = netcdf_extract_string(nco.variables['dataset'][0])
     ds_filename = dataset_dir / 'agdc-metadata.yaml'

     stored = yaml.safe_load(stored_metadata)
