Skip to content

Commit

Permalink
Better type adaption / validation on OGR import.
Browse files Browse the repository at this point in the history
  • Loading branch information
olsen232 committed Dec 8, 2020
1 parent 5d2509d commit e8e5318
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 40 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where
* Bugfix - Fixed a potential `unexpected NoneType` in `WorkingCopy.is_dirty`
* Bugfix - imports now preserve fixed-precision numeric types in most situations.
* Bugfix - imports now preserve length of text/string fields.
* Bugfix - imported fields of type `numeric` are now stored internally as strings, as required by the datasets V2 spec. [#325](https://github.com/koordinates/sno/pull/325)

## 0.6.0

Expand Down
31 changes: 8 additions & 23 deletions sno/ogr_import_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
NO_IMPORT_SOURCE,
NO_TABLE,
)
from .geometry import Geometry, ogr_to_gpkg_geom
from .geometry import Geometry
from .import_source import ImportSource
from .ogr_util import adapt_value_noop, get_type_value_adapter
from .ogr_util import get_type_value_adapter
from .output_util import dump_json_output, get_input_mode, InputMode
from .schema import Schema, ColumnSchema
from .utils import ungenerator
Expand Down Expand Up @@ -352,34 +352,19 @@ def _get_primary_key_value(self, ogr_feature, name):

@property
@functools.lru_cache(maxsize=1)
def field_adapter_map(self):
    """
    Returns a dict mapping each schema column name to the value adapter
    for that column's data type (as given by get_type_value_adapter).
    """
    # NOTE(review): lru_cache on a method is keyed on `self`, so with
    # maxsize=1 only the most recently used instance's map is retained -
    # confirm this is the intended caching behaviour.
    return {col.name: get_type_value_adapter(col.data_type) for col in self.schema}

@ungenerator(dict)
def _ogr_feature_to_sno_feature(self, ogr_feature):
    """
    Yields (column name, adapted value) pairs for the given OGR feature;
    the ungenerator decorator collects them into a dict.

    The raw value comes from one of three places depending on the column:
    the primary key helper, the feature's geometry, or a plain OGR field.
    Every raw value is then passed through that column's adapter from
    field_adapter_map.
    """
    for name, adapter in self.field_adapter_map.items():
        if name == self.primary_key:
            value = self._get_primary_key_value(ogr_feature, name)
        elif name in self.geometry_column_names:
            # NOTE(review): GetGeometryRef() takes no column name, so every
            # geometry column would receive the same geometry - presumably
            # only one geometry column is expected; confirm.
            value = ogr_feature.GetGeometryRef()
        else:
            value = ogr_feature.GetField(name)
        yield name, adapter(value)

def _iter_ogr_features(self):
l = self.ogrlayer
Expand Down
68 changes: 51 additions & 17 deletions sno/ogr_util.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,69 @@
from osgeo import ogr


def adapt_value_noop(value):
    """Identity adapter: returns the given value unchanged."""
    return value
from .geometry import ogr_to_gpkg_geom


def adapt_ogr_date(value):
    """
    Converts an OGR date value to an ISO8601 date string.

    OGR uses this strange format: '2012/07/09'
    We convert back to a normal ISO8601 format: '2012-07-09'

    None is passed through unchanged. Any other value is coerced with str()
    before the separators are replaced, so non-string input does not fail
    on a missing .replace() method.
    """
    if value is None:
        return None
    return str(value).replace("/", "-")


def adapt_ogr_timestamp(value):
    """
    Converts an OGR datetime value to an ISO8601 timestamp string.

    OGR uses this strange format: '2012/07/09 09:01:52+00'
    We convert back to a normal ISO8601 format: '2012-07-09T09:01:52Z'

    None is passed through unchanged. Any other value is coerced with str()
    before the replacements are applied.
    """
    if value is None:
        return None
    return str(value).replace("/", "-").replace(" ", "T").replace("+00", "Z")


# Previous name of this adapter, kept so existing references keep working.
adapt_ogr_datetime = adapt_ogr_timestamp


def adapt_ogr_geometry(value):
    """
    Converts the given OGR geometry using ogr_to_gpkg_geom.
    None is passed through unchanged.
    """
    return None if value is None else ogr_to_gpkg_geom(value)


def ensure_bool(value):
    """Coerces the given value to bool; None is passed through unchanged."""
    if value is None:
        return None
    return bool(value)


def ensure_bytes(value):
    """Coerces the given value to bytes; None is passed through unchanged."""
    if value is None:
        return None
    return bytes(value)


def ensure_str(value):
    """Coerces the given value to str; None is passed through unchanged."""
    if value is None:
        return None
    return str(value)


def ensure_int(value):
    """Coerces the given value to int; None is passed through unchanged."""
    if value is None:
        return None
    return int(value)


def ensure_float(value):
    """Coerces the given value to float; None is passed through unchanged."""
    if value is None:
        return None
    return float(value)


# Lookup table from a V2 data type name to the function that adapts a value
# read via OGR into that type. get_type_value_adapter() indexes this table.
# Date, timestamp and geometry values get format conversions; the remaining
# types are only coerced to the matching Python type, with None passed through.
OGR_TYPE_ADAPTERS = {
    "boolean": ensure_bool,
    "blob": ensure_bytes,
    "date": adapt_ogr_date,
    "float": ensure_float,
    "geometry": adapt_ogr_geometry,
    "integer": ensure_int,
    "interval": ensure_str,
    # numeric values are stored as strings (per the changelog entry for this
    # change: "as required by datasets V2 spec").
    "numeric": ensure_str,
    "text": ensure_str,
    "time": ensure_str,
    "timestamp": adapt_ogr_timestamp,
}


def get_type_value_adapter(v2_type):
    """
    Returns a function which will convert values to the given V2 type
    from the equivalent OGR type.

    For most types this should be a no-op, but we try to be defensive and ensure that
    (for instance) floats stay floats and ints stay ints.

    v2_type is looked up as a key of OGR_TYPE_ADAPTERS (eg "integer", "text");
    a KeyError is raised if there is no adapter registered for it.
    """
    return OGR_TYPE_ADAPTERS[v2_type]

0 comments on commit e8e5318

Please sign in to comment.