Permalink
Browse files

Merge pull request #66 from davidjb/default-field-type

Provide default string-based field for schema field types unknown to Sunburnt
  • Loading branch information...
2 parents 769053a + 8729b7c commit d3493174f7a4cda2de521167d8105c144686d8e7 @tow committed Dec 24, 2012
Showing with 93 additions and 7 deletions.
  1. +1 −0 .gitignore
  2. +2 −0 Changelog
  3. +45 −0 docs/connectionconfiguration.rst
  4. +11 −4 sunburnt/schema.py
  5. +34 −3 sunburnt/test_schema.py
View
@@ -1,4 +1,5 @@
*.pyc
+*.swp
.cache
MANIFEST
build
View
@@ -1,5 +1,7 @@
* 0.7 : unreleased
+ - Provide default string-based field for schema field types unknown to
+ Sunburnt. (@davidjb)
- Escape forward slash characters for compatibility with Solr 4.0 (@davidjb)
- Fix handling of queries with ``boost_relevancy`` applied - boost was
previously lost in some cases. (@davidjb)
- Ensure 'more like this' results are transformed using a query's
@@ -27,6 +27,51 @@ parameters.
currently active schema. If you want to use a different schema for
any reason, pass in a file object here which yields a schema
document.
+
+ When querying the currently active schema, Sunburnt will automatically
+ understand the available fields and their respective types. Sunburnt
+ has a variety of field helpers that automatically serialize and
+ deserialize data types behind the scenes (such as when querying Solr
+ and parsing a response). For instance, this means that if you have a
+ field ``quantity`` in your schema with type ``solr.IntField``, Sunburnt
+ is aware that values of this field are integers. So, values going to Solr
+ in a query will get serialized into an appropriate string, and those coming
+ back as strings will be deserialized as ``int`` values.
+
+ Most built-in Solr field types (in the ``solr.*`` namespace) are understood,
+ including:
+
+ ======================== ===========
+ Field Type               Python Type
+ ======================== ===========
+ solr.StrField            unicode
+ solr.TextField           unicode
+ solr.BoolField           bool
+ solr.ShortField          int (-32768 to 32767)
+ solr.IntField            int
+ solr.SortableIntField    int
+ solr.TrieIntField        int
+ solr.LongField           long
+ solr.SortableLongField   long
+ solr.TrieLongField       long
+ solr.FloatField          float
+ solr.SortableFloatField  float
+ solr.TrieFloatField      float
+ solr.DoubleField         float
+ solr.SortableDoubleField float
+ solr.TrieDoubleField     float
+ solr.DateField           datetime (or mx.DateTime)
+ solr.TrieDateField       datetime (or mx.DateTime)
+ solr.RandomSortField     str (default handling)
+ solr.UUIDField           uuid.UUID
+ solr.BinaryField         unicode (base64 decoded)
+ solr.PointType           solr_point (1 dimension)
+ solr.LatLonType          solr_point (2 dimensions)
+ solr.GeoHashField        solr_point (2 dimensions)
+ ======================== ===========
+
+ If you are using a custom field type that Sunburnt does not
+ natively understand, values will be treated as strings.
* ``http_connection``. By default, sunburnt will open a new ``httplib2.Http``
object to talk to the solr instance. If you want to re-use an
View
@@ -147,6 +147,14 @@ def match(self, name):
else:
return name.startswith(self.name[:-1])
+ def normalize(self, value):
+ """ Normalize the given value according to the field type.
+
+ This method does nothing by default, returning the given value
+ as is. Child classes may override this method as required.
+ """
+ return value
+
def instance_from_user_data(self, data):
return SolrFieldInstance.from_user_data(self, data)
@@ -392,6 +400,7 @@ class SolrSchema(object):
'solr.LatLonType':SolrPoint2Field,
'solr.GeoHashField':SolrPoint2Field,
}
+
def __init__(self, f):
"""initialize a schema object from a
filename or file-like object."""
@@ -441,10 +450,8 @@ def field_type_factory(self, field_type_node):
name, class_name = field_type_node.attrib['name'], field_type_node.attrib['class']
except KeyError, e:
raise SolrError("Invalid schema.xml: missing %s attribute on fieldType" % e.args[0])
- try:
- field_class = self.solr_data_types[class_name]
- except KeyError:
- raise SolrError("Unknown field_class '%s'" % class_name)
+ #Obtain field type for given class. Defaults to generic SolrField.
+ field_class = self.solr_data_types.get(class_name, SolrField)
return name, SolrFieldTypeFactory(field_class,
**self.translate_attributes(field_type_node.attrib))
View
@@ -82,11 +82,13 @@ def test_solr_date_from_strings():
<fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
</types>
<fields>
<field name="int_field" required="true" type="sint"/>
<field name="text_field" required="true" type="string" multiValued="true"/>
<field name="boolean_field" required="false" type="boolean"/>
+ <field name="location_field" required="false" type="location_rpt"/>
</fields>
<defaultSearchField>text_field</defaultSearchField>
<uniqueKey>int_field</uniqueKey>
@@ -103,7 +105,10 @@ def test_read_schema(self):
that we get the right set of fields, the right
default field, and the right unique key"""
assert set(self.s.fields.keys()) \
- == set(['boolean_field', 'int_field', 'text_field'])
+ == set(['boolean_field',
+ 'int_field',
+ 'text_field',
+ 'location_field'])
assert self.s.default_field_name == 'text_field'
assert self.s.unique_key == 'int_field'
@@ -113,8 +118,9 @@ def test_serialize_dict(self):
for k, v, v2 in (('int_field', 1, u'1'),
('text_field', 'text', u'text'),
('text_field', u'text', u'text'),
- ('boolean_field', True, u'true')):
- assert self.s.field_from_user_data(k, v).to_solr() == v2
+ ('boolean_field', True, u'true'),
+ ('location_field', 'POINT (30 10)', 'POINT (30 10)')):
+ assert self.s.field_from_user_data(k, v).to_solr() == v2
def test_missing_fields(self):
assert set(self.s.missing_fields([])) \
@@ -139,6 +145,31 @@ def test_serialize_value_list_fails_when_wrong_datatype(self):
else:
assert False
+ def test_unknown_field_type(self):
+ """ Check operation of a field type that is unknown to Sunburnt.
+ """
+ assert 'solr.SpatialRecursivePrefixTreeFieldType' \
+ not in SolrSchema.solr_data_types
+ field = self.s.fields['location_field']
+ assert field
+
+ #Boolean attributes are converted accordingly
+ assert field.geo == True
+ #All other attributes are strings
+ assert field.units == 'degrees'
+ assert field.distErrPct == '0.025'
+ assert field.maxDistErr == '0.000009'
+
+ #Test that the value is always consistent - both to and from Solr
+ value = 'POLYGON ((30 10, 10 20, 20 40, 40 40, 30 10))'
+ assert field.to_user_data(value) \
+ == field.from_user_data(value) \
+ == field.to_solr(value) \
+ == field.from_solr(value)
+
+ #Queried values will be escaped accordingly
+ assert field.to_query(value) == u'POLYGON\\ \\(\\(30\\ 10,\\ 10\\ 20,\\ 20\\ 40,\\ 40\\ 40,\\ 30\\ 10\\)\\)'
+
broken_schemata = {
"missing_name":

0 comments on commit d349317

Please sign in to comment.