Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge remote-tracking branch 'johnnyjd/decode_str' into py3-for-review

Conflicts:
	musicbrainzngs/musicbrainz.py

Signed-off-by: Wieland Hoffmann <themineo@gmail.com>
  • Loading branch information...
commit d960a59bff0daf6f39ccaadee38d3187b3fa965a 2 parents 7ed9e11 + 97c535d
Wieland Hoffmann authored
Showing with 29 additions and 2 deletions.
  1. +6 −2 musicbrainzngs/musicbrainz.py
  2. +23 −0 musicbrainzngs/util.py
8 musicbrainzngs/musicbrainz.py
View
@@ -12,6 +12,7 @@
from xml.parsers import expat
from . import mbxml
from . import compat
+from . import util
_version = "0.3dev"
@@ -517,7 +518,10 @@ def _do_mb_search(entity, query='', fields={}, limit=None, offset=None):
for the given entity type.
"""
# Encode the query terms as a Lucene query string.
- query_parts = [query.replace('\x00', '').strip()]
+ query_parts = []
+ if query:
+ clean_query = util._unicode(query)
+ query_parts.append(clean_query)
for key, value in fields.items():
# Ensure this is a valid search field.
if key not in VALID_SEARCH_FIELDS[entity]:
@@ -526,8 +530,8 @@ def _do_mb_search(entity, query='', fields={}, limit=None, offset=None):
)
# Escape Lucene's special characters.
+ value = util._unicode(value)
value = re.sub(r'([+\-&|!(){}\[\]\^"~*?:\\])', r'\\\1', value)
- value = value.replace('\x00', '').strip()
value = value.lower() # Avoid binary operators like OR.
if value:
query_parts.append('%s:(%s)' % (key, value))
23 musicbrainzngs/util.py
View
@@ -0,0 +1,23 @@
+# This file is part of the musicbrainzngs library
+# Copyright (C) Alastair Porter, Adrian Sampson, and others
+# This file is distributed under a BSD-2-Clause type license.
+# See the COPYING file for more information.
+
+import sys
+import locale
+
+def _unicode(string, encoding=None):
+ """Try to decode byte strings to unicode.
+ This can only be a guess, but this might be better than failing.
+ It is safe to use this on numbers or strings that are already unicode.
+ """
+ if isinstance(string, str):
+ # use given encoding, stdin, preferred until something != None is found
+ if encoding is None:
+ encoding = sys.stdin.encoding
+ if encoding is None:
+ encoding = locale.getpreferredencoding()
+ unicode_string = unicode(string, encoding, "ignore")
+ else:
+ unicode_string = unicode(string)
+ return unicode_string.replace('\x00', '').strip()
Please sign in to comment.
Something went wrong with that request. Please try again.