Permalink
Browse files

Added the unicode nazi

  • Loading branch information...
mitsuhiko committed Jan 22, 2011
0 parents commit 4f2dbc357e0f6e536949f1de1376626ed1861c00
Showing with 173 additions and 0 deletions.
  1. +32 −0 LICENSE
  2. +34 −0 README
  3. +107 −0 unicodenazi.py
32 LICENSE
@@ -0,0 +1,32 @@
+Copyright (c) 2011 by Armin Ronacher.
+
+Some rights reserved.
+
+Redistribution and use in source and binary forms of the software as well
+as documentation, with or without modification, are permitted provided
+that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+* The names of the contributors may not be used to endorse or
+ promote products derived from this software without specific
+ prior written permission.
+
+THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
+NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
34 README
@@ -0,0 +1,34 @@
+
+ // The Unicode Nazi //
+
+ Once enabled, the unicode nazi emits a UnicodeWarning whenever a
+ bytestring is implicitly converted to unicode or the other way round.
+ It's annoying as hell and only useful when porting libraries over from
+ Python 2 to Python 3. It has a huge impact on runtime performance and
+ will also complain about most of the stuff in the Python standard library.
+
+ - usage:
+
+ >>> import unicodenazi
+ >>> 'foo' == u'foo'
+ __main__:1: UnicodeWarning: Implicit conversion of str to unicode
+ True
+
+ - how to disable it again:
+
+ >>> unicodenazi.disable()
+
+ - and here is how to enable:
+
+ >>> unicodenazi.enable()
+
+ - if you want to enable it only for a given block:
+
+ >>> unicodenazi.disable()
+ >>> with unicodenazi.blockwise(enabled=True):
+ ... assert 'foo' == u'foo'
+ ...
+ __main__:2: UnicodeWarning: Implicit conversion of str to unicode
+
+ The reverse works too of course.
+
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+"""
+ unicodenazi
+ ~~~~~~~~~~~
+
+ Annoying but helpful helper to find improper use of unicode and
+ bytestring conversions.
+
+ :copyright: (c) 2011 by Armin Ronacher.
+ :license: BSD, see LICENSE for more details.
+"""
+import sys
+import codecs
+import warnings
+import contextlib
+
+
# Small hack to get the setdefaultencoding function back.
# NOTE(review): presumably needed because ``sys.setdefaultencoding`` is
# removed from the ``sys`` namespace during interpreter start-up on
# Python 2 -- confirm against the ``sys`` / ``site`` documentation.
#
# Snapshot the current namespace of ``sys`` first so it can be put back
# unchanged once the function has been grabbed.
_d = sys.__dict__.copy()
# ``reload`` is the Python 2 builtin; this statement must run before the
# attribute lookup on the next line.
reload(sys)
_setdefaultencoding = sys.setdefaultencoding
# Throw away whatever the reload left in ``sys`` and restore the snapshot,
# so the only lasting effect is the private reference kept above.
sys.__dict__.clear()
sys.__dict__.update(_d)
+
+
def warning_encode(input, errors='strict'):
    """Encode *input* as ASCII after warning about the implicit conversion.

    A ``UnicodeWarning`` is issued first; the actual work is then handed to
    :func:`codecs.ascii_encode`.

    :param input: the text to encode.
    :param errors: error handling scheme, passed through to the ASCII codec.
    :return: the result of :func:`codecs.ascii_encode` for these arguments.
    """
    notice = UnicodeWarning('Implicit conversion of unicode to str')
    # stacklevel=2 attributes the warning to the caller of this function
    # instead of to this line.
    warnings.warn(notice, stacklevel=2)
    encoded = codecs.ascii_encode(input, errors)
    return encoded
+
+
def warning_decode(input, errors='strict'):
    """Decode *input* from ASCII after warning about the implicit conversion.

    A ``UnicodeWarning`` is issued first; the actual work is then handed to
    :func:`codecs.ascii_decode`.

    :param input: the bytestring to decode.
    :param errors: error handling scheme, passed through to the ASCII codec.
    :return: the result of :func:`codecs.ascii_decode` for these arguments.
    """
    notice = UnicodeWarning('Implicit conversion of str to unicode')
    # stacklevel=2 attributes the warning to the caller of this function
    # instead of to this line.
    warnings.warn(notice, stacklevel=2)
    decoded = codecs.ascii_decode(input, errors)
    return decoded
+
+
class Codec(codecs.Codec):
    """Stateless codec whose two directions are the warning functions."""

    # staticmethod stops the plain module-level functions from being bound
    # to the instance, so they keep their (input, errors) signature.
    decode = staticmethod(warning_decode)
    encode = staticmethod(warning_encode)
+
+
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder that delegates every chunk to ``warning_encode``."""

    def encode(self, input, final=False):
        """Encode one chunk and return only the encoded data.

        :param input: the chunk to encode.
        :param final: accepted for interface compatibility; not used here.
        :return: the first item of the ``warning_encode`` result.
        """
        result = warning_encode(input, self.errors)
        return result[0]
+
+
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder that delegates every chunk to ``warning_decode``."""

    def decode(self, input, final=False):
        """Decode one chunk and return only the decoded data.

        :param input: the chunk to decode.
        :param final: accepted for interface compatibility; not used here.
        :return: the first item of the ``warning_decode`` result.
        """
        result = warning_decode(input, self.errors)
        return result[0]
+
+
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer; ``Codec`` comes first so its encode/decode are used."""
+
+
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader; ``Codec`` comes first so its encode/decode are used."""
+
+
def search_function(encoding):
    """Codec search function that resolves the ``unicode-nazi`` codec.

    Meant to be passed to :func:`codecs.register`.

    :param encoding: the encoding name being looked up by the registry.
    :return: a :class:`codecs.CodecInfo` describing the warning codec when
             *encoding* names it, ``None`` for every other name so that
             the registry moves on to the next search function.
    """
    # The registry normalises names before calling search functions, and
    # interpreters from 3.9 on turn hyphens into underscores while doing
    # so.  Accept both spellings so the codec resolves either way.
    if encoding not in ('unicode-nazi', 'unicode_nazi'):
        return None
    return codecs.CodecInfo(
        name='unicode-nazi',
        encode=warning_encode,
        decode=warning_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
+
+
def enable():
    """Turn the warnings on by making ``unicode-nazi`` the default encoding."""
    codec_name = 'unicode-nazi'
    _setdefaultencoding(codec_name)
+
+
def disable():
    """Turn the warnings off by making ``ascii`` the default encoding."""
    codec_name = 'ascii'
    _setdefaultencoding(codec_name)
+
+
def is_active():
    """Tell whether ``unicode-nazi`` is currently the default encoding."""
    current = sys.getdefaultencoding()
    return current == 'unicode-nazi'
+
+
@contextlib.contextmanager
def blockwise(enabled=True):
    """Context manager that forces the checker on or off for one block.

    The state found on entry is put back on exit, even when the block
    raises, so blocks can be nested and it is harmless to ask for the state
    that is already active.

    :param enabled: ``True`` to have the warnings enabled inside the block,
                    ``False`` to have them disabled.
    """
    # Remember the state on entry instead of assuming it is the opposite of
    # *enabled*; blindly toggling on exit would leave the checker switched
    # the wrong way whenever the requested state was already in effect.
    was_active = is_active()
    if enabled:
        enable()
    else:
        disable()
    try:
        yield
    finally:
        if was_active:
            enable()
        else:
            disable()
+
+
# Register the search function so the 'unicode-nazi' codec can be looked
# up through the codecs registry, then enable the warnings right away:
# both statements run at module level, i.e. as a side effect of importing.
codecs.register(search_function)
enable()

0 comments on commit 4f2dbc3

Please sign in to comment.