
added poxx.py and localization testing docs

wraithan committed Jan 7, 2013
1 parent 2d634b5 commit 61d619589288932cbf4580401df805497f0f4c21
Showing with 278 additions and 0 deletions.
  1. +1 −0 .gitignore
  2. +207 −0 bin/poxx.py
  3. +28 −0 bin/test_locales.sh
  4. +1 −0 docs/index.rst
  5. +41 −0 docs/localization_testing.rst
.gitignore
@@ -20,3 +20,4 @@ build
tmp/*
*~
*.mo
+locale/xx
bin/poxx.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Munge a .po file so we English-bound can see what strings aren't marked
+for translation yet.
+
+Run this with a .po file as an argument. It will set the translated strings
+to a Swedish Chef-flavored version of the English, so untranslated strings
+stand out:
+
+    ./poxx.py locale/xx/LC_MESSAGES/messages.po
+
+This code is in the public domain.
+
+Contributors:
+
+* Ned Batchelder
+* Will Kahn-Greene
+"""
+
+import re
+import string
+import sys
+try:
+    import polib  # from http://bitbucket.org/izi/polib
+except ImportError:
+    print "You need to install polib. Do:"
+    print ""
+    print " pip install polib"
+    sys.exit()
+import HTMLParser
+
+
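+# Matches Python %-style interpolation placeholders (e.g. %s, %d, %(name)s,
+# %0.2f) so they can be passed through the munging untouched.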
+INTERP_RE = re.compile(
+    r'(%(?:[(].+?[)])?[#0 +-]?[.\d*]*[hlL]?[diouxXeEfFgGcrs%])')
+
+
+def wc(c):
+    return c == "'" or c in string.letters
+
+
+def nwc(c):
+    return not wc(c)
+
+
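+# Substitution rules applied by chef_transform(), checked in order against the
+# start of the remaining string. Each rule is a 6-tuple:
+#   (may match mid-word?, skip at the start of a word?, substring to match,
+#    require a word character after the match?, require a non-word character
+#    after the match?, replacement text)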
+TRANSFORM = (
+    # INW?, NIW?, match, WC?, NW?, replacement
+    (False, False, "an", False, False, "un"),
+    (False, False, "An", False, False, "Un"),
+    (False, False, "au", False, False, "oo"),
+    (False, False, "Au", False, False, "Oo"),
+    (False, False, "a", True, False, "e"),
+    (False, False, "A", True, False, "E"),
+    (False, False, "en", False, True, "ee"),
+    (True, False, "ew", False, False, "oo"),
+    (True, False, "e", False, True, "e-a"),
+    (False, True, "e", False, False, "i"),
+    (False, True, "E", False, False, "I"),
+    (True, False, "f", False, False, "ff"),
+    (True, False, "ir", False, False, "ur"),
+    (True, False, "i", False, False, "ee"),  # FIXME
+    (True, False, "ow", False, False, "oo"),
+    (False, True, "o", False, False, "oo"),
+    (False, True, "O", False, False, "Oo"),
+    (True, False, "o", False, False, "u"),
+    (False, False, "the", False, False, "zee"),
+    (False, False, "The", False, False, "Zee"),
+    (False, False, "th", False, True, "t"),
+    (True, False, "tion", False, False, "shun"),
+    (True, False, "u", False, False, "oo"),
+    (True, False, "U", False, False, "Oo"),
+    (False, False, "v", False, False, "f"),
+    (False, False, "V", False, False, "F"),
+    (False, False, "w", False, False, "v"),
+    (False, False, "W", False, False, "V")
+)
+
+
+def chef_transform(s):
+    # old_s = s
+    out = []
+
+    in_word = False  # in a word?
+
+    # TODO: This is awful--better to do a real lexer
+    while s:
+        if s.startswith((".", "!", "?")):
+            in_word = False
+            out.append(s[0])
+            s = s[1:]
+            continue
+
+        for mem in TRANSFORM:
+            if in_word and not mem[0]:
+                continue
+            if not in_word and mem[1]:
+                continue
+            if not s.startswith(mem[2]):
+                continue
+            try:
+                if mem[3] and not wc(s[len(mem[2])]):
+                    continue
+            except IndexError:
+                continue
+
+            try:
+                if mem[4] and not nwc(s[len(mem[2])]):
+                    continue
+            except IndexError:
+                continue
+
+            out.append(mem[5])
+            s = s[len(mem[2]):]
+            in_word = True
+            break
+
+        else:
+            out.append(s[0])
+            s = s[1:]
+
+    # print old_s, "->", out
+    return u"".join(out)
+
+
+class HtmlAwareMessageMunger(HTMLParser.HTMLParser):
+    def __init__(self):
+        HTMLParser.HTMLParser.__init__(self)
+        self.s = ""
+
+    def result(self):
+        return self.s
+
+    def xform(self, s):
+        return chef_transform(s)
+
+    def handle_starttag(self, tag, attrs, closed=False):
+        self.s += "<" + tag
+        for name, val in attrs:
+            self.s += " "
+            self.s += name
+            self.s += '="'
+            if name in ['alt', 'title']:
+                self.s += self.xform(val)
+            else:
+                self.s += val
+            self.s += '"'
+        if closed:
+            self.s += " /"
+        self.s += ">"
+
+    def handle_startendtag(self, tag, attrs):
+        self.handle_starttag(tag, attrs, closed=True)
+
+    def handle_endtag(self, tag):
+        self.s += "</" + tag + ">"
+
+    def handle_data(self, data):
+        # We don't want to munge placeholders, so split on them, keeping them
+        # in the list, then xform every other token.
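+        # e.g. INTERP_RE.split("Pay %(price)s now") gives
+        # ["Pay ", "%(price)s", " now"] -- odd indexes are the placeholders.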
+        toks = INTERP_RE.split(data)
+        for i, tok in enumerate(toks):
+            if i % 2:
+                self.s += tok
+            else:
+                self.s += self.xform(tok)
+
+    def handle_charref(self, name):
+        self.s += "&#" + name + ";"
+
+    def handle_entityref(self, name):
+        self.s += "&" + name + ";"
+
+
+def translate_string(s):
+    hamm = HtmlAwareMessageMunger()
+    hamm.feed(s)
+    out = hamm.result()
+
+    if out.endswith(" >"):
+        return out[:-2] + u" börk! >"
+    elif out.endswith("\n"):
+        # Strip only the trailing newline so the last real character survives.
+        return out[:-1] + u" börk börk börk!\n"
+    return out + u" börk!"
+
+
+def munge_one_file(fname):
+    po = polib.pofile(fname)
+    po.metadata["Language"] = "Swedish Chef"
+    po.metadata["Plural-Forms"] = "nplurals=2; plural= n != 1"
+    po.metadata["Content-Type"] = "text/plain; charset=UTF-8"
+    count = 0
+    for entry in po:
+        if entry.msgid_plural:
+            entry.msgstr_plural["0"] = translate_string(entry.msgid)
+            entry.msgstr_plural["1"] = translate_string(entry.msgid_plural)
+        else:
+            entry.msgstr = translate_string(entry.msgid)
+
+        if 'fuzzy' in entry.flags:
+            entry.flags.remove('fuzzy')  # clear the fuzzy flag
+        count += 1
+    print "Munged %d messages in %s" % (count, fname)
+    po.save()
+
+
+if __name__ == "__main__":
+    for fname in sys.argv[1:]:
+        munge_one_file(fname)
bin/test_locales.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# This creates a faux Swedish Chef locale under xx and transforms all the
+# strings such that every resulting string has four properties:
+#
+# 1. it's longer than the English equivalent (tests layout issues)
+# 2. it's different than the English equivalent (tests missing gettext calls)
+# 3. every string ends up with a non-ascii character (tests unicode)
+# 4. looks close enough to the English equivalent that you can quickly
+# figure out what's wrong
+#
+# Run this from the project root directory like this:
+#
+# $ bin/test_locales.sh
+
+echo "extract and merge...."
+./manage.py extract
+./manage.py merge
+
+echo "creating dir...."
+mkdir -p locale/xx/LC_MESSAGES
+
+echo "copying messages.pot file...."
+cp locale/templates/LC_MESSAGES/messages.pot locale/xx/LC_MESSAGES/messages.po
+
+echo "poxx messages.po file...."
+bin/poxx.py locale/xx/LC_MESSAGES/messages.po
+bin/compile-mo.sh locale/xx/
docs/index.rst
@@ -17,6 +17,7 @@ Contents
    :maxdepth: 1
    solitude_api
+   localization_testing
Indices and tables
------------------
docs/localization_testing.rst
@@ -0,0 +1,41 @@
+Localization Testing
+====================
+
+We are using a fake translation script called `poxx.py`_, which comes from Ned
+Batchelder's blog. The specific version we are using was lifted from Fjord_.
+
+What it does is create a translation for the ``xx`` locale that turns every
+string into something the `Swedish Chef`_ would say. There are some basic
+requirements for using it. You'll need to install polib_ like so::
+
+    pip install polib
+
+You'll also need gettext_. On OS X::
+
+    brew install gettext
+    brew link gettext
+
+Or on Ubuntu::
+
+    apt-get install gettext gettext-tools
+
+Once you have the requirements, you can run the script with the command::
+
+    ./bin/test_locales.sh
+
+You'll need to tweak your ``webpay/settings/local.py`` with the setting::
+
+    LANGUAGE_CODE = 'xx'
+
+Then you should be able to ``./manage.py runserver`` like normal and see
+everything translated. It should be very noticeable when a string is not
+translated. After updating your code/templates with new strings, just run
+``test_locales.sh`` again and it will regenerate the ``xx`` locale for you!
+
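+If you want to spot-check how a single string will be munged without running
+the whole pipeline, you can call the script's ``translate_string`` helper
+directly from a Python shell (a rough sketch, assuming you run it from the
+project root with polib_ installed; the exact output depends on the transform
+table in ``poxx.py``)::
+
+    $ python
+    >>> import sys
+    >>> sys.path.insert(0, 'bin')  # make bin/poxx.py importable as "poxx"
+    >>> from poxx import translate_string
+    >>> print translate_string(u'Thank you for your payment!')
+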
+.. _`poxx.py`: http://nedbatchelder.com/blog/201012/faked_translations_poxxpy.html
+.. _Fjord: https://github.com/mozilla/fjord
+.. _`Swedish Chef`: http://en.wikipedia.org/wiki/Swedish_Chef
+.. _polib: https://crate.io/packages/polib/
+.. _gettext: http://www.gnu.org/software/gettext/
