From 68cac6eabf474dd32f317e1671f3e9dc2b5fd697 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Wed, 23 Apr 2014 16:25:49 +0200
Subject: [PATCH] Escape strings before writing them to the log.

This fixes: https://github.com/TheTorProject/ooni-probe/issues/302 for ooni-probe
---
Review note: log_encode() no longer raises for arguments that are neither
str nor unicode; it escapes str(logmsg) instead, so msg(), debug() and err()
keep accepting arbitrary objects as they did before escaping was added. Its
docstring was corrected as well (no prefix is ever added to the output).
Hunk sizes are unchanged; the post-image blob id on the ooni/utils/log.py
index line is still the one from the original commit.

 ooni/tests/test_utils.py | 12 +++++++++-
 ooni/utils/log.py        | 52 ++++++++++++++++++++++++++++++++--------
 2 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/ooni/tests/test_utils.py b/ooni/tests/test_utils.py
index 10aa0cc0..e21ab89b 100644
--- a/ooni/tests/test_utils.py
+++ b/ooni/tests/test_utils.py
@@ -1,7 +1,9 @@
 import os
 from twisted.trial import unittest
 
-from ooni.utils import pushFilenameStack
+from ooni.utils import pushFilenameStack, log
+
+
 class TestUtils(unittest.TestCase):
     def test_pushFilenameStack(self):
         basefilename = os.path.join(os.getcwd(), 'dummyfile')
@@ -20,3 +22,11 @@ def test_pushFilenameStack(self):
             self.assertEqual(str(i-1), str(c))
             f.close()
 
+    def test_log_encode(self):
+        logmsgs = (
+            (r"spam\x07\x08", "spam\a\b"),
+            (r"spam\x07\x08", u"spam\a\b"),
+            (r"ham\u237e", u"ham"+u"\u237e")
+        )
+        for encoded_logmsg, logmsg in logmsgs:
+            self.assertEqual(log.log_encode(logmsg), encoded_logmsg)
diff --git a/ooni/utils/log.py b/ooni/utils/log.py
index 7fafcfbd..beba78f3 100644
--- a/ooni/utils/log.py
+++ b/ooni/utils/log.py
@@ -1,5 +1,6 @@
-import sys
 import os
+import sys
+import codecs
 import logging
 import traceback
 
@@ -10,10 +11,33 @@
 
 from ooni import otime
 
-## Get rid of the annoying "No route found for
-## IPv6 destination warnings":
+# Get rid of the annoying "No route found for
+# IPv6 destination warnings":
 logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
 
+
+def log_encode(logmsg):
+    """
+    I encode logmsg as printable ASCII. Unicode and UTF-8 byte strings
+    are unicode-escaped, undecodable byte strings are string-escaped,
+    and any other object is converted with str() and then escaped.
+    No marker is added, so the output does not reveal the input type.
+    """
+    if isinstance(logmsg, unicode):
+        return codecs.encode(logmsg, 'unicode_escape')
+    elif isinstance(logmsg, str):
+        try:
+            unicodelogmsg = logmsg.decode('utf-8')
+        except UnicodeDecodeError:
+            return codecs.encode(logmsg, 'string_escape')
+        else:
+            return codecs.encode(unicodelogmsg, 'unicode_escape')
+    else:
+        # Callers may log non-string objects: escape their str() form,
+        # as the earlier "%s" formatting printed it, rather than raising.
+        return log_encode(str(logmsg))
+
+
 class LogWithNoPrefix(txlog.FileLogObserver):
     def emit(self, eventDict):
         text = txlog.textFromEventDict(eventDict)
@@ -23,6 +47,7 @@ def emit(self, eventDict):
         util.untilConcludes(self.write, "%s\n" % text)
         util.untilConcludes(self.flush)  # Hoorj!
 
+
 class OONILogger(object):
     def start(self, logfile=None, application_name="ooniprobe"):
         from ooni.settings import config
@@ -30,16 +55,17 @@ def start(self, logfile=None, application_name="ooniprobe"):
         daily_logfile = None
 
         if not logfile:
-            logfile = config.basic.logfile
+            logfile = os.path.expanduser(config.basic.logfile)
 
         log_folder = os.path.dirname(logfile)
         log_filename = os.path.basename(logfile)
 
         daily_logfile = DailyLogFile(log_filename, log_folder)
 
-        txlog.msg("Starting %s on %s (%s UTC)" % (application_name, otime.prettyDateNow(),
-                                                  otime.utcPrettyDateNow()))
-        
+        txlog.msg("Starting %s on %s (%s UTC)" % (application_name,
+                                                  otime.prettyDateNow(),
+                                                  otime.utcPrettyDateNow()))
+
         self.fileObserver = txlog.FileLogObserver(daily_logfile)
         self.stdoutObserver = LogWithNoPrefix(sys.stdout)
 
@@ -52,28 +78,34 @@ def stop(self):
 
 oonilogger = OONILogger()
 
+
 def start(logfile=None, application_name="ooniprobe"):
     oonilogger.start(logfile, application_name)
 
+
 def stop():
     oonilogger.stop()
 
+
 def msg(msg, *arg, **kw):
     from ooni.settings import config
     if config.logging:
-        print "%s" % msg
+        print "%s" % log_encode(msg)
+
 
 def debug(msg, *arg, **kw):
     from ooni.settings import config
     if config.advanced.debug and config.logging:
-        print "[D] %s" % msg
+        print "[D] %s" % log_encode(msg)
+
 
 def err(msg, *arg, **kw):
     from ooni.settings import config
     if config.logging:
         if isinstance(msg, Exception):
             msg = "%s: %s" % (msg.__class__.__name__, msg)
-        print "[!] %s" % msg
+        print "[!] %s" % log_encode(msg)
+
 
 def exception(error):
     """