Skip to content

Commit

Permalink
termio.py: Changed the rate limiter to only engage after a timeout of…
Browse files Browse the repository at this point in the history
… 5 seconds. The previous two-second timeout was too short for embedded/slower systems. This will eventually be a tunable parameter.

termio.py:  Changed the logic inside of term_write() to ensure that all characters are stored as bytes instead of a mix of Unicode and bytes.  This was to work around some issues with Python 3 support.
termio.py:  Inline images are working again as a result of the aforementioned change.  Apparently that feature broke when I made a change to termio ~1.5 weeks ago to get Python 3 working again.
terminal.py:  handle_special has been modified to be more friendly with Python 3.  As a bonus this change seems to have sped it up a tiny bit.
terminal.py:  Terminal.write() was checking for a UnicodeEncodeError when it should've been checking for a UnicodeDecodeError.
terminal.py:  Fixed an issue with Unicode diacritics...  Sometimes combining characters like  ̴ (special form of tilde) would still result in two characters after being run through unicodedata.normalize().  This would result in a TypeError when trying to add the characters to the screen array, since it was trying to add two characters to a position that can only accept one.  The diacritic logic now gracefully handles this situation by writing each character to the screen array individually.
logviewer.py:  After running through some test scenarios I noticed that Unicode characters weren't showing up properly when playing back logs in real-time.  I fixed this (a .decode() call was missing in two spots).
test_term_renditions.py:  Added a short delay between line outputs in order to reduce the CPU consumption and prevent the rate limiter from truncating the output.
  • Loading branch information
liftoff committed Sep 16, 2012
1 parent 6f92ff3 commit 02ac655
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 36 deletions.
5 changes: 3 additions & 2 deletions gateone/logviewer.py
Expand Up @@ -123,7 +123,7 @@ def playback_log(log_path, file_like, show_esc=False):
frame = frame[14:] # Skips the colon frame = frame[14:] # Skips the colon
if i == 0: if i == 0:
# Write it out immediately # Write it out immediately
file_like.write(frame) file_like.write(frame.decode('UTF-8'))
prev_frame_time = frame_time prev_frame_time = frame_time
else: else:
# Wait until the time between the previous frame and now has passed # Wait until the time between the previous frame and now has passed
Expand All @@ -132,7 +132,7 @@ def playback_log(log_path, file_like, show_esc=False):
prev_frame_time = frame_time prev_frame_time = frame_time
if show_esc: if show_esc:
frame = raw(frame) frame = raw(frame)
file_like.write(frame) file_like.write(frame.decode('UTF-8'))
file_like.flush() file_like.flush()
except ValueError: except ValueError:
# End of file. No biggie. # End of file. No biggie.
Expand Down Expand Up @@ -219,6 +219,7 @@ def flatten_log(log_path, preserve_renditions=True, show_esc=False):
cr = False cr = False
# We skip the first frame, [1:] because it holds the recording metadata # We skip the first frame, [1:] because it holds the recording metadata
for frame in lines.split(SEPARATOR.encode('UTF-8'))[1:]: for frame in lines.split(SEPARATOR.encode('UTF-8'))[1:]:
frame = frame.decode('UTF-8', 'ignore')
try: try:
frame_time = float(frame[:13]) # First 13 chars is the timestamp frame_time = float(frame[:13]) # First 13 chars is the timestamp
# Convert to datetime object # Convert to datetime object
Expand Down
38 changes: 29 additions & 9 deletions gateone/terminal.py
Expand Up @@ -278,9 +278,9 @@


def handle_special(e): def handle_special(e):
""" """
Used in conjunction with codecs.register_error, will replace special ascii Used in conjunction with :py:func:`codecs.register_error`, will replace
characters such as 0xDA and 0xc4 (which are used by ncurses) with their special ascii characters such as 0xDA and 0xc4 (which are used by ncurses)
Unicode equivalents. with their Unicode equivalents.
""" """
# TODO: Get this using curses special characters when appropriate # TODO: Get this using curses special characters when appropriate
curses_specials = { curses_specials = {
Expand Down Expand Up @@ -447,11 +447,18 @@ def handle_special(e):
} }
# I left this in its odd state so I could differentiate between the two # I left this in its odd state so I could differentiate between the two
# in the future. # in the future.
chars = e.object
if bytes == str: # Python 2
# Convert e.object to a bytearray for an easy switch to integers.
# It is quicker than calling ord(char) on each char in e.object
chars = bytearray(e.object)
# NOTE: In Python 3 when you iterate over bytes they appear as integers.
# So we don't need to convert to a bytearray in Python 3.
if isinstance(e, (UnicodeEncodeError, UnicodeTranslateError)): if isinstance(e, (UnicodeEncodeError, UnicodeTranslateError)):
s = [u'%s' % specials[ord(c)] for c in e.object[e.start:e.end]] s = [u'%s' % specials[c] for c in chars[e.start:e.end]]
return ''.join(s), e.end return ''.join(s), e.end
else: else:
s = [u'%s' % specials[ord(c)] for c in e.object[e.start:e.end]] s = [u'%s' % specials[c] for c in chars[e.start:e.end]]
return ''.join(s), e.end return ''.join(s), e.end
codecs.register_error('handle_special', handle_special) codecs.register_error('handle_special', handle_special)


Expand Down Expand Up @@ -1284,14 +1291,17 @@ def write(self, chars, special_checks=True):
# Have to convert to unicode # Have to convert to unicode
try: try:
chars = chars.decode('utf-8', "handle_special") chars = chars.decode('utf-8', "handle_special")
except UnicodeEncodeError: except UnicodeDecodeError:
# Just in case # Just in case
try: try:
chars = chars.decode('utf-8', "ignore") chars = chars.decode('utf-8', "ignore")
except UnicodeEncodeError: except UnicodeDecodeError:
logging.error( logging.error(
_("Double UnicodeEncodeError in Terminal.terminal.")) _("Double UnicodeDecodeError in terminal.Terminal."))
return return
except AttributeError:
# In Python 3 strings don't have .decode()
pass # Already Unicode
for char in chars: for char in chars:
charnum = ord(char) charnum = ord(char)
if charnum in specials: if charnum in specials:
Expand Down Expand Up @@ -1375,7 +1385,17 @@ def write(self, chars, special_checks=True):
current = self.screen[self.cursorY][self.cursorX] current = self.screen[self.cursorY][self.cursorX]
combined = unicodedata.normalize( combined = unicodedata.normalize(
'NFC', u'%s%s' % (current, char)) 'NFC', u'%s%s' % (current, char))
self.screen[self.cursorY][self.cursorX] = combined # Sometimes a joined combining char can still result
# a string of length > 1. So we need to handle that
if len(combined) > 1:
for i, c in enumerate(combined):
self.screen[self.cursorY][
self.cursorX] = c
if i < len(combined) - 1:
cursor_right()
else:
self.screen[self.cursorY][
self.cursorX] = combined
else: else:
# Normal character # Normal character
self.screen[self.cursorY][self.cursorX] = char self.screen[self.cursorY][self.cursorX] = char
Expand Down
42 changes: 20 additions & 22 deletions gateone/termio.py
Expand Up @@ -182,10 +182,10 @@ def retrieve_last_frame(golog_path):
end_frames = golog.read().split(encoded_separator) end_frames = golog.read().split(encoded_separator)
if len(end_frames) > 1: if len(end_frames) > 1:
# Very last item will be empty # Very last item will be empty
return end_frames[-2].decode('UTF-8') return end_frames[-2].decode('UTF-8', 'ignore')
else: else:
# Just a single frame here, return it as-is # Just a single frame here, return it as-is
return end_frames[0].decode('UTF-8') return end_frames[0].decode('UTF-8', 'ignore')


def get_or_update_metadata(golog_path, user, force_update=False): def get_or_update_metadata(golog_path, user, force_update=False):
""" """
Expand Down Expand Up @@ -234,7 +234,7 @@ def get_or_update_metadata(golog_path, user, force_update=False):
log_data += chunk log_data += chunk
if len(chunk) < chunk_size: if len(chunk) < chunk_size:
break break
log_data = log_data.decode('UTF-8') log_data = log_data.decode('UTF-8', 'ignore')
start_date = first_frame[:13] # Getting the start date is easy start_date = first_frame[:13] # Getting the start date is easy
last_frame = retrieve_last_frame(golog_path) # This takes some work last_frame = retrieve_last_frame(golog_path) # This takes some work
end_date = last_frame[:13] end_date = last_frame[:13]
Expand Down Expand Up @@ -533,45 +533,43 @@ def term_write(self, stream):
.. note:: This kind of logging doesn't capture user keystrokes. This is intentional as we don't want passwords winding up in the logs. .. note:: This kind of logging doesn't capture user keystrokes. This is intentional as we don't want passwords winding up in the logs.
""" """
#logging.debug('term_write() stream: %s' % repr(stream))
# Write to the log (if configured) # Write to the log (if configured)
separator = b"\xf3\xb0\xbc\x8f"
if self.log_path: if self.log_path:
now = int(round(time.time() * 1000)) # Using .encode() below ensures the result will be bytes
now = str(int(round(time.time() * 1000))).encode('UTF-8')
if not os.path.exists(self.log_path): if not os.path.exists(self.log_path):
# Write the first frame as metadata # Write the first frame as metadata
metadata = { metadata = {
'version': '1.0', # Log format version 'version': '1.0', # Log format version
'rows': self.rows, 'rows': self.rows,
'cols': self.cols, 'cols': self.cols,
'start_date': now 'start_date': now.decode('UTF-8') # JSON needs strings
# NOTE: end_date should be added later when the is read for # NOTE: end_date should be added later when the is read for
# the first time by either the logviewer or the logging # the first time by either the logviewer or the logging
# plugin. # plugin.
} }
# The hope is that we can use the first-frame-metadata paradigm # The hope is that we can use the first-frame-metadata paradigm
# to store all sorts of useful information about a log. # to store all sorts of useful information about a log.
metadata_frame = str(json_encode(metadata)) # NOTE: Using .encode() below to ensure it is bytes in Python 3
metadata_frame = u"%s:%s\U000f0f0f" % (now, metadata_frame) metadata_frame = json_encode(metadata).encode('UTF-8')
if bytes != str: # Python 3 # Using concatenation of bytes below to ensure compatibility
metadata_frame = bytes(metadata_frame, 'UTF-8') # with both Python 2 and Python 3.
else: metadata_frame = now + b":" + metadata_frame + separator
metadata_frame = metadata_frame.encode('UTF-8')
self.log = gzip.open(self.log_path, mode='a') self.log = gzip.open(self.log_path, mode='a')
self.log.write(metadata_frame) self.log.write(metadata_frame)
if not self.log: # Only comes into play if the file already exists if not self.log: # Only comes into play if the file already exists
self.log = gzip.open(self.log_path, mode='a') self.log = gzip.open(self.log_path, mode='a')
# NOTE: I'm using an obscure unicode symbol in order to avoid # NOTE: I'm using an obscure unicode symbol in order to avoid
# conflicts. We need to dpo our best to ensure that we can # conflicts. We need to do our best to ensure that we can
# differentiate between terminal output and our log format... # differentiate between terminal output and our log format...
# This should do the trick because it is highly unlikely that # This should do the trick because it is highly unlikely that
# someone would be displaying this obscure unicode symbol on an # someone would be displaying this obscure unicode symbol on an
# actual terminal unless they were using Gate One to view a # actual terminal unless they were using Gate One to view a
# Gate One log file in vim or something =) # Gate One log file in vim or something =)
# \U000f0f0f == U+F0F0F (Private Use Symbol) # "\xf3\xb0\xbc\x8f" == \U000f0f0f == U+F0F0F (Private Use Symbol)
output = u"%s:%s\U000f0f0f" % (now, stream.decode('UTF-8')) output = now + b":" + stream + separator
if bytes != str: # Python 3
output = bytes(output, 'UTF-8')
else:
output = output.encode('UTF-8')
self.log.write(output) self.log.write(output)
# NOTE: Gate One's log format is special in that it can be used for both # NOTE: Gate One's log format is special in that it can be used for both
# playing back recorded sessions *or* generating syslog-like output. # playing back recorded sessions *or* generating syslog-like output.
Expand Down Expand Up @@ -1472,9 +1470,9 @@ def _read(self, bytes=-1):
# feeding us too much data (so we can engage the rate # feeding us too much data (so we can engage the rate
# limiter). # limiter).
bytes = 8192 # Should be plenty bytes = 8192 # Should be plenty
# If we need to block/read for longer than two seconds # If we need to block/read for longer than five seconds
# the fd is outputting too much data. # the fd is outputting too much data.
two_seconds = timedelta(seconds=2) five_seconds = timedelta(seconds=5)
loop_start = datetime.now() loop_start = datetime.now()
while True: while True:
updated = reader.read(bytes) updated = reader.read(bytes)
Expand All @@ -1488,9 +1486,9 @@ def _read(self, bytes=-1):
# eventually have to process it all which would # eventually have to process it all which would
# take forever. # take forever.
break break
elif datetime.now() - loop_start > two_seconds: elif datetime.now() - loop_start > five_seconds:
self._blocked_io_handler() self._blocked_io_handler()
result += str(updated) result += updated
self.term_write(updated) self.term_write(updated)
elif bytes: elif bytes:
result = reader.read(bytes) result = reader.read(bytes)
Expand Down
12 changes: 9 additions & 3 deletions gateone/tests/test_term_renditions.py
Expand Up @@ -526,7 +526,13 @@ def color_combos_256():
print("IF YOUR SYSTEM BECOMES UNRESPONSIVE TRY CLOSING THIS TERMINAL") print("IF YOUR SYSTEM BECOMES UNRESPONSIVE TRY CLOSING THIS TERMINAL")
print("...starting test in 5 seconds...") print("...starting test in 5 seconds...")
sleep(5) sleep(5)
print(color_combos_8()) for line in color_combos_8().split('\n'):
print(color_combos_16()) print(line)
print(fancy_styles()) sleep(0.15)
for line in color_combos_16().split('\n'):
print(line)
sleep(0.15)
for line in fancy_styles().split('\n'):
print(line)
sleep(0.15)
print(color_combos_256()) print(color_combos_256())

0 comments on commit 02ac655

Please sign in to comment.