Skip to content
Browse files

termio.py: Fixed a bug where log metadata was getting written twice a…

…t the start of logs.

termio.py:  Fixed a bug in get_or_update_metadata() where you could get a MemoryError exception when acting upon very large log files.  This function will now finalize logs by reading them a little bit at a time, writing the updated log to a temp file, and finally overwriting the original log with the temp one via shutil.move().  This should be much more reliable and safer than the old way.
termio.py:  Fixed a bug where the rows and columns weren't getting set properly in log metadata.
terminal.py:  Fixed a bug where if UTF-8-decoded unicode was passed to Terminal.write() it could raise a UnicodeDecodeError exception if special checks were enabled.  Now it will re-encode the text to ensure it doesn't barf futher on in the function.  The output in such situations won't be pretty but at least the terminal will continue running smoothly.
terminal.py:  Changed the JPEG regex slightly in order to make it more reliable.
logviewer.py:  Added a new function:  get_frames().  It is a generator that iterates over all the frames inside of a .golog file.  It reads the file in chunks in order to conserve memory.
logviewer.py:  Fixed a bug where you could get a MemoryError exception when trying to play back very large log files.  Both playback_log() and flatten_log() are using the aforementioned get_frames() generator to solve this problem.
logviewer.py:  Changed flatten_log() so that it takes a file-like object as a second argument.  This object will be written to progressively as the log is read instead of reading the entire log into memory and returning the whole thing as one big string.
logviewer.py:  When pressing Ctrl-C to kill a running log playback it will now exit gracefully instead of printing a confusing KeyboardInterrupt traceback error message.  Polish!
Logging Plugin:  Updated logging_plugin.py to be able to use the new way of using flatten_log().
  • Loading branch information...
1 parent 9bdb8eb commit 41d34535a78bf92364c9d0fdd69de0c38b6c8b9b @liftoff committed Sep 26, 2012
Showing with 216 additions and 105 deletions.
  1. +144 −82 gateone/logviewer.py
  2. +16 −8 gateone/plugins/logging/logging_plugin.py
  3. +2 −0 gateone/plugins/ssh/scripts/ssh_connect.py
  4. +12 −4 gateone/terminal.py
  5. +42 −11 gateone/termio.py
View
226 gateone/logviewer.py
@@ -11,7 +11,7 @@
__author__ = 'Dan McDougall <daniel.mcdougall@liftoffsoftware.com>'
# Import stdlib stuff
-import os, sys, re, gzip
+import os, sys, re, gzip, fcntl, termios, struct
from time import sleep
from datetime import datetime
from optparse import OptionParser
@@ -104,6 +104,28 @@
r'.*\x1b\][0-2]\;(.+?)(\x07|\x1b\\)', re.DOTALL|re.MULTILINE)
# TODO: Support Fast forward/rewind/pause like Gate One itself.
+def get_frames(golog_path, chunk_size=131072):
+ """
+ A generator that iterates over the frames in a .golog file, returning them
+ as strings.
+ """
+ encoded_separator = SEPARATOR.encode('UTF-8')
+ golog = gzip.open(golog_path)
+ frame = ""
+ while True:
+ chunk = golog.read(chunk_size)
+ frame += chunk
+ if encoded_separator in chunk:
+ split_frames = frame.split(encoded_separator)
+ next_frame = split_frames[-1]
+ for fr in split_frames[:-1]:
+ yield fr
+ frame = next_frame
+ if len(chunk) < chunk_size:
+ # Write last frame
+ if frame:
+ yield frame
+ break
def playback_log(log_path, file_like, show_esc=False):
"""
@@ -112,31 +134,35 @@ def playback_log(log_path, file_like, show_esc=False):
methods.
If *show_esc* is True, escape sequences and control characters will be
- escaped so they can be seen in the output.
+ escaped so they can be seen in the output. There will also be no delay
+ between the output of frames (under the assumption that if you want to see
+ the raw log you want it to output all at once so you can pipe it into
+ some other app).
"""
- log = gzip.open(log_path).read()
prev_frame_time = None
- # Skip first frame
- for i, frame in enumerate(log.split(SEPARATOR.encode('UTF-8'))[1:]):
- try:
+ try:
+ for count, frame in enumerate(get_frames(log_path)):
frame_time = float(frame[:13]) # First 13 chars is the timestamp
- frame = frame[14:] # Skips the colon
- if i == 0:
+ frame = frame[14:] # [14:] Skips the timestamp and the colon
+ if count == 0:
# Write it out immediately
- file_like.write(frame.decode('UTF-8'))
+ if show_esc:
+ frame = raw(frame)
+ file_like.write(frame)
prev_frame_time = frame_time
else:
- # Wait until the time between the previous frame and now has passed
- wait_time = (frame_time - prev_frame_time)/1000.0
- sleep(wait_time) # frame times are in milliseconds
- prev_frame_time = frame_time
if show_esc:
frame = raw(frame)
- file_like.write(frame.decode('UTF-8'))
- file_like.flush()
- except ValueError:
- # End of file. No biggie.
- return
+ else:
+ # Wait until the time between the previous frame and now
+ # has passed
+ wait_time = (frame_time - prev_frame_time)/1000.0
+ sleep(wait_time) # frame times are in milliseconds
+ file_like.write(frame)
+ prev_frame_time = frame_time
+ file_like.flush()
+ except IOError: # Something wrong with the file
+ return
def escape_escape_seq(text, preserve_renditions=True, rstrip=True):
"""
@@ -191,14 +217,15 @@ def escape_escape_seq(text, preserve_renditions=True, rstrip=True):
continue
if rstrip:
# Remove trailing whitespace + trailing ESC sequences
- #return esc_rstrip.sub('', out).rstrip()
return out.rstrip()
else: # All these trailers better make for a good movie
return out
-def flatten_log(log_path, preserve_renditions=True, show_esc=False):
+def flatten_log(log_path, file_like, preserve_renditions=True, show_esc=False):
"""
- Given a log file at *log_path*, return a str of log lines contained within.
+ Given a log file at *log_path*, write a string of log lines contained
+ within to *file_like*. Where *file_like* is expected to be any file-like
+ object with write() and flush() methods.
If *preserve_renditions* is True, CSI escape sequences for renditions will
be preserved as-is (e.g. font color, background, etc). This is to make the
@@ -213,71 +240,95 @@ def flatten_log(log_path, preserve_renditions=True, show_esc=False):
can be used with grep and similar search/filter tools.
"""
import gzip
- lines = gzip.open(log_path).read()
- out = ""
+ encoded_separator = SEPARATOR.encode('UTF-8')
out_line = ""
cr = False
# We skip the first frame, [1:] because it holds the recording metadata
- for frame in lines.split(SEPARATOR.encode('UTF-8'))[1:]:
+ for count, frame in enumerate(get_frames(log_path)):
+ if count == 0:
+ # Skip the first frame (it's just JSON-encoded metadata)
+ continue
frame = frame.decode('UTF-8', 'ignore')
- try:
- frame_time = float(frame[:13]) # First 13 chars is the timestamp
- # Convert to datetime object
- frame_time = datetime.fromtimestamp(frame_time/1000)
- if show_esc:
- frame_time = frame_time.strftime(u'\x1b[0m%b %m %H:%M:%S')
- else: # Renditions preserved == I want pretty. Make the date bold:
- frame_time = frame_time.strftime(
- u'\x1b[0;1m%b %m %H:%M:%S\x1b[m')
- for char in frame[14:]:
- if '\x1b[H\x1b[2J' in out_line: # Clear screen sequence
- # Handle the clear screen (usually ctrl-l) by outputting
- # a new log entry line to avoid confusion regarding what
- # happened at this time.
- out_line += "^L" # Clear screen is a ctrl-l or equivalent
- if show_esc:
- adjusted = raw(out_line)
- else:
- adjusted = escape_escape_seq(out_line, rstrip=True)
- out += frame_time + ' %s\n' % adjusted
- out_line = ""
- continue
- if char == u'\n':
+ frame_time = float(frame[:13]) # First 13 chars is the timestamp
+ # Convert to datetime object
+ frame_time = datetime.fromtimestamp(frame_time/1000)
+ if show_esc:
+ frame_time = frame_time.strftime(u'\x1b[0m%b %m %H:%M:%S')
+ else: # Renditions preserved == I want pretty. Make the date bold:
+ frame_time = frame_time.strftime(
+ u'\x1b[0;1m%b %m %H:%M:%S\x1b[m')
+ for char in frame[14:]:
+ if '\x1b[H\x1b[2J' in out_line: # Clear screen sequence
+ # Handle the clear screen (usually ctrl-l) by outputting
+ # a new log entry line to avoid confusion regarding what
+ # happened at this time.
+ out_line += "^L" # Clear screen is a ctrl-l or equivalent
+ if show_esc:
+ adjusted = raw(out_line)
+ else:
+ adjusted = escape_escape_seq(out_line, rstrip=True)
+ file_like.write(frame_time + ' %s\n' % adjusted)
+ out_line = ""
+ continue
+ if char == u'\n':
+ if show_esc:
+ adjusted = raw(out_line)
+ else:
+ adjusted = escape_escape_seq(out_line, rstrip=True)
+ file_like.write(frame_time + ' %s\n' % adjusted)
+ out_line = ""
+ cr = False
+ elif char == u'\r':
+ # Carriage returns need special handling. Make a note of it
+ cr = True
+ else:
+ # \r without \n means that characters were (likely)
+ # overwritten. This usually happens when the user gets to
+ # the end of the line (which would create a newline in the
+ # terminal but not necessarily the log), erases their
+ # current line (e.g. ctrl-u), or an escape sequence modified
+ # the line in-place. To clearly indicate what happened we
+ # insert a '^M' and start a new line so as to avoid
+ # confusion over these events.
+ if cr:
+ out_line += "^M"
+ file_like.write(frame_time + ' ')
if show_esc:
adjusted = raw(out_line)
else:
adjusted = escape_escape_seq(out_line, rstrip=True)
- out += frame_time + ' %s\n' % adjusted
+ file_like.write(adjusted + '\n')
out_line = ""
- cr = False
- elif char in u'\r':
- # Carriage returns need special handling. Make a note of it
- cr = True
- else:
- # \r without \n means that characters were (likely)
- # overwritten. This usually happens when the user gets to
- # the end of the line (which would create a newline in the
- # terminal but not necessarily the log), erases their
- # current line (e.g. ctrl-u), or an escape sequence modified
- # the line in-place. To clearly indicate what happened we
- # insert a '^M' and start a new line so as to avoid
- # confusion over these events.
- if cr:
- out_line += "^M"
- out += frame_time + ' '
- if show_esc:
- adjusted = raw(out_line)
- else:
- adjusted = escape_escape_seq(out_line, rstrip=True)
- out += adjusted
- out += '\n'
- out_line = ""
- out_line += char
- cr = False
- except ValueError as e:
+ out_line += char
+ cr = False
+ file_like.flush()
+
+def get_terminal_size():
+ """
+ Returns the size of the current terminal in the form of (rows, cols).
+ """
+ env = os.environ
+ def ioctl_GWINSZ(fd):
+ try:
+ cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ,
+ '1234'))
+ except:
+ return None
+ return cr
+ cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
+ if not cr:
+ try:
+ fd = os.open(os.ctermid(), os.O_RDONLY)
+ cr = ioctl_GWINSZ(fd)
+ os.close(fd)
+ except:
pass
- # End of file. No biggie.
- return out
+ if not cr:
+ try:
+ cr = (env['LINES'], env['COLUMNS'])
+ except:
+ cr = (25, 80)
+ return int(cr[0]), int(cr[1])
if __name__ == "__main__":
"""Parse command line arguments and view the log in the specified format."""
@@ -316,9 +367,20 @@ def flatten_log(log_path, preserve_renditions=True, show_esc=False):
parser.print_help()
sys.exit(1)
log_path = args[0]
- if options.flat:
- result = flatten_log(
- log_path, preserve_renditions=options.pretty, show_esc=options.raw)
- print(result)
- else:
- playback_log(log_path, sys.stdout)
+ if not os.path.exists(log_path):
+ print("ERROR: %s does not exist" % log_path)
+ sys.exit(1)
+ try:
+ if options.flat:
+ result = flatten_log(
+ log_path,
+ sys.stdout,
+ preserve_renditions=options.pretty, show_esc=options.raw)
+ print(result)
+ else:
+ playback_log(log_path, sys.stdout, show_esc=options.raw)
+ except KeyboardInterrupt:
+ # Move the cursor to the bottom of the screen to ensure it isn't in the
+ # middle of the log playback output
+ rows, cols = get_terminal_size()
+ print("\x1b[%s;0H\n" % rows)
View
24 gateone/plugins/logging/logging_plugin.py
@@ -46,7 +46,7 @@
# Our stuff
from gateone import BaseHandler, PLUGINS
-from logviewer import flatten_log
+from logviewer import flatten_log, get_frames
from termio import retrieve_first_frame, retrieve_last_frame
from termio import get_or_update_metadata
from utils import get_translation, json_encode
@@ -58,6 +58,8 @@
import tornado.ioloop
from tornado.escape import json_decode
+# TODO: Make the log retrieval functions work incrementally as logs are read so they don't have to be stored entirely in memory before being sent to the client.
+
# Globals
SEPARATOR = u"\U000f0f0f" # The character used to separate frames in the log
PROCS = {} # For tracking/cancelling background processes
@@ -83,15 +85,17 @@ def retrieve_log_frames(golog_path, rows, cols, limit=None):
# 14/7 for the em_height should be OK for most browsers to ensure that
# images don't always wind up at the bottom of the screen.
rows=rows, cols=cols, em_dimensions={'height':14, 'width':7})
- frames = gzip.open(golog_path).read().split(SEPARATOR.encode('UTF-8'))[1:]
- if not limit:
- limit = len(frames)
- for frame in frames[:limit]:
+ for i, frame in enumerate(get_frames(golog_path)):
+ if limit and i == limit:
+ break
if len(frame) > 14:
frame_time = int(float(frame[:13]))
frame_screen = frame[14:] # Skips the colon
term.write(frame_screen)
- scrollback, screen = term.dump_html()
+ # Ensure we're not in the middle of capturing an image. Otherwise
+ # it might get cut off and result in no image being shown.
+ if not term.image:
+ scrollback, screen = term.dump_html()
out_frames.append({'screen': screen, 'time': frame_time})
return out_frames # Skip the first frame which is the metadata
@@ -286,11 +290,15 @@ def _retrieve_log_flat(queue, settings):
out_dict['metadata'] = get_or_update_metadata(log_path, user)
out_dict['metadata']['filename'] = log_filename
out_dict['result'] = "Success"
- flattened_log = flatten_log(log_path)
- flattened_log = flattened_log.replace('\n', '\r\n') # Needed to emulate an actual term
+ import StringIO
# Use the terminal emulator to create nice HTML-formatted output
from terminal import Terminal
term = Terminal(rows=100, cols=300)
+ io_obj = StringIO.StringIO()
+ flatten_log(log_path, io_obj)
+ io_obj.seek(0)
+ # Needed to emulate an actual term
+ flattened_log = io_obj.read().replace('\n', '\r\n')
term.write(flattened_log)
scrollback, screen = term.dump_html()
# Join them together
View
2 gateone/plugins/ssh/scripts/ssh_connect.py
@@ -268,6 +268,8 @@ def openssh_connect(
"-oNoHostAuthenticationForLocalhost=yes",
# This ensure's that the executing user's identity won't be used:
"-oIdentityFile=%s" % ssh_default_identity_path,
+ # This ensures the other end can tell we're a Gate One terminal
+ "-oSendEnv=GO_TERM",
"-p", str(port),
"-l", user,
]
View
16 gateone/terminal.py
@@ -857,7 +857,7 @@ def initialize(self, rows=24, cols=80, em_dimensions=None):
# ambiguous.
jpeg_header = re.compile('.*\xff\xd8\xff.+JFIF\x00|.*\xff\xd8\xff.+Exif\x00', re.DOTALL)
jpeg_whole = re.compile(
- '\xff\xd8\xff.+JFIF\x00.+\xff\xd9(?!\xff)|\xff\xd8\xff.+Exif\x00.+\xff\xd9(?!\xff)', re.DOTALL)
+ '\xff\xd8\xff.+JFIF\x00.*?\xff\xd9|\xff\xd8\xff.+Exif\x00.*?\xff\xd9', re.DOTALL)
self.magic = {
# Dict for magic "numbers" so we can tell when a particular type of
# file begins and ends (so we can capture it in binary form and
@@ -1304,9 +1304,15 @@ def write(self, chars, special_checks=True):
before_chars = ""
after_chars = ""
for magic_header in magic.keys():
- if magic_header.match(str(chars)):
- self.matched_header = magic_header
- self.timeout_image = datetime.now()
+ try:
+ if magic_header.match(str(chars)):
+ self.matched_header = magic_header
+ self.timeout_image = datetime.now()
+ except UnicodeEncodeError:
+ # Gibberish; drop it and pretend it never happened
+ self.esc_buffer = ""
+ # Make it so it won't barf
+ chars = chars.encode('UTF-8', 'ignore')
if self.image or self.matched_header:
self.image += chars
match = magic[self.matched_header].search(self.image)
@@ -1731,6 +1737,8 @@ def _capture_image(self):
"""
# Remove the extra \r's that the terminal adds:
self.image = str(self.image).replace('\r\n', '\n')
+ # Need it twice since you get \r\r\n in some situations
+ self.image = self.image.replace('\r\n', '\n')
logging.debug("_capture_image() len(self.image): %s" % len(self.image))
if Image: # PIL is loaded--try to guess how many lines the image takes
i = StringIO.StringIO(self.image)
View
53 gateone/termio.py
@@ -178,7 +178,7 @@ def retrieve_last_frame(golog_path):
return # Something wrong with the file
distance += distance
# Now that we're at the end, go back a bit and split from there
- golog.seek(golog.tell() - chunk_size)
+ golog.seek(golog.tell() - chunk_size*2)
end_frames = golog.read().split(encoded_separator)
if len(end_frames) > 1:
# Very last item will be empty
@@ -218,14 +218,10 @@ def get_or_update_metadata(golog_path, user, force_update=False):
# of magnitude)
chunk_size = 1024*128 # 128k should be enough for a 100x300 terminal full
# of 4-byte unicode characters. That would be one BIG frame (i.e. unlikely).
- # Sadly, we have to read the whole thing into memory (log_data) in order to
- # perform this important work (creating proper metadata).
- # On the plus side re-compressing the log can save a _lot_ of disk space
- # Why? Because termio.py writes the frames using gzip.open() in append mode
- # which is a lot less efficient than compressing all the data in one go.
log_data = b''
total_frames = 0
- while True:
+ max_data = chunk_size * 10 # Hopefully this is enough to capture a title
+ while len(log_data) < max_data:
try:
chunk = golog.read(chunk_size)
except IOError:
@@ -234,9 +230,13 @@ def get_or_update_metadata(golog_path, user, force_update=False):
log_data += chunk
if len(chunk) < chunk_size:
break
+ # Remove the trailing incomplete frame
+ log_data = encoded_separator.join(log_data.split(encoded_separator)[:-1])
log_data = log_data.decode('UTF-8', 'ignore')
start_date = first_frame[:13] # Getting the start date is easy
last_frame = retrieve_last_frame(golog_path) # This takes some work
+ if not last_frame:
+ return # Something wrong with log
end_date = last_frame[:13]
version = u"1.0"
connect_string = None
@@ -267,11 +267,40 @@ def get_or_update_metadata(golog_path, user, force_update=False):
})
# Make a *new* first_frame
first_frame = u"%s:" % start_date
- first_frame += json_encode(metadata)
+ first_frame += json_encode(metadata) + SEPARATOR
+ first_frame = first_frame.encode('UTF-8')
# Replace the first frame and re-save the log
- log_data = log_data.encode('UTF-8') # Encode this first to ensure 'distance'
- log_data = (first_frame + SEPARATOR).encode('UTF-8') + log_data[distance:]
- gzip.open(golog_path, 'w').write(log_data)
+ temp_path = "%s.tmp" % golog_path
+ golog = gzip.open(golog_path) # Re-open
+ new_golog = gzip.open(temp_path, 'w')
+ new_golog.write(first_frame)
+ # Now write out the rest of it
+ count = 0
+ while True:
+ try:
+ chunk = golog.read(chunk_size)
+ except IOError:
+ return # Something wrong with the file
+ if count == 0:
+ if chunk[14:].startswith('{'): # Old/incomplete metadata
+ # Need to keep reading until the next frame
+ while True:
+ try:
+ chunk += golog.read(chunk_size)
+ except IOError:
+ return # Something wrong with the file
+ if encoded_separator in chunk:
+ # This removes the first frame:
+ chunk = encoded_separator.join(
+ chunk.split(encoded_separator)[1:])
+ break
+ new_golog.write(chunk)
+ if len(chunk) < chunk_size:
+ break # Everything must come to an end
+ count += 1
+ # Overwrite the old log
+ import shutil
+ shutil.move(temp_path, golog_path)
return metadata
# Exceptions
@@ -1202,6 +1231,8 @@ def spawn(self,
rows, cols, repr(env), repr(em_dimensions)))
rows = min(200, rows) # Max 200 to limit memory utilization
cols = min(500, cols) # Max 500 for the same reason
+ self.rows = rows
+ self.cols = cols
import pty
pid, fd = pty.fork()
if pid == 0: # We're inside the child process

0 comments on commit 41d3453

Please sign in to comment.
Something went wrong with that request. Please try again.