-
Notifications
You must be signed in to change notification settings - Fork 27
/
pygtail.py
342 lines (298 loc) · 13 KB
/
pygtail.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
#!/usr/bin/python -tt
# -*- coding: utf-8 -*-
# pygtail - a python "port" of logtail2
# Copyright (C) 2011 Brad Greenlee <brad@footle.org>
#
# Derived from logcheck <http://logcheck.org>
# Copyright (C) 2003 Jonathan Middleton <jjm@ixtab.org.uk>
# Copyright (C) 2001 Paul Slootman <paul@debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from __future__ import print_function
from os import fstat, stat
import os
from os.path import exists, getsize
import sys
import glob
import gzip
from optparse import OptionParser
__version__ = '0.11.1'

# True on Python 3 and any later major version.  Comparing with ``>=`` rather
# than ``==`` keeps a future major release from falling through to the
# Python 2 branch below, where ``unicode`` does not exist.
PY3 = sys.version_info[0] >= 3

# ``text_type`` is the interpreter's native unicode string type.
if PY3:
    text_type = str
else:
    text_type = unicode  # noqa: F821 -- only evaluated on Python 2
def force_text(s, encoding='utf-8', errors='strict'):
    """
    Return `s` as a text (unicode) string.

    If `s` is already an instance of `text_type` it is returned unchanged;
    otherwise it is decoded with `s.decode(encoding, errors)`.

    Arguments:

    s         The value to convert.
    encoding  Codec name passed to `decode` (default: 'utf-8').
    errors    Error-handling scheme passed to `decode` (default: 'strict').
    """
    if isinstance(s, text_type):
        return s
    return s.decode(encoding, errors)
class Pygtail(object):
    """
    Creates an iterable object that returns only unread lines.

    Keyword arguments:

    offset_file    File to which offset data is written (default: <logfile>.offset).
    paranoid       Update the offset file every time we read a line (as opposed to
                   only when we reach the end of the file (default: False))
    every_n        Update the offset file every n'th line (as opposed to only when
                   we reach the end of the file (default: 0))
    on_update      Execute this function when offset data is written (default False)
    copytruncate   Support copytruncate-style log rotation (default: True)
    read_from_end  Start reading at the end of the log file when the offset file
                   does not exist (default: False)
    log_patterns   List of custom rotated log patterns to match (default: None)
    full_lines     Only log when line ends in a newline (default: False)
    """

    def __init__(self, filename, offset_file=None, paranoid=False, copytruncate=True,
                 every_n=0, on_update=False, read_from_end=False, log_patterns=None,
                 full_lines=False):
        self.filename = filename
        self.paranoid = paranoid
        self.every_n = every_n
        self.on_update = on_update
        self.copytruncate = copytruncate
        self.read_from_end = read_from_end
        self.log_patterns = log_patterns
        self._full_lines = full_lines
        self._offset_file = offset_file or "%s.offset" % self.filename
        self._offset_file_inode = 0
        self._offset = 0
        self._since_update = 0
        self._fh = None
        self._rotated_logfile = None

        # if offset file exists and non-empty, open and parse it
        if exists(self._offset_file) and getsize(self._offset_file):
            # The offset file holds two integer lines: inode, then offset.
            # The context manager closes it even when parsing raises.
            with open(self._offset_file, "r") as offset_fh:
                (self._offset_file_inode, self._offset) = \
                    [int(line.strip()) for line in offset_fh]
            logfile_stat = stat(self.filename)
            if self._offset_file_inode != logfile_stat.st_ino or \
                    logfile_stat.st_size < self._offset:
                # The inode has changed or filesize has reduced so the file
                # might have been rotated.
                # Look for the rotated file and process that if we find it.
                self._rotated_logfile = self._determine_rotated_logfile()

    def __del__(self):
        # Close only a handle that is already open.  Going through
        # _filehandle() here would open the log file during destruction (and
        # fail if it has been removed) just to close it again.  getattr()
        # covers an instance whose __init__ never got as far as setting _fh.
        fh = getattr(self, "_fh", None)
        if fh is not None and not self._is_closed():
            fh.close()

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next line in the file, updating the offset.

        Raises StopIteration when there are no more unread lines.
        """
        try:
            line = self._get_next_line()
        except StopIteration:
            # we've reached the end of the file; if we're processing the
            # rotated log file or the file has been renamed, we can continue
            # with the actual file; otherwise update the offset file
            if self._is_new_file():
                self._rotated_logfile = None
                self._fh.close()
                self._offset = 0
                # open up current logfile and continue
                try:
                    line = self._get_next_line()
                except StopIteration:  # oops, empty file
                    self._update_offset_file()
                    raise
            else:
                self._update_offset_file()
                raise

        if self.paranoid:
            self._update_offset_file()
        elif self.every_n and self.every_n <= self._since_update:
            self._update_offset_file()

        return line

    def __next__(self):
        """`__next__` is the Python 3 version of `next`"""
        return self.next()

    def readlines(self):
        """
        Read in all unread lines and return them as a list.
        """
        return [line for line in self]

    def read(self):
        """
        Read in all unread lines and return them as a single string.

        Returns None when there are no unread lines.
        """
        lines = self.readlines()
        if lines:
            try:
                return ''.join(lines)
            except TypeError:
                # the lines are not text (e.g. bytes); convert before joining
                return ''.join(force_text(line) for line in lines)
        else:
            return None

    def _is_closed(self):
        """
        Return True when there is no filehandle or the filehandle is closed.
        """
        if not self._fh:
            return True
        try:
            return self._fh.closed
        except AttributeError:
            if isinstance(self._fh, gzip.GzipFile):
                # python 2.6
                return self._fh.fileobj is None
            else:
                raise

    def _filehandle(self):
        """
        Return a filehandle to the file being tailed, with the position set
        to the current offset.

        The rotated logfile is opened in preference to the current one while
        one is set, and names ending in '.gz' are opened through gzip.
        """
        if self._is_closed():
            filename = self._rotated_logfile or self.filename
            if filename.endswith('.gz'):
                self._fh = gzip.open(filename, 'r')
            else:
                self._fh = open(filename, "r", 1)
            if self.read_from_end and not exists(self._offset_file):
                self._fh.seek(0, os.SEEK_END)
            else:
                self._fh.seek(self._offset)

        return self._fh

    def _update_offset_file(self):
        """
        Update the offset file with the current inode and offset.

        Calls `on_update` first when one was supplied, and resets the
        lines-since-update counter afterwards.
        """
        if self.on_update:
            self.on_update()
        offset = self._filehandle().tell()
        inode = stat(self.filename).st_ino
        with open(self._offset_file, "w") as fh:
            fh.write("%s\n%s\n" % (inode, offset))
        self._since_update = 0

    def _determine_rotated_logfile(self):
        """
        We suspect the logfile has been rotated, so try to guess what the
        rotated filename is, and return it.

        Returns None when no usable rotated file is found.
        """
        rotated_filename = self._check_rotated_filename_candidates()
        if rotated_filename and exists(rotated_filename):
            if stat(rotated_filename).st_ino == self._offset_file_inode:
                return rotated_filename

            # if the inode hasn't changed, then the file shrank; this is
            # expected with copytruncate, otherwise print a warning
            if stat(self.filename).st_ino == self._offset_file_inode:
                if self.copytruncate:
                    return rotated_filename
                else:
                    sys.stderr.write(
                        "[pygtail] [WARN] file size of %s shrank, and copytruncate support is "
                        "disabled (expected at least %d bytes, was %d bytes).\n" %
                        (self.filename, self._offset, stat(self.filename).st_size))

        return None

    def _check_rotated_filename_candidates(self):
        """
        Check for various rotated logfile filename patterns and return the first
        match we find.

        Returns None when nothing matches.
        """
        # savelog(8)
        candidate = "%s.0" % self.filename
        if (exists(candidate) and exists("%s.1.gz" % self.filename) and
                (stat(candidate).st_mtime > stat("%s.1.gz" % self.filename).st_mtime)):
            return candidate

        # logrotate(8)
        # with delaycompress
        candidate = "%s.1" % self.filename
        if exists(candidate):
            return candidate

        # without delaycompress
        candidate = "%s.1.gz" % self.filename
        if exists(candidate):
            return candidate

        rotated_filename_patterns = [
            # logrotate dateext rotation scheme - `dateformat -%Y%m%d` + with `delaycompress`
            "%s-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",
            # logrotate dateext rotation scheme - `dateformat -%Y%m%d` + without `delaycompress`
            "%s-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9].gz",
            # logrotate dateext rotation scheme - `dateformat -%Y%m%d-%s` + with `delaycompress`
            "%s-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",
            # logrotate dateext rotation scheme - `dateformat -%Y%m%d-%s` + without `delaycompress`
            "%s-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9].gz",
            # for TimedRotatingFileHandler
            "%s.[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]",
        ]
        if self.log_patterns:
            rotated_filename_patterns.extend(self.log_patterns)

        # break into directory and filename components to support cases where
        # the file is prepended as part of rotation
        file_dir, rel_filename = os.path.split(self.filename)
        for rotated_filename_pattern in rotated_filename_patterns:
            candidates = glob.glob(os.path.join(file_dir, rotated_filename_pattern % rel_filename))
            if candidates:
                candidates.sort()
                return candidates[-1]  # return most recent

        # no match
        return None

    def _is_new_file(self):
        """
        Return a true value when reading should continue with `self.filename`:
        either a rotated logfile is being processed, or the open file has been
        read to its end and no longer has the inode of `self.filename`.
        """
        # Processing rotated logfile
        if self._rotated_logfile:
            return self._rotated_logfile
        # ... or at the end of current file which has been renamed
        fh = self._filehandle()
        fh_stat = fstat(fh.fileno())
        return fh.tell() == fh_stat.st_size and \
            fh_stat.st_ino != stat(self.filename).st_ino

    def _get_next_line(self):
        """
        Read and return the next line, counting it towards `every_n`.

        Raises StopIteration at end of file.  With `full_lines` it also raises
        StopIteration for a line that does not end in a newline, after seeking
        back so the partial line is read again on a later call.
        """
        fh = self._filehandle()
        curr_offset = fh.tell()
        line = fh.readline()
        if self._full_lines:
            # gzip handles opened with mode 'r' yield bytes on Python 3, and
            # bytes.endswith() rejects a str argument, so match the newline
            # to the type of the line that was read.
            newline = b'\n' if isinstance(line, bytes) else '\n'
            if not line.endswith(newline):
                fh.seek(curr_offset)
                raise StopIteration
        if not line:
            raise StopIteration
        self._since_update += 1
        return line
def main():
    """
    Command-line entry point.

    Parses the options, then writes every unread line of the given logfile to
    standard output.  Exits through the option parser's error handling when
    the arguments are invalid, and with status 0 after printing the version
    when --version is given.
    """
    # command-line parsing
    cmdline = OptionParser(usage="usage: %prog [options] logfile",
                           description="Print log file lines that have not been read.")
    cmdline.add_option("--offset-file", "-o", action="store",
                       help="File to which offset data is written (default: <logfile>.offset).")
    cmdline.add_option("--paranoid", "-p", action="store_true",
                       help="Update the offset file every time we read a line (as opposed to"
                            " only when we reach the end of the file).")
    # type="int" makes optparse validate the value and report a usage error
    # for a non-numeric argument instead of failing with a traceback.
    cmdline.add_option("--every-n", "-n", action="store", type="int",
                       help="Update the offset file every n'th time we read a line (as opposed to"
                            " only when we reach the end of the file).")
    cmdline.add_option("--no-copytruncate", action="store_true",
                       help="Don't support copytruncate-style log rotation. Instead, if the log file"
                            " shrinks, print a warning.")
    cmdline.add_option("--read-from-end", action="store_true",
                       help="Read log file from the end if offset file is missing. Useful for large files.")
    cmdline.add_option("--log-pattern", action="append",
                       help="Custom log rotation glob pattern. Use %s to represent the original filename."
                            " You may use this multiple times to provide multiple patterns.")
    # "--full_lines" is the original spelling and keeps working; "--full-lines"
    # matches the hyphenated style of the other options.
    cmdline.add_option("--full-lines", "--full_lines", dest="full_lines", action="store_true",
                       help="Only log when line ends in a newline (\\n)")
    cmdline.add_option("--version", action="store_true",
                       help="Print version and exit.")

    options, args = cmdline.parse_args()

    if options.version:
        print("pygtail version", __version__)
        sys.exit(0)

    if (len(args) != 1):
        cmdline.error("Please provide a logfile to read.")

    pygtail = Pygtail(args[0],
                      offset_file=options.offset_file,
                      paranoid=options.paranoid,
                      every_n=options.every_n,
                      copytruncate=not options.no_copytruncate,
                      read_from_end=options.read_from_end,
                      log_patterns=options.log_pattern,
                      full_lines=options.full_lines
                      )

    for line in pygtail:
        sys.stdout.write(line)


if __name__ == "__main__":
    main()