Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
476 lines (435 sloc) 19 KB
#!python
# RCSFile class and supporting functions / parsers
# Copyright (C) 2011 Ryan Kavanagh <rak@debian.org>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
from pyparsing import *
import pyparsing
import re
import string
import warnings
if pyparsing.__version__ < '1.5.5':
raise Exception("Needs pyparsing version 1.5.5 or greater")
# Special characters in the RCS file format
special = '$,.:;@'
RCSdigit = Word(nums, min=1, max=1).setName('RCSdigit')
RCSnum = Word(nums + '.').setName('RCSnum')
RCSidchar = ZeroOrMore(string.whitespace) + CharsNotIn(special + string.whitespace).setName('RCSidchar')
RCSid = Combine(
Optional(".") + RCSidchar + ZeroOrMore(RCSidchar | RCSnum)
^ RCSidchar + ZeroOrMore(RCSidchar | RCSnum) )
RCSsym = ZeroOrMore(RCSdigit) + RCSidchar + ZeroOrMore(RCSidchar |
RCSdigit).setName('RCSsym')
RCSsymbol = Combine(RCSsym + ':' + RCSnum).setName('RCSsymbol')
RCSlocks = (RCSid + ':' + RCSnum).setName('locks')
RCSstring = QuotedString(quoteChar='@', escQuote='@@', multiline=True)
RCSadmin = \
Keyword('head').suppress() + \
Optional(RCSnum)('head') + \
Suppress(';') + \
Optional(Keyword('branch').suppress() +
Optional(RCSnum)('branch') +
Suppress(';')
) + \
Keyword('access').suppress() + \
Group(ZeroOrMore(RCSid))('access') + \
Suppress(';') + \
Keyword('symbols').suppress() + \
Group(ZeroOrMore(RCSsymbol))('symbols') + \
Suppress(';') + \
Keyword('locks').suppress() + \
Group(ZeroOrMore(RCSlocks))('locks') + \
Suppress(';') + \
Optional(Keyword('strict').suppress() +
Suppress(';')
) + \
Optional(Keyword('comment').suppress() +
RCSstring.setResultsName('comment') +
Suppress(';')
) + \
Optional(Keyword('expand').suppress() +
RCSstring.setResultsName('expand') +
Suppress(';')
) #+ \
#ZeroOrMore(~RCSdelta + ~RCSdesc + RCSid + ZeroOrMore(RCSid ^ RCSnum ^ RCSstring ^ ':'))
RCSdelta = RCSnum("deltanum") + \
Keyword('date').suppress() + \
RCSnum("date") + \
Suppress(';') + \
Keyword('author').suppress() + \
RCSid("author") + \
Suppress(';') + \
Keyword('state').suppress() + \
Optional(RCSid)("state") + \
Suppress(';') + \
Keyword('branches').suppress() + \
Group(ZeroOrMore(RCSnum))("branches") + \
Suppress(';') + \
Keyword("next").suppress() + \
Optional(RCSnum)("next") + \
Suppress(';') #+ \
#ZeroOrMore(~RCSdelta + ~Keyword('desc') + RCSid + ZeroOrMore(RCSid ^ RCSnum ^ RCSstring ^ ':'))
RCSdesc = Keyword('desc').suppress() + RCSstring("desc")
RCSdeltatext = RCSnum("deltanum") +\
Keyword('log') + RCSstring("log") + \
ZeroOrMore(~Keyword('text') + RCSid + ZeroOrMore(RCSid ^ RCSnum ^ RCSstring ^ ':')) + \
Keyword('text') + RCSstring("text")
RCStext = RCSadmin("admin") + Group(ZeroOrMore(Group(RCSdelta)))("deltas") + \
RCSdesc + Group(ZeroOrMore(Group(RCSdeltatext)))("deltatexts")
def test(i):
try:
return RCStext.parseString(i)
except ParseException, pe:
print pe.markInputline()
raise pe
def cmp_rcsdates(date1, date2):
"""
Compares two RCS dates. Accounts for RCS dates being of the form
YY.mm.dd.HH.MM.SS before 2000 and YYYY.mm.dd.HH.MM.SS afterwards.
"""
dates = [date1, date2]
for i, date in enumerate(dates):
if len(date) == 17:
dates[i] = '19' + date
return min(dates)
class RCSFile:
"""
RCSFile represents RCS files used by the RCS and CVS version control
systems.
See rcsfile(5)[http://www.freebsd.org/cgi/man.cgi?query=rcsfile] for
details on the format.
If you give us a broken RCS file (for example, more than one
deltatext with the same delta number and date), we *will* crash and
burn. You have been warned.
"""
def __init__(self, lines=None, file=None):
""" Initialise the object with the lines of an RCS file. """
if lines and not file:
parsed = RCStext.parseString(''.join(lines))
self._filename = '?'
elif file and not lines:
parsed = RCStext.parseFile(file)
if type(file) is str:
self._filename = file
elif type(file) is file:
self._filename = file
else:
self._filename = '?'
else:
raise ValueError('Either lines or file must be provided')
# self._lines will contain all lines, newline characters removed
self._lines = []
# self._admin will contain the admin block's values
self._admin = parsed.admin
# self._symbols will contain tags / symbols
self._symbols = dict(map(lambda s: s.split(':'),
self._admin.symbols))
# self._deltas will contain delta blocks
self._deltas = dict((delta.deltanum, delta)
for delta in parsed.deltas)
# self._deltatexts will contain deltatext blocks
self._deltatexts = dict((deltatext.deltanum, deltatext)
for deltatext in parsed.deltatexts)
def get_deltanums(self):
""" Returns a list of all delta numbers. """
return [d.deltanum for d in self._deltas]
def get_delta(self, deltanum):
""" Return metadata about delta delta. """
try:
return self._deltas[deltanum]
except KeyError:
raise ValueError('Delta %s does not exist.' % delta)
def get_deltatext(self, deltanum):
""" Return the deltatext for delta deltanum. """
try:
return self._deltatexts[deltanum]
except KeyError:
raise ValueError('Delta %s does not exist.' % deltanum)
def get_head(self):
""" Return head, or the most revision if unset. """
if self._admin.head:
return self._admin.head
else:
deltaints = [map(int, d.deltanum.split('.'))
for d in self._deltas]
# We don't want any branches, so the max version X.X
return '.'.join(max(deltanum for deltanum in deltaints
if len(deltanum) == 2))
def get_next_tuples(self):
"""
Returns a list of (deltanum, next) tuples for every
delta.
"""
return [(d, self._deltas[d].next) for d in self._deltas]
def get_ancestor_tuples(self, deltanum):
"""
Returns a list of ancestors for delta 'deltanum', including
deltanum, in the format [(delta, next)]
"""
nexts = self.get_next_tuples()
ancestors = []
for i, next in enumerate(nexts):
if next[0] == deltanum:
# Probably more effecient that using 'nexts.remove'
# since we don't have to search the whole list
ancestors.append(nexts.pop(i))
break
# Until there are no more ancestors (next is in the second
# position of the tuple)
while ancestors[-1][1]:
for i, next in enumerate(nexts):
if next[0] == ancestors[-1][1]:
ancestors.append(nexts.pop(i))
break
else:
# If we get here, it means that we can't find the next
# delta, the file is screwy.
warnings.warn("Missing delta: " + deltanum)
break
return ancestors
def get_tags(self, deltanum):
"""
Returns the tags ('symbols' in RCS lingo) associated with
deltanum.
"""
return [x.split(':')[0] for x in self._admin.symbols
if x.endswith(':' + deltanum)]
def get_author(self, deltanum):
""" Returns the author associated with deltanum. """
return self._deltas[deltanum]['author']
def get_date(self, deltanum):
""" Returns the date associated with deltanum. """
return self._deltas[deltanum]['date']
def get_message(self, deltanum):
""" Returns the commit message associated with deltanum. """
return self._deltatexts[deltanum]['log']
def grep(self, pattern, format="rlL", wraplines=False):
"""
By default, outputs a list of
(revision, lineno, line)
where line matches pattern. Line numbering starts at 1.
Optional argument format determines the output tuple format:
r ::= revision
l ::= line number
L ::= matching line
a ::= line's author
d ::= line's date
D ::= line's date in ISO8601 format: YYYY-MM-DDThh:mm:ssZ
t ::= line's tags
f ::= filename
m ::= commit message
Approach is as follows:
1. Set I to the most recent revision, in which the pattern
occurs. Record its revision, the matching linenos and the
matching lines.
2. I = I.next
3. Copy the record for (I.previous) to I.
4. Look at the diff of (I+1) and I.
4.1 Does it delete any lines recorded for I?
4.1.Y Remove the deleted lines from I's record
4.2 Does it add any line matching the pattern?
4.2.Y Add them to I's record
5. Adjust the line numbers
6. exists(I.next)?
6.Y GOTO 2
6.N return records
This approach saves us from having to care about uneeded data,
important when we have a lot to process.
The wraplines parameter decides whether or not we include the
next line when the present line ends with a backslash.
"""
matcher = re.compile(pattern)
if wraplines:
wraplines = re.compile(r'.*\\$')
else:
# Will never match anything
wraplines = re.compile('$.^')
# Matches an insertion command:
insc = re.compile('a(?P<line>[0-9]+) (?P<lines>[0-9]+)')
# Matches a deletion command:
delc = re.compile('d(?P<line>[0-9]+) (?P<lines>[0-9]+)')
# Various statuses we can be in
INSERT = 0 # We're in insert mode
DELETE = 1 # We're in delete mode
# Where we'll store our matches
matches = []
ancestors = self.get_ancestor_tuples(self.get_head())
for delta in ancestors:
# Our current status, INSERT or DELETE
status = None
# M in (a|d)M N
startline = None #
# N in (a|d)M N
nolines = None
# We're inserting which line of the current insert block?
insertline = None
curr, next = delta
deletions = []
insertions = []
# We must keep track of lines to insert. If we appended
# lines directly to match, we would get multiple lines
# associated with a particular line number. Take the command
# 'a3 2' in a 6 line file. Appending directly to match would
# give us two lines with lineno 4 and two with lineno 5.
insertlines = []
deltatext = self.get_deltatext(curr).text.split('\n')
# Do we also take the next line?
takenext = False
for i, line in enumerate(deltatext, 1):
# Ommit the last line in head, it's an empty '\n'
if delta == ancestors[0] and i != len(deltatext):
if matcher.match(line) or takenext:
matches.append((curr, i, line))
if wraplines.match(line):
takenext = True
else:
takenext = False
continue
elif delc.match(line):
status = DELETE
gd = delc.match(line).groupdict()
startline = int(gd['line'])
nolines = int(gd['lines'])
deletions.append((startline, nolines))
elif insc.match(line):
status = INSERT
gd = insc.match(line).groupdict()
startline = int(gd['line'])
nolines = int(gd['lines'])
insertline = 1
insertions.append((startline, nolines))
continue
if status == DELETE:
matches = [match for match in matches
if not (match[0] == curr and
startline <= match[1] < startline + nolines)]
status = None
elif status == INSERT:
if insertline <= nolines:
if matcher.match(line) or takenext:
insertlines.append((curr,
# Inserts are after startline
startline,
insertline,
line))
if wraplines.match(line):
takenext = True
else:
takenext = False
insertline += 1
else:
status = None
insertions.sort()
deletions.sort()
# Delete and insert commands use linenumbers in the
# original file. We however need to determine the line
# numbers of the modified file (that in the current
# delta). Start by looking at all the matches left over
# after deletions.
for i, match in enumerate(matches):
if match[0] == curr:
total_insertions = sum(x[1] for x in insertions
if x[0] < match[1])
total_deletions = sum(x[1] for x in deletions
if x[0] < match[1])
total_adjustment = total_insertions - \
total_deletions
matches[i] = (match[0],
match[1] + total_adjustment,
match[2])
# We can now handle adjusting all those which are insertions
for i, match in enumerate(insertlines):
# If match is part of an insertion, the start of which
# overlapped with a deletion (i.e. d3 2; a3 X), we need
# to decrement match's line number by 1
deliter = iter(deletions)
deliter_val = (-1, -1) # Dummy value for start
match_overlap = 0
for ins in insertions:
if ins[0] == match[1]:
while deliter_val[0] < ins[0]:
try:
deliter_val = deliter.next()
except StopIteration:
break
if deliter_val[0] == ins[0]:
# We have a 'dM N\n aM P', that is, a
# deletion and addition that overlap, and
# match is one of those additions.
match_overlap = 1
break
total_insertions = sum(x[1] for x in insertions
if x[0] < match[1])
total_deletions = sum(x[1] for x in deletions
if x[0] < match[1])
total_adjustment = total_insertions - \
total_deletions - \
match_overlap
insertlines[i] = (match[0],
match[1] + match[2] + total_adjustment,
match[3])
matches.extend(insertlines)
# We originally had:
# first_of_curr = min(x[0] for x in enumerate(matches)
# if x[1][0] == curr)
# but a third of our execution time was spent on it. We
# know our list is sorted by revision, just count
# backwards.
first_of_curr = -1
for i in range(len(matches) - 1, -1, -1):
if matches[i][0] == curr:
first_of_curr = i
else:
break
matches[first_of_curr:] = sorted(matches[first_of_curr:])
if next:
for match in matches:
if match[0] == curr:
matches.append((next, match[1], match[2]))
if format == 'rlL':
return matches
else:
formatted_matches = []
for match in matches:
formatted_match = []
for attr in format:
if attr == 'r':
formatted_match.append(match[0])
elif attr == 'l':
formatted_match.append(match[1])
elif attr == 'L':
formatted_match.append(match[2])
elif attr == 'a':
formatted_match.append(self.get_author(match[0]))
elif attr == 'd':
formatted_match.append(self.get_date(match[0]))
elif attr == 'D':
date = self.get_date(match[0]).split('.')
if len(date[0]) == 2:
# We have a two digit year.
date[0] = '19' + date[0]
# Just glue strings together. Saves us many
# seconds over using strptime and strftime.
formatted_match.append('-'.join(date[:3]) + 'T' +
':'.join(date[3:]) + 'Z')
elif attr == 't':
formatted_match.append(self.get_tags(match[0]))
elif attr == 'f':
formatted_match.append(self._filename)
elif attr == 'm':
formatted_match.append(self.get_message(match[0]))
else:
raise ValueError("Unknown formatting " +
"option %s." % attr)
formatted_matches.append(tuple(formatted_match))
return formatted_matches