Permalink
Browse files

Move out the grabpatch from the parser

The class LogPatchSplitter provides an iterator per patch.  This
makes the code cleaner, easier to read and more pythonic.
The class only splits the log: it returns each changeset as a list of lines.

It is possible to test it separately by:
   $ git log | python logparser.py | more

Signed-off-by: Germán Póo-Caamaño <gpoo@gnome.org>
  • Loading branch information...
1 parent efcc420 commit 7b26ae210995a6a746dbfb20ede382bee7808dc9 @gpoo gpoo committed Jun 23, 2011
Showing with 102 additions and 24 deletions.
  1. +12 −24 gitdm
  2. +90 −0 logparser.py
View
36 gitdm
@@ -1,11 +1,12 @@
#!/usr/bin/python
-#
+#-*- coding:utf-8 -*-
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
+# Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
@@ -15,6 +16,7 @@ import database, csvdump, ConfigFile, reports
import getopt, datetime
import os, re, sys, rfc822, string
import file_types
+import logparser
from patterns import patterns
Today = datetime.date.today()
@@ -204,29 +206,14 @@ def parse_numstat(line, file_filter):
#
# The core hack for grabbing the information about a changeset.
#
-def grabpatch():
- global NextLine
-
- while (1):
- m = patterns['commit'].match (NextLine)
- if m:
- break;
- NextLine = sys.stdin.readline ()
- if not NextLine:
- return
+def grabpatch(logpatch):
+ m = patterns['commit'].match (logpatch[0])
+ if not m:
+ return None
p = patch(m.group (1))
- NextLine = sys.stdin.readline ()
ignore = (FileFilter is not None)
- while NextLine:
- Line = NextLine
- #
- # If this line starts a new commit, drop out.
- #
- m = patterns['commit'].match (Line)
- if m:
- break
- NextLine = sys.stdin.readline ()
+ for Line in logpatch[1:]:
#
# Maybe it's an author line?
#
@@ -379,20 +366,21 @@ if AkpmOverLt == 1:
Akpm = ('akpm@linux-foundation.org',
LookupStoreHacker ('Andrew Morton', 'akpm@linux-foundation.org'))
-NextLine = sys.stdin.readline ()
TotalChanged = TotalAdded = TotalRemoved = 0
#
# Snarf changesets.
#
print >> sys.stderr, 'Grabbing changesets...\r',
+patches = logparser.LogPatchSplitter(sys.stdin)
printcount = CSCount = 0
-while (1):
+
+for logpatch in patches:
if (printcount % 50) == 0:
print >> sys.stderr, 'Grabbing changesets...%d\r' % printcount,
printcount += 1
- p = grabpatch()
+ p = grabpatch(logpatch)
if not p:
break
# if p.added > 100000 or p.removed > 100000:
View
90 logparser.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+#
+# Copyright © 2009 Germán Póo-Caamaño <gpoo@gnome.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+import sys
+from patterns import patterns
+
class LogPatchSplitter:
    """
    LogPatchSplitter provides an iterator to extract every
    changeset from a git log output.

    Each item produced is one changeset as a list of raw lines: the
    first element is the line matching patterns['commit'], followed by
    every line read up to (but not including) the next commit line.
    Lines that appear before the first commit line are discarded.

    Typical use case:

        patches = LogPatchSplitter(sys.stdin)

        for patch in patches:
            parse_patch(patch)
    """

    def __init__(self, fd):
        """
        fd is any object with a readline() method that returns an
        empty string at end of input (e.g. sys.stdin).
        """
        self.fd = fd
        # Commit line already read from fd that belongs to the *next*
        # changeset; None when nothing is pending.
        self.buffer = None
        # Not used by this class; kept so the instance attributes stay
        # the same for any existing caller.
        self.patch = []

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next changeset, or raise StopIteration when the
        input has no more commit lines.
        """
        patch = self.__grab_patch__()
        if not patch:
            raise StopIteration
        return patch

    # Python 3 spells the iterator method __next__; alias it so the
    # class can be iterated under either major version.
    __next__ = next

    def __grab_patch__(self):
        """
        Extract a patch from the file descriptor and the
        patch is returned as a list of lines.

        Returns None when the input is exhausted before a commit line
        is found.
        """
        is_commit = patterns['commit'].match

        # Consume the pending commit line and clear it right away.  If
        # it were left in place, a final changeset with no body lines
        # would be handed out again on every following call.
        line, self.buffer = self.buffer, None
        if not line:
            line = self.fd.readline()

        # Skip anything that precedes the start of a changeset.
        while line and not is_commit(line):
            line = self.fd.readline()

        if not line:
            return None

        patch = [line]

        line = self.fd.readline()
        while line:
            # If this line starts a new commit, keep it for the next
            # call and drop out.
            if is_commit(line):
                self.buffer = line
                break

            patch.append(line)
            line = self.fd.readline()

        return patch
+
+
+if __name__ == '__main__':
+ patches = LogPatchSplitter(sys.stdin)
+
+ for patch in patches:
+ print '---------- NEW PATCH ----------'
+ for line in patch:
+ print line,

0 comments on commit 7b26ae2

Please sign in to comment.