Skip to content
This repository has been archived by the owner on Jan 6, 2022. It is now read-only.

Commit

Permalink
Merge branch 'refactoring' of git://gitorious.org/mining-tools/gitdm …
Browse files Browse the repository at this point in the history
…into german
  • Loading branch information
Jonathan Corbet committed Jul 11, 2011
2 parents 85004f0 + 69f9ad7 commit 47ffed3
Show file tree
Hide file tree
Showing 11 changed files with 926 additions and 159 deletions.
141 changes: 106 additions & 35 deletions ConfigFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,42 @@
import sys, re, datetime, os.path
import database

#
# Read a line and strip out junk.
#
def ReadConfigLine (file):
    """
    Return the next meaningful line from an open config file.

    Everything from the first '#' on a line is dropped as a comment and the
    surrounding white space is stripped; lines left empty by that are
    skipped.  Returns None once the file is exhausted.

    Blank lines are skipped with a loop rather than by recursion, so a long
    run of empty or comment-only lines cannot hit the recursion limit.
    """
    while True:
        line = file.readline ()
        if not line:                          # end of file
            return None
        line = line.split ('#')[0].strip ()   # drop comment and white space
        if line:
            return line
class ReadConfigLine:
    """
    ReadConfigLine provides an iterator that extracts the lines of a
    config file with comments removed.

    Everything from the first '#' on a line is discarded, the remainder is
    stripped of surrounding white space, and lines that end up empty are
    skipped.  The file object is only read from; closing it is left to
    the caller.

    Typical use case:
        fd = open(filename, 'r')
        for line in ReadConfigLine(fd):
            parse_line(line)
        fd.close()
    """

    def __init__(self, fd):
        self.fd = fd
        # Not read anywhere inside this class; kept so the attributes
        # remain available to any outside code that looks at them.
        self.buffer = None
        self.patch = []

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next non-empty line, stripped of comments and white space.

        Raises StopIteration once the underlying file is exhausted.
        """
        while True:
            line = self.fd.readline()
            if not line:                        # end of file
                raise StopIteration
            line = line.split('#')[0].strip()   # drop comment and white space
            if line:
                return line

    # Python 3 looks for __next__ rather than next.
    __next__ = next


#
# Give up and die.
Expand All @@ -38,19 +62,19 @@ def croak (message):
#
def ReadEmailAliases (name):
# Read the email alias file `name` and register every alias it contains
# through database.AddEmailAlias ().  An unreadable file or a malformed
# line is reported through croak ().
# NOTE(review): this span is a rendered diff without +/- markers; the
# statements using `file` and those using `fd` look like the before/after
# forms of the same code -- confirm against the real file.
try:
file = open (name, 'r')
fd = open (name, 'r')
except IOError:
croak ('Unable to open email alias file %s' % (name))
line = ReadConfigLine (file)
while line:

for line in ReadConfigLine (fd):
# Group 1 is either a double-quoted string or a run of non-blank
# characters; group 2 is the remainder of the line.
m = re.match ('^("[^"]+"|\S+)\s+(.+)$', line)
if not m or len (m.groups ()) != 2:
croak ('Funky email alias line "%s"' % (line))
# find () gives -1 when there is no '@' and 0 when '@' is the first
# character; both cases are rejected.
if m and m.group (2).find ('@') <= 0:
croak ('Non-addresses in email alias "%s"' % (line))
# Double quotes are removed from group 1 before it is stored.
database.AddEmailAlias (m.group (1).replace ('"', ''), m.group (2))
line = ReadConfigLine (file)
file.close ()

fd.close ()

#
# The Email/Employer map
Expand All @@ -59,20 +83,20 @@ def ReadEmailAliases (name):

def ReadEmailEmployers (name):
# Read the email/employer file `name` and record one mapping per line
# through database.AddEmailEmployerMapping ().  An unreadable file or a
# line that EMMpat does not match is reported through croak ().
# NOTE(review): this span is a rendered diff without +/- markers; the
# statements using `file` and those using `fd` look like the before/after
# forms of the same code -- confirm against the real file.
try:
file = open (name, 'r')
fd = open (name, 'r')
except IOError:
croak ('Unable to open email/employer file %s' % (name))
line = ReadConfigLine (file)
while line:

for line in ReadConfigLine (fd):
# EMMpat is defined outside this span; only its groups 1, 2 and 4 are
# used here.
m = EMMpat.match (line)
if not m:
croak ('Funky email/employer line "%s"' % (line))
email = m.group (1)
company = m.group (2).strip ()
# Group 4 is handed to ParseDate () and the result stored as the end date.
enddate = ParseDate (m.group (4))
database.AddEmailEmployerMapping (email, company, enddate)
line = ReadConfigLine (file)
file.close ()

fd.close ()

def ParseDate (cdate):
if not cdate:
Expand All @@ -83,22 +107,22 @@ def ParseDate (cdate):

def ReadGroupMap (fname, employer):
# Read the group map file `fname` and map every line in it to `employer`
# through database.AddEmailEmployerMapping ().  An unreadable file is
# reported through croak ().
# NOTE(review): this span is a rendered diff without +/- markers; the
# statements using `file` and those using `fd` look like the before/after
# forms of the same code -- confirm against the real file.
try:
file = open (fname, 'r')
fd = open (fname, 'r')
except IOError:
croak ('Unable to open group map file %s' % (fname))
line = ReadConfigLine (file)
while line:

for line in ReadConfigLine (fd):
# The whole cleaned-up line is passed as the first argument; no end date
# is supplied.
database.AddEmailEmployerMapping (line, employer)
line = ReadConfigLine (file)
file.close ()

fd.close ()

#
# Read in a virtual employer description.
#
def ReadVirtual (file, name):
def ReadVirtual (fd, name):
ve = database.VirtualEmployer (name)
line = ReadConfigLine (file)
while line:

for line in ReadConfigLine (fd):
sl = line.split (None, 1)
first = sl[0]
if first == 'end':
Expand All @@ -116,23 +140,57 @@ def ReadVirtual (file, name):
if not (0 < percent <= 100):
croak ('Bad split value "%s" for virtual empl %s' % (first, name))
ve.addsplit (' '.join (sl[1:]), percent/100.0)
line = ReadConfigLine (file)
#
# We should never get here
#
croak ('Missing "end" line for virtual employer %s' % (name))

#
# Read file type patterns for more fine-grained reports
#
def ReadFileType (filename):
    """
    Read a file type mapping file.

    Two kinds of lines are recognized:

        order <type1>,<type2>,...,<typeN>
        filetype <type> <regex>

    Returns a (patterns, order) tuple.  `patterns` maps each type name to
    the list of its regular expressions, compiled case-insensitively, in
    file order.  `order` is the list of type names taken from the first
    `order` line seen while the list is still empty; a type that is not in
    that list when its first `filetype` line is read is appended to it.

    An unreadable file or an unrecognized line is reported through croak ().
    """
    try:
        fd = open (filename, 'r')
    except IOError:
        croak ('Unable to open file type mapping file %s' % (filename))
    patterns = {}
    order = []
    regex_order = re.compile (r'^order\s+(.*)$')
    regex_file_type = re.compile (r'^filetype\s+(\S+)\s+(.+)$')

    try:
        for line in ReadConfigLine (fd):
            o = regex_order.match (line)
            if o:
                # Consider only the first definition in the config file
                elements = o.group (1).replace (' ', '')
                order = order or elements.split (',')
                continue

            m = regex_file_type.match (line)
            if not m:
                # croak () is a function of this module; the name ConfigFile
                # here is a function too and has no croak attribute.
                croak ('Funky file type line "%s"' % (line))
            ftype = m.group (1)
            if ftype not in patterns:
                patterns[ftype] = []
            if ftype not in order:
                # Parenthesized single argument: prints the same text under
                # both the print statement and the print function.
                print ('%s not found, appended to the last order' % ftype)
                order.append (ftype)

            patterns[ftype].append (re.compile (m.group (2), re.IGNORECASE))
    finally:
        fd.close ()
    return patterns, order

#
# Read an overall config file.
#

def ConfigFile (name, confdir):
try:
file = open (name, 'r')
fd = open (name, 'r')
except IOError:
croak ('Unable to open config file %s' % (name))
line = ReadConfigLine (file)
while line:

for line in ReadConfigLine (fd):
sline = line.split (None, 2)
if len (sline) < 2:
croak ('Funky config line: "%s"' % (line))
Expand All @@ -146,7 +204,20 @@ def ConfigFile (name, confdir):
ReadGroupMap (os.path.join (confdir, sline[1]), sline[2])
elif sline[0] == 'VirtualEmployer':
ReadVirtual (file, ' '.join (sline[1:]))
elif sline[0] == 'FileTypeMap':
patterns, order = ReadFileType (os.path.join (confdir, sline[1]))
database.FileTypes = database.FileType (patterns, order)
else:
croak ('Unrecognized config line: "%s"' % (line))
line = ReadConfigLine (file)


if __name__ == '__main__':
    # Test the iterator for reading configuration files: print every
    # meaningful line of the file named on the command line.
    try:
        fd = open(sys.argv[1])
    except (IndexError, IOError):
        # IndexError: no file name given; IOError: the file cannot be opened.
        croak('Usage: %s <config-file>' % sys.argv[0])

    try:
        for line in ReadConfigLine(fd):
            # Parenthesized single argument: valid as a print statement
            # and as a print function call.
            print(line)
    finally:
        fd.close()

47 changes: 41 additions & 6 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Run it like this:

git log -p -M [details] | gitdm [options]

Alternatively, you can run with:

git log --numstat -M [details] | gitdm -n [options]

The [details] tell git which changesets are of interest; the [options] can
be:

Expand All @@ -32,26 +36,35 @@ be:
By default, "./gitdm.config" is used.

-d Omit the developer reports, giving employer information
only.
only.

-D Rather than create the usual statistics, create a
file (datelc) providing lines changed per day, where the first column
displays the changes happened only on that day and the second sums
the day it happnened with the previous ones. This option is suitable
for feeding to a tool like gnuplot.
-D Rather than create the usual statistics, create a file (datelc.csv)
providing lines changed per day, where the first column displays
the changes that happened on that day alone and the second gives the
running total up to and including that day. This option is suitable for
feeding to a tool like gnuplot.

-h file Generate HTML output to the given file

-l num Only list the top <num> entries in each report.

-n Use --numstat instead of generated patches to get the statistics.

-o file Write text output to the given file (default is stdout).

-p prefix Dump out the database categorized by changeset and by file type.
It requires -n, otherwise it is not possible to get separated results.

-r pat Only generate statistics for changes to files whose
name matches the given regular expression.

-s Ignore Signed-off-by lines which match the author of
each patch.

-t Generate a report by type of contribution (code, documentation, etc.).
It requires -n, otherwise this option is ignored silently.


-u Group all unknown developers under the "(Unknown)"
employer.

Expand All @@ -68,6 +81,10 @@ looks like:
git log -p -M v2.6.19..v2.6.20 | \
gitdm -u -s -a -o results -h results.html

or:

git log --numstat -M v2.6.19..v2.6.20 | \
gitdm -u -s -a -n -o results -h results.html

CONFIGURATION FILE

Expand Down Expand Up @@ -134,6 +151,24 @@ end
for example, no check to ensure that the percentages add up to
something rational.

FileTypeMap file

Map file names/extensions onto file types. These files contain lines
like:

order <type1>,<type2>,...,<typeN>

filetype <type> <regex>
...

This construct allows fine-grained reports by type of contribution
(build, code, image, multimedia, documentation, etc.)

Order is important because a file name may match the patterns of more
than one type. For instance, ltmain.sh fits better as 'build' than as
'code' (matching on the file name rather than on '\.sh$'). The first
element in order has precedence over the next ones.


OTHER TOOLS

Expand Down
40 changes: 0 additions & 40 deletions csv.py

This file was deleted.

Loading

0 comments on commit 47ffed3

Please sign in to comment.