Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Add version tracking support and an "unknown hackers" report

Version tracking was used to see who had contributed to the most kernel
releases; not sure it's a long-term-useful feature.  The unknown hackers
report helps when trying to improve the database.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
  • Loading branch information...
commit 1e293bc90a2a0d53340c96e74c9391049f4e7165 1 parent 2797636
Jonathan Corbet authored

Showing 3 changed files with 82 additions and 7 deletions. Show diff stats Hide diff stats

  1. +10 4 gitdm
  2. +2 1  patterns.py
  3. +70 2 reports.py
14 gitdm
@@ -5,8 +5,8 @@
5 5 #
6 6 # This code is part of the LWN git data miner.
7 7 #
8   -# Copyright 2007-11 Eklektix, Inc.
9   -# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
  8 +# Copyright 2007-12 Eklektix, Inc.
  9 +# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
10 10 # Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
11 11 #
12 12 # This file may be distributed under the terms of the GNU General
@@ -43,6 +43,7 @@ DirName = ''
43 43 Aggregate = 'month'
44 44 Numstat = 0
45 45 ReportByFileType = 0
  46 +ReportUnknowns = False
46 47
47 48 #
48 49 # Options:
@@ -60,6 +61,7 @@ ReportByFileType = 0
60 61 # -r pattern Restrict to files matching pattern
61 62 # -s Ignore author SOB lines
62 63 # -u Map unknown employers to '(Unknown)'
  64 +# -U Dump unknown hackers in report
63 65 # -x file.csv Export raw statistics as CSV
64 66 # -w Aggregate the raw statistics by weeks instead of months
65 67 # -y Aggregate the raw statistics by years instead of months
@@ -69,9 +71,9 @@ def ParseOpts ():
69 71 global MapUnknown, DevReports
70 72 global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
71 73 global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
72   - global ReportByFileType
  74 + global ReportByFileType, ReportUnknowns
73 75
74   - opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stuwx:yz')
  76 + opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stUuwx:yz')
75 77 for opt in opts:
76 78 if opt[0] == '-a':
77 79 AkpmOverLt = 1
@@ -102,6 +104,8 @@ def ParseOpts ():
102 104 ReportByFileType = 1
103 105 elif opt[0] == '-u':
104 106 MapUnknown = 1
  107 + elif opt[0] == '-U':
  108 + ReportUnknowns = True
105 109 elif opt[0] == '-x':
106 110 CSVFile = open (opt[1], 'w')
107 111 print "open output file " + opt[1] + "\n"
@@ -492,6 +496,8 @@ if CSVFile:
492 496
493 497 if DevReports:
494 498 reports.DevReports (hlist, TotalChanged, CSCount, TotalRemoved)
  499 +if ReportUnknowns:
  500 + reports.ReportUnknowns(hlist, CSCount)
495 501 reports.EmplReports (elist, TotalChanged, CSCount)
496 502
497 503 if ReportByFileType and Numstat:
3  patterns.py
@@ -21,7 +21,8 @@
21 21 _pemail = r'\s+"?([^<"]+)"?\s<([^>]+)>' # just email addr + name
22 22
23 23 patterns = {
24   - 'commit': re.compile (r'^commit ([0-9a-f ]+)$'),
  24 + 'tagcommit': re.compile (r'^commit ([\da-f]+) .*tag: (v[23]\.\d(\.\d\d?)?)'),
  25 + 'commit': re.compile (r'^commit ([0-9a-f ]+)'),
25 26 'author': re.compile (r'^Author:' + _pemail + '$'),
26 27 'signed-off-by': re.compile (r'^\s+Signed-off-by:' + _pemail + '.*$'),
27 28 'merge': re.compile (r'^Merge:.*$'),
72 reports.py
@@ -3,8 +3,8 @@
3 3 #
4 4 # This code is part of the LWN git data miner.
5 5 #
6   -# Copyright 2007-11 Eklektix, Inc.
7   -# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
  6 +# Copyright 2007-12 Eklektix, Inc.
  7 +# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
8 8 #
9 9 # This file may be distributed under the terms of the GNU General
10 10 # Public License, version 2.
@@ -58,6 +58,10 @@ def BeginReport (title):
58 58 <td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>
59 59 '''
60 60
  61 +TRowStr = ''' <tr class="%s">
  62 +<td>%s</td><td align="right">%d</td><td>%s</td></tr>
  63 +'''
  64 +
61 65 def ReportLine (text, count, pct):
62 66 global HTMLclass
63 67 if count == 0:
@@ -67,6 +71,15 @@ def ReportLine (text, count, pct):
67 71 HTMLfile.write (TRow % (HClasses[HTMLclass], text, count, pct))
68 72 HTMLclass ^= 1
69 73
def ReportLineStr (text, count, extra):
    """Emit one report row whose third column is a free-form string.

    Writes a text line to Outfile and, when HTMLfile is set, a table row
    built from TRowStr to HTMLfile.  Rows with a count of zero are not
    written at all.

    text  -- label for the first column
    count -- integer shown in the second column
    extra -- string shown in the third column
    """
    global HTMLclass
    if count != 0:
        Outfile.write ('%-25s %4d %s\n' % (text, count, extra))
        if HTMLfile:
            row = TRowStr % (HClasses[HTMLclass], text, count, extra)
            HTMLfile.write (row)
            # Flip between the two HClasses entries for the next row.
            HTMLclass ^= 1
  82 +
70 83 def EndReport ():
71 84 if HTMLfile:
72 85 HTMLfile.write ('</table>\n\n')
@@ -284,6 +297,34 @@ def ReportByRepCreds (hlist):
284 297 break
285 298 EndReport ()
286 299
  300 +#
  301 +# Versions.
  302 +#
def CompareVersionCounts (h1, h2):
    """cmp-style comparator ordering hackers by number of versions, largest first.

    Returns a negative value when h1 should sort before h2, a positive
    value when it should sort after, and 0 when they tie.  A hacker whose
    versions attribute is false-y (empty or None) sorts after one that has
    versions.
    """
    first, second = h1.versions, h2.versions
    if not first:
        # h1 has nothing: it ties with an equally empty h2, else goes last.
        return 1 if second else 0
    if not second:
        return -1
    return len (second) - len (first)
  311 +
def MissedVersions (hv, allv):
    """Return the entries of allv that are absent from hv, as one string.

    The missing entries are listed in the reverse of their order in allv
    and joined with single spaces; the result is '' when nothing is
    missing.
    """
    absent = []
    for version in allv:
        if version not in hv:
            absent.append (version)
    return ' '.join (reversed (absent))
  316 +
def ReportVersions (hlist):
    """Report the developers who appear in the largest number of versions.

    Sorts hlist IN PLACE with CompareVersionCounts (cmp-function sort, as
    used elsewhere in this Python 2 code), then emits one ReportLineStr row
    per developer: name, number of versions, and the versions they are
    missing.  Stops once ListCount developers have been processed.

    The versions of the first developer after sorting serve as the
    reference list of "all" versions when computing what others missed.

    hlist -- list of hacker objects exposing .name and .versions
    """
    hlist.sort (CompareVersionCounts)
    BeginReport ('Developers represented in the most kernel versions')
    # An empty hlist has no reference developer; indexing hlist[0]
    # unconditionally would raise IndexError before the report is closed.
    allversions = (hlist[0].versions or []) if hlist else []
    count = 0
    for h in hlist:
        # CompareVersionCounts accepts a false-y versions attribute, so
        # treat it as "no versions" here too rather than failing in len().
        versions = h.versions or []
        ReportLineStr (h.name, len (versions), MissedVersions (versions, allversions))
        count += 1
        if count >= ListCount:
            break
    EndReport ()
287 328
288 329
289 330 def CompareESOBs (e1, e2):
@@ -341,6 +382,33 @@ def EmplReports (elist, totalchanged, cscount):
341 382 ReportByESOBs (elist)
342 383 ReportByEHackers (elist)
343 384
  385 +#
  386 +# Who are the unknown hackers?
  387 +#
def IsUnknown(h):
    """Tell whether hacker h has no known employer.

    Looks at the name of the object stored at h.employer[0][0][1] and
    returns True when that name equals the hacker's first email address or
    is the literal '(Unknown)'.  Presumably the email case covers runs
    where unknown employers are not mapped to '(Unknown)' -- confirm
    against the employer-lookup code.
    """
    employer_name = h.employer[0][0][1].name
    if h.email[0] == employer_name:
        return True
    return employer_name == '(Unknown)'
  391 +
def ReportUnknowns(hlist, cscount):
    """Report the developers whose affiliation is unknown.

    Keeps only the hackers for which IsUnknown() is true, which is meant to
    work whether or not mapping to (Unknown) is happening.  Sorts them with
    ComparePCount and emits one ReportLine row per developer who has at
    least one patch: name, patch count, and that count as a percentage of
    cscount.  Stops once ListCount rows have been emitted.

    hlist   -- list of hacker objects (the caller's list is not modified)
    cscount -- total changeset count used as the percentage denominator
    """
    unknowns = [hacker for hacker in hlist if IsUnknown(hacker)]
    unknowns.sort(ComparePCount)
    BeginReport('Developers with unknown affiliation')
    reported = 0
    for hacker in unknowns:
        npatches = len(hacker.patches)
        if npatches > 0:
            share = (npatches*100.0)/cscount
            ReportLine(hacker.name, npatches, share)
            reported += 1
        if reported >= ListCount:
            break
    EndReport()
  409 +
  410 +
  411 +
344 412 def ReportByFileType (hacker_list):
345 413 total = {}
346 414 total_by_hacker = {}

0 comments on commit 1e293bc

Please sign in to comment.
Something went wrong with that request. Please try again.