## PySAL Change Log Statistics: Table Generation

This notebook generates the summary statistics for use in the 6-month releases of PySAL, which is now (2017-07) a meta package. 

It assumes the subpackages have been git cloned in a directory below the location of this notebook. It also requires network connectivity for some of the reporting.

Run this notebook after `gitcount.ipynb`


In [1]:
from __future__ import print_function
import os
import json
import re
import sys
import pandas
import subprocess
from subprocess import check_output

import yaml
from datetime import datetime, timedelta

from dateutil.parser import parse
import pytz

utc=pytz.UTC

In [2]:
from datetime import datetime, timedelta
from time import sleep
from subprocess import check_output
try:
    from urllib import urlopen
except:
    from urllib.request import urlopen

import ssl
import yaml

context = ssl._create_unverified_context()

In [3]:
with open('packages.yml') as package_file:
    packages = yaml.load(package_file)

In [4]:
CWD = os.path.abspath(os.path.curdir)

In [5]:
CWD

'/home/serge/Dropbox/p/pysal/src/pysal'

Our last main release was `2017-11-03`:

In [6]:
start_date = '2017-11-03'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since

datetime.datetime(2017, 11, 3, 0, 0)

In [7]:
import pickle 

In [8]:
issue_details = pickle.load( open( "issue_details.p", "rb" ) )
pull_details = pickle.load( open( "pull_details.p", "rb" ) )


In [9]:
# get dates of tags
with open('subtags', 'r') as tag_name:
        tags = tag_name.readlines()


In [10]:
tag_dates = {}
#root = '/home/serge/Dropbox/p/pysal/src/pysal/tmp/'
root = CWD + "/tmp/"
for record in tags:
    pkg, tag = record.strip().split()
    tag = tag.split('/')[-1]
    pkdir = root+pkg
    cmd = "git log -1 --format=%ai {tag}".format(tag=tag)
    os.chdir(pkdir)
    #print(cmd)
    result = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE)
    tag_string = result.stdout.decode('utf-8')
    tag_date = tag_string.split()[0]
    tag_dates[pkg] = tag_date
    print(pkg, tag, tag_date)

os.chdir(CWD)

    

libpysal v4.0.1 2018-10-27
esda v2.0.1 2018-11-04
giddy v2.0.0 2018-08-26
inequality v1.0.0 2018-10-31
pointpats V1.1.0 2018-05-17
spaghetti v1.1.0 2018-10-31
mapclassify v2.0.1 2018-10-28
spreg v1.0.4 2018-08-24
spglm v1.0.6 2018-10-31
spint v1.0.4 2018-11-01
mgwr v2.0.1 2018-09-27
spvcm 0.2.0 2018-07-23


In [11]:
# get issues for a package and filter on tag date


for pkg in tag_dates.keys():
    issues = issue_details[pkg]
    tag_date = utc.localize(parse(tag_dates[pkg]))
    keep = []
    for issue in issues:
        closed = parse(issue['closed_at'])
        if closed <= tag_date:
            keep.append(issue)
    print(pkg, len(issues), len(keep))
    issue_details[pkg] = keep
    keep = []
    pulls = pull_details[pkg]
    for pull in pulls:
        closed = parse(pull['closed_at'])
        if closed <= tag_date:
            keep.append(pull)
    print(pkg, len(pulls), len(keep)) 
    pull_details[pkg] = keep
        
  

libpysal 82 79
libpysal 60 57
esda 33 29
esda 22 19
giddy 50 38
giddy 41 31
inequality 11 6
inequality 9 5
pointpats 12 1
pointpats 12 1
spaghetti 185 94
spaghetti 87 34
mapclassify 15 9
mapclassify 13 8
spreg 11 8
spreg 7 6
spglm 15 12
spglm 11 9
spint 14 11
spint 11 9
mgwr 39 30
mgwr 24 16
spvcm 3 2
spvcm 1 1


In [12]:
# commits
cmd = ['git', 'log', '--oneline', since_date]

activity = {}
total_commits = 0
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        tag_date = tag_dates[subpackage]
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
        ncommits = len(check_output(cmd_until).splitlines())
        ncommits_total = len(check_output(cmd).splitlines())
        print(subpackage, ncommits_total, ncommits)
        total_commits += ncommits
        activity[subpackage] = ncommits

libpysal 276 276
esda 80 80
giddy 137 134
inequality 36 32
pointpats 13 8
spaghetti 282 282
mapclassify 48 48
spreg 57 57
spglm 70 69
spint 57 57
mgwr 229 223
spvcm 21 20


In [13]:
cmd_until

['git', 'log', '--oneline', '--since="2017-11-03"', '--until="2018-07-23"']

In [14]:
identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),
              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),
              'Wei Kang': ('Wei Kang', 'weikang9009'),
              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas')
}

def regularize_identity(string):
    string = string.decode()
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(' '))>1:
        string = string.title()
    return string.lstrip('* ')

In [15]:
author_cmd = ['git', 'log', '--format=* %aN', since_date]

In [16]:
author_cmd.append('blank')

In [17]:
author_cmd

['git', 'log', '--format=* %aN', '--since="2017-11-03"', 'blank']

In [18]:
from collections import Counter

In [19]:
authors_global = set()
authors = {}
global_counter = Counter()
counters = dict()
cmd = ['git', 'log', '--oneline', since_date]
total_commits = 0
activity = {}
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        ncommits = len(check_output(cmd).splitlines())
        
        
        tag_date = tag_dates[subpackage]
        author_cmd[-1] = '--until="{tag_date}"'.format(tag_date=tag_date)
        #cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]

        
        all_authors = check_output(author_cmd).splitlines()
        counter = Counter([regularize_identity(author) for author in all_authors])
        global_counter += counter
        counters.update({'.'.join((package,subpackage)): counter})
        unique_authors = sorted(set(all_authors))
        authors[subpackage] =  unique_authors
        authors_global.update(unique_authors)
        total_commits += ncommits
        activity[subpackage] = ncommits

In [20]:
authors_global

{b'* Dani Arribas-Bel',
 b'* James Gaboardi',
 b'* Levi John Wolf',
 b'* Philip Kahn',
 b'* Serge Rey',
 b'* Sergio Rey',
 b'* Stefanie Lumnitz',
 b'* Taylor Oshan',
 b'* Wei Kang',
 b'* Ziqi Li',
 b'* eli knaap',
 b'* ljwolf',
 b'* serge',
 b'* weikang9009'}

In [21]:
activity

{'libpysal': 276,
 'esda': 80,
 'giddy': 137,
 'inequality': 36,
 'pointpats': 13,
 'spaghetti': 282,
 'mapclassify': 48,
 'spreg': 57,
 'spglm': 70,
 'spint': 57,
 'mgwr': 229,
 'spvcm': 21}

In [22]:
counters

{'lib.libpysal': Counter({'Serge Rey': 101,
          'Levi John Wolf': 112,
          'Wei Kang': 18,
          'James Gaboardi': 8,
          'Eli Knaap': 12,
          'Dani Arribas-Bel': 19,
          'Taylor Oshan': 4,
          'Stefanie Lumnitz': 2}),
 'explore.esda': Counter({'Serge Rey': 50,
          'James Gaboardi': 1,
          'Wei Kang': 8,
          'Levi John Wolf': 11,
          'Stefanie Lumnitz': 10}),
 'explore.giddy': Counter({'Wei Kang': 103,
          'Stefanie Lumnitz': 10,
          'Serge Rey': 18,
          'Levi John Wolf': 1,
          'Eli Knaap': 2}),
 'explore.inequality': Counter({'Wei Kang': 11,
          'Serge Rey': 20,
          'Levi John Wolf': 1}),
 'explore.pointpats': Counter({'Serge Rey': 2, 'Levi John Wolf': 6}),
 'explore.spaghetti': Counter({'James Gaboardi': 278,
          'Levi John Wolf': 3,
          'Wei Kang': 1}),
 'viz.mapclassify': Counter({'Wei Kang': 23,
          'Serge Rey': 22,
          'Levi John Wolf': 3}),
 'model.spreg':

In [23]:
counters

{'lib.libpysal': Counter({'Serge Rey': 101,
          'Levi John Wolf': 112,
          'Wei Kang': 18,
          'James Gaboardi': 8,
          'Eli Knaap': 12,
          'Dani Arribas-Bel': 19,
          'Taylor Oshan': 4,
          'Stefanie Lumnitz': 2}),
 'explore.esda': Counter({'Serge Rey': 50,
          'James Gaboardi': 1,
          'Wei Kang': 8,
          'Levi John Wolf': 11,
          'Stefanie Lumnitz': 10}),
 'explore.giddy': Counter({'Wei Kang': 103,
          'Stefanie Lumnitz': 10,
          'Serge Rey': 18,
          'Levi John Wolf': 1,
          'Eli Knaap': 2}),
 'explore.inequality': Counter({'Wei Kang': 11,
          'Serge Rey': 20,
          'Levi John Wolf': 1}),
 'explore.pointpats': Counter({'Serge Rey': 2, 'Levi John Wolf': 6}),
 'explore.spaghetti': Counter({'James Gaboardi': 278,
          'Levi John Wolf': 3,
          'Wei Kang': 1}),
 'viz.mapclassify': Counter({'Wei Kang': 23,
          'Serge Rey': 22,
          'Levi John Wolf': 3}),
 'model.spreg':

In [24]:
issues[0]

{'url': 'https://api.github.com/repos/pysal/spvcm/issues/7',
 'repository_url': 'https://api.github.com/repos/pysal/spvcm',
 'labels_url': 'https://api.github.com/repos/pysal/spvcm/issues/7/labels{/name}',
 'comments_url': 'https://api.github.com/repos/pysal/spvcm/issues/7/comments',
 'events_url': 'https://api.github.com/repos/pysal/spvcm/issues/7/events',
 'html_url': 'https://github.com/pysal/spvcm/pull/7',
 'id': 389048162,
 'node_id': 'MDExOlB1bGxSZXF1ZXN0MjM3MTQ2ODU4',
 'number': 7,
 'title': 'change the API to reflect up-to-date libpysal api',
 'user': {'login': 'ljwolf',
  'id': 2250995,
  'node_id': 'MDQ6VXNlcjIyNTA5OTU=',
  'avatar_url': 'https://avatars3.githubusercontent.com/u/2250995?v=4',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/ljwolf',
  'html_url': 'https://github.com/ljwolf',
  'followers_url': 'https://api.github.com/users/ljwolf/followers',
  'following_url': 'https://api.github.com/users/ljwolf/following{/other_user}',
  'gists_url': 'https://api

In [25]:
issues

[{'url': 'https://api.github.com/repos/pysal/spvcm/issues/7',
  'repository_url': 'https://api.github.com/repos/pysal/spvcm',
  'labels_url': 'https://api.github.com/repos/pysal/spvcm/issues/7/labels{/name}',
  'comments_url': 'https://api.github.com/repos/pysal/spvcm/issues/7/comments',
  'events_url': 'https://api.github.com/repos/pysal/spvcm/issues/7/events',
  'html_url': 'https://github.com/pysal/spvcm/pull/7',
  'id': 389048162,
  'node_id': 'MDExOlB1bGxSZXF1ZXN0MjM3MTQ2ODU4',
  'number': 7,
  'title': 'change the API to reflect up-to-date libpysal api',
  'user': {'login': 'ljwolf',
   'id': 2250995,
   'node_id': 'MDQ6VXNlcjIyNTA5OTU=',
   'avatar_url': 'https://avatars3.githubusercontent.com/u/2250995?v=4',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/ljwolf',
   'html_url': 'https://github.com/ljwolf',
   'followers_url': 'https://api.github.com/users/ljwolf/followers',
   'following_url': 'https://api.github.com/users/ljwolf/following{/other_user}',
   'gist

In [26]:
subs = issue_details.keys()
table = []
for sub in subs:
    total= issue_details[sub]
    pr = pull_details[sub]
    
    row = [sub, activity[sub], len(total), len(pr)]
    table.append(row)

In [27]:
table

[['libpysal', 276, 79, 57],
 ['esda', 80, 29, 19],
 ['giddy', 137, 38, 31],
 ['inequality', 36, 6, 5],
 ['pointpats', 13, 1, 1],
 ['spaghetti', 282, 94, 34],
 ['mapclassify', 48, 9, 8],
 ['spreg', 57, 8, 6],
 ['spglm', 70, 12, 9],
 ['spint', 57, 11, 9],
 ['mgwr', 229, 30, 16],
 ['spvcm', 21, 2, 1]]

In [28]:
os.chdir(CWD)

import pandas

In [29]:
df = pandas.DataFrame(table, columns=['package', 'commits', 'total issues', 'pulls'])

In [30]:
df.sort_values(['commits','pulls'], ascending=False)\
  .to_html('./commit_table.html', index=None)

In [31]:
df.sum()

package         libpysalesdagiddyinequalitypointpatsspaghettim...
commits                                                      1306
total issues                                                  319
pulls                                                         196
dtype: object

In [32]:
contributor_table = pandas.DataFrame.from_dict(counters).fillna(0).astype(int).T

In [33]:
contributor_table.to_html('./contributor_table.html')

In [34]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits')

Unnamed: 0,commits
Dani Arribas-Bel,19
Eli Knaap,14
James Gaboardi,310
Levi John Wolf,216
Philip Kahn,2
Serge Rey,230
Stefanie Lumnitz,22
Taylor Oshan,236
Wei Kang,186
Ziqi Li,51
