## PySAL Change Log Statistics: Table Generation

This notebook generates the summary statistics for use in the 6-month releases of PySAL, which is now a meta package. 

It assumes the subpackages have been git cloned in a directory below the location of this notebook. It also requires network connectivity for some of the reporting.

Run this notebook after `100-gitcount.ipynb`


In [59]:
from __future__ import print_function
import os
import json
import re
import sys
import pandas
import subprocess
from subprocess import check_output

#import yaml
from datetime import datetime, timedelta

from dateutil.parser import parse
import pytz

utc=pytz.UTC

try:
    from urllib import urlopen
except:
    from urllib.request import urlopen




In [60]:
PYSAL_RELEASE = '2021-01-17'
release_date = datetime.strptime(PYSAL_RELEASE+" 0:0:0", "%Y-%m-%d %H:%M:%S")

In [61]:
release_date

datetime.datetime(2021, 1, 17, 0, 0)

In [62]:
CWD = os.path.abspath(os.path.curdir)

In [63]:
CWD

'/home/serge/projects/pysal/tools'

In [64]:
start_date = '2020-02-09'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since

datetime.datetime(2020, 2, 9, 0, 0)

In [65]:
with open('frozen.txt', 'r') as package_list:
    packages = package_list.readlines()
    packages = dict([package.strip().split(">=") for package in packages])

In [66]:
packages

{'libpysal': '4.3.5',
 'access': '1.1.1',
 'esda': '2.3.5',
 'giddy': '2.3.3',
 'inequality': '1.0.0',
 'pointpats': '2.2.0',
 'segregation': '1.5.0',
 'spaghetti': '1.5.6',
 'mgwr': '2.1.2',
 'spglm': '1.0.8',
 'spint': '1.0.7',
 'spreg': '1.2.2',
 'spvcm': '0.3.0',
 'tobler': '0.5.4',
 'mapclassify': '2.4.2',
 'splot': '.1.1.3',
 'spopt': '0.0.5'}

In [67]:
packages['pysal'] = '2.4.0.rc1'

In [68]:
import pickle

In [69]:
issues_closed = pickle.load(open("issues_closed.p", 'rb'))
pulls_closed = pickle.load(open('pulls_closed.p', 'rb'))

In [70]:
type(issues_closed)

dict

In [71]:
issues_closed.keys()

dict_keys(['libpysal', 'access', 'esda', 'giddy', 'inequality', 'pointpats', 'segregation', 'spaghetti', 'mgwr', 'spglm', 'spint', 'spreg', 'spvcm', 'tobler', 'mapclassify', 'splot', 'spopt', 'pysal'])

In [72]:
from release_info import get_pypi_info, get_github_info, clone_masters

In [73]:
#github_releases = get_github_info()

github_releases = pickle.load(open("releases.p", 'rb'))


In [74]:
pypi_releases = get_pypi_info()

In [75]:
from datetime import datetime

In [76]:
pysal_date = datetime.strptime('2021-01-17T12:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
#ISO8601 = "%Y-%m-%dT%H:%M:%SZ"


In [77]:
pysal_rel = {'version': 'v2.4.0rc1',
            'release_date': pysal_date}
github_releases['pysal'] = pysal_rel

In [78]:
github_releases

{'libpysal': {'version': 'v4.3.5',
  'url': 'https://api.github.com/repos/pysal/libpysal/tarball/v4.3.5',
  'release_date': datetime.datetime(2021, 1, 17, 4, 6, 18)},
 'access': {'version': 'v1.1.1',
  'url': 'https://api.github.com/repos/pysal/access/tarball/v1.1.1',
  'release_date': datetime.datetime(2020, 7, 12, 17, 23, 50)},
 'esda': {'version': 'v2.3.5',
  'url': 'https://api.github.com/repos/pysal/esda/tarball/v2.3.5',
  'release_date': datetime.datetime(2020, 12, 30, 3, 18, 36)},
 'giddy': {'version': 'v2.3.3',
  'url': 'https://api.github.com/repos/pysal/giddy/tarball/v2.3.3',
  'release_date': datetime.datetime(2020, 6, 10, 4, 59, 45)},
 'inequality': {'version': 'v1.0.0',
  'url': 'https://api.github.com/repos/pysal/inequality/tarball/v1.0.0',
  'release_date': datetime.datetime(2018, 10, 31, 22, 28, 18)},
 'pointpats': {'version': 'v2.2.0',
  'url': 'https://api.github.com/repos/pysal/pointpats/tarball/v2.2.0',
  'release_date': datetime.datetime(2020, 7, 27, 22, 17, 33)},


In [79]:
from datetime import datetime
datetime.fromtimestamp(0)
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"


final_pulls = {}
final_issues = {}
for package in packages:
    filtered_issues = []
    filtered_pulls = []
    released = github_releases[package]['release_date']
    package_pulls = pulls_closed[package]
    package_issues = issues_closed[package]
    for issue in package_issues:
        #print(issue['number'], issue['title'], issue['closed_at'])
        closed = datetime.strptime(issue['closed_at'], ISO8601)
        if closed <= released:
            filtered_issues.append(issue)
    final_issues[package] = filtered_issues
    for pull in package_pulls:
        #print(pull['number'], pull['title'], pull['closed_at'])
        closed = datetime.strptime(pull['closed_at'], ISO8601)
        if closed <= released:
            filtered_pulls.append(pull)
    final_pulls[package] = filtered_pulls
    print(package, released, len(package_issues), len(filtered_issues), len(package_pulls),
         len(filtered_pulls))

libpysal 2021-01-17 04:06:18 100 99 0 0
access 2020-07-12 17:23:50 14 6 7 5
esda 2020-12-30 03:18:36 45 43 28 26
giddy 2020-06-10 04:59:45 34 28 0 0
inequality 2018-10-31 22:28:18 0 0 0 0
pointpats 2020-07-27 22:17:33 11 9 8 7
segregation 2020-12-30 03:31:01 20 18 0 0
spaghetti 2021-01-04 20:50:06 100 96 0 0
mgwr 2020-09-08 21:20:34 6 5 4 4
spglm 2020-09-08 20:34:08 4 4 3 3
spint 2020-09-09 02:28:50 5 2 3 2
spreg 2021-01-15 19:15:15 33 32 17 17
spvcm 2020-02-02 19:42:39 0 0 0 0
tobler 2020-12-22 18:14:45 70 54 46 34
mapclassify 2020-12-22 23:35:40 52 52 30 30
splot 2020-03-23 11:53:30 22 4 13 4
spopt 2021-01-17 18:44:46 100 97 62 62
pysal 2021-01-17 12:00:00 79 74 0 0


In [80]:
issue_details = final_issues
pull_details = final_pulls

In [81]:
packages

{'libpysal': '4.3.5',
 'access': '1.1.1',
 'esda': '2.3.5',
 'giddy': '2.3.3',
 'inequality': '1.0.0',
 'pointpats': '2.2.0',
 'segregation': '1.5.0',
 'spaghetti': '1.5.6',
 'mgwr': '2.1.2',
 'spglm': '1.0.8',
 'spint': '1.0.7',
 'spreg': '1.2.2',
 'spvcm': '0.3.0',
 'tobler': '0.5.4',
 'mapclassify': '2.4.2',
 'splot': '.1.1.3',
 'spopt': '0.0.5',
 'pysal': '2.4.0.rc1'}

In [82]:
github_releases['pysal']['release_date'] = release_date

In [83]:
released

datetime.datetime(2021, 1, 17, 12, 0)

In [84]:
# commits
cmd = ['git', 'log', '--oneline', since_date]

activity = {}
total_commits = 0
tag_dates = {}
for subpackage in packages:
    released = github_releases[subpackage]['release_date']
    tag_date = released.strftime("%Y-%m-%d")
    tag_dates[subpackage] = tag_date
    #print(tag_date)
    #tag_date = tag_dates[subpackage]
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    ncommits = len(check_output(cmd_until).splitlines())
    ncommits_total = len(check_output(cmd).splitlines())
    print(subpackage, ncommits_total, ncommits, tag_date)
    total_commits += ncommits
    activity[subpackage] = ncommits

libpysal 352 352 2021-01-17
access 137 77 2020-07-12
esda 181 181 2020-12-30
giddy 111 103 2020-06-10
inequality 0 0 2018-10-31
pointpats 100 97 2020-07-27
segregation 62 59 2020-12-30
spaghetti 562 558 2021-01-04
mgwr 13 13 2020-09-08
spglm 6 6 2020-09-08
spint 14 11 2020-09-09
spreg 182 182 2021-01-15
spvcm 0 0 2020-02-02
tobler 207 125 2020-12-22
mapclassify 175 168 2020-12-22
splot 30 14 2020-03-23
spopt 276 276 2021-01-17
pysal 142 134 2021-01-17


In [85]:
CWD

'/home/serge/projects/pysal/tools'

In [86]:
# commits
cmd = ['git', 'log', '--oneline', since_date]

activity = {}
total_commits = 0
for subpackage in packages:
    tag_date = tag_dates[subpackage]
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    ncommits = len(check_output(cmd_until).splitlines())
    ncommits_total = len(check_output(cmd).splitlines())
    print(subpackage, ncommits_total, ncommits, tag_date)
    total_commits += ncommits
    activity[subpackage] = ncommits

libpysal 352 352 2021-01-17
access 137 77 2020-07-12
esda 181 181 2020-12-30
giddy 111 103 2020-06-10
inequality 0 0 2018-10-31
pointpats 100 97 2020-07-27
segregation 62 59 2020-12-30
spaghetti 562 558 2021-01-04
mgwr 13 13 2020-09-08
spglm 6 6 2020-09-08
spint 14 11 2020-09-09
spreg 182 182 2021-01-15
spvcm 0 0 2020-02-02
tobler 207 125 2020-12-22
mapclassify 175 168 2020-12-22
splot 30 14 2020-03-23
spopt 276 276 2021-01-17
pysal 142 134 2021-01-17


In [87]:
cmd_until

['git', 'log', '--oneline', '--since="2020-02-09"', '--until="2021-01-17"']

In [88]:
identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),
              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),
              'Wei Kang': ('Wei Kang', 'weikang9009'),
              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas'),
              'Antti Härkönen': ( 'antth', 'Antti Härkönen', 'Antti Härkönen', 'Antth'  ),
              'Juan C Duque': ('Juan C Duque', "Juan Duque"),
              'Renan Xavier Cortes': ('Renan Xavier Cortes', 'renanxcortes', 'Renan Xavier Cortes'   ),
              'Taylor Oshan': ('Tayloroshan', 'Taylor Oshan', 'TaylorOshan'),
              'Tom Gertin': ('@Tomgertin', 'Tom Gertin', '@tomgertin')
}

def regularize_identity(string):
    string = string.decode()
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(' '))>1:
        string = string.title()
    return string.lstrip('* ')

In [89]:
author_cmd = ['git', 'log', '--format=* %aN', since_date]

In [90]:
author_cmd.append('blank')

In [91]:
author_cmd

['git', 'log', '--format=* %aN', '--since="2020-02-09"', 'blank']

In [92]:
from collections import Counter

In [93]:
tag_dates

{'libpysal': '2021-01-17',
 'access': '2020-07-12',
 'esda': '2020-12-30',
 'giddy': '2020-06-10',
 'inequality': '2018-10-31',
 'pointpats': '2020-07-27',
 'segregation': '2020-12-30',
 'spaghetti': '2021-01-04',
 'mgwr': '2020-09-08',
 'spglm': '2020-09-08',
 'spint': '2020-09-09',
 'spreg': '2021-01-15',
 'spvcm': '2020-02-02',
 'tobler': '2020-12-22',
 'mapclassify': '2020-12-22',
 'splot': '2020-03-23',
 'spopt': '2021-01-17',
 'pysal': '2021-01-17'}

In [94]:
authors_global = set()
authors = {}
global_counter = Counter()
counters = dict()
cmd = ['git', 'log', '--oneline', since_date]
total_commits = 0
activity = {}
for subpackage in packages:
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    ncommits = len(check_output(cmd).splitlines())
    tag_date = tag_dates[subpackage]
    tag_date = (datetime.strptime(tag_date, '%Y-%m-%d') + timedelta(days=1)).strftime('%Y-%m-%d')
    author_cmd[-1] = '--until="{tag_date}"'.format(tag_date=tag_date)
    #cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    print(author_cmd)


    all_authors = check_output(author_cmd).splitlines()
    counter = Counter([regularize_identity(author) for author in all_authors])
    global_counter += counter
    counters.update({subpackage: counter})
    unique_authors = sorted(set(all_authors))
    authors[subpackage] =  unique_authors
    authors_global.update(unique_authors)
    total_commits += ncommits
    activity[subpackage] = ncommits

['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2021-01-18"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-07-13"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-12-31"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-06-11"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2018-11-01"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-07-28"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-12-31"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2021-01-05"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-09-09"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-09-09"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-09-10"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2021-01-16"']
['git', 'log', '--format=* %

In [95]:
counter

Counter({'Serge Rey': 87,
         'James Gaboardi': 11,
         'Andrew Annex': 3,
         'Serge': 11,
         'Eli Knaap': 22,
         'Wei Kang': 8})

In [96]:
authors_global

{b'* Andrew Annex',
 b'* Arfon Smith',
 b'* Bryan Bennett',
 b'* Dani Arribas-Bel',
 b'* Elliott Sales de Andrade',
 b'* JCS Laptop',
 b'* James Gaboardi',
 b'* Jamie Saxon',
 b'* Jeffery Sauer',
 b'* Levi John Wolf',
 b'* Martin Fleischmann',
 b'* MgeeeeK',
 b'* Omar',
 b'* Pablo Estrada',
 b'* Pattyf',
 b'* Pedro Amaral',
 b'* Serge',
 b'* Serge Rey',
 b'* Sergio Rey',
 b'* Stefanie Lumnitz',
 b'* Sugam Srivastava',
 b'* Taylor Oshan',
 b'* Wagner',
 b'* Wei Kang',
 b'* Xin (Selena) Feng',
 b'* Xin Feng',
 b'* Ziqi Li',
 b'* dependabot[bot]',
 b'* eli knaap',
 b'* jkoschinsky',
 b'* knaaptime',
 b'* ljwolf',
 b'* pabloestradac',
 b'* pedrovma',
 b'* rwei5',
 b'* vidal-anguiano',
 b'* weikang9009'}

In [97]:
activity

{'libpysal': 352,
 'access': 137,
 'esda': 181,
 'giddy': 111,
 'inequality': 0,
 'pointpats': 100,
 'segregation': 62,
 'spaghetti': 562,
 'mgwr': 13,
 'spglm': 6,
 'spint': 14,
 'spreg': 182,
 'spvcm': 0,
 'tobler': 207,
 'mapclassify': 175,
 'splot': 30,
 'spopt': 276,
 'pysal': 142}

In [98]:
counters

{'libpysal': Counter({'Serge Rey': 115,
          'James Gaboardi': 176,
          'Serge': 4,
          'Dependabot[Bot]': 15,
          'Martin Fleischmann': 8,
          'Stefanie Lumnitz': 1,
          'Mgeeeek': 7,
          'Elliott Sales De Andrade': 5,
          'Dani Arribas-Bel': 1,
          'Levi John Wolf': 16,
          'Bryan Bennett': 1,
          'Jeffery Sauer': 1,
          'Wagner': 2}),
 'access': Counter({'Jkoschinsky': 6,
          'Jamie Saxon': 41,
          'Vidal-Anguiano': 32}),
 'esda': Counter({'James Gaboardi': 19,
          'Dependabot[Bot]': 6,
          'Serge Rey': 43,
          'Serge': 9,
          'Martin Fleischmann': 1,
          'Levi John Wolf': 62,
          'Wei Kang': 1,
          'Dani Arribas-Bel': 38,
          'Jeffery Sauer': 1,
          'Eli Knaap': 1}),
 'giddy': Counter({'Wei Kang': 102, 'Serge Rey': 1}),
 'inequality': Counter(),
 'pointpats': Counter({'Serge Rey': 16,
          'Serge': 1,
          'Levi John Wolf': 75,
         

In [99]:
counters

{'libpysal': Counter({'Serge Rey': 115,
          'James Gaboardi': 176,
          'Serge': 4,
          'Dependabot[Bot]': 15,
          'Martin Fleischmann': 8,
          'Stefanie Lumnitz': 1,
          'Mgeeeek': 7,
          'Elliott Sales De Andrade': 5,
          'Dani Arribas-Bel': 1,
          'Levi John Wolf': 16,
          'Bryan Bennett': 1,
          'Jeffery Sauer': 1,
          'Wagner': 2}),
 'access': Counter({'Jkoschinsky': 6,
          'Jamie Saxon': 41,
          'Vidal-Anguiano': 32}),
 'esda': Counter({'James Gaboardi': 19,
          'Dependabot[Bot]': 6,
          'Serge Rey': 43,
          'Serge': 9,
          'Martin Fleischmann': 1,
          'Levi John Wolf': 62,
          'Wei Kang': 1,
          'Dani Arribas-Bel': 38,
          'Jeffery Sauer': 1,
          'Eli Knaap': 1}),
 'giddy': Counter({'Wei Kang': 102, 'Serge Rey': 1}),
 'inequality': Counter(),
 'pointpats': Counter({'Serge Rey': 16,
          'Serge': 1,
          'Levi John Wolf': 75,
         

In [100]:
def get_tag(title, level="##", as_string=True):
    words = title.split()
    tag = "-".join([word.lower() for word in words])
    heading = level+" "+title
    line = "\n\n<a name=\"{}\"></a>".format(tag)
    lines = [line]
    lines.append(heading)
    if as_string:
        return "\n".join(lines)
    else:
        return lines

In [101]:
subs = issue_details.keys()
table = []
txt = []
lines = get_tag("Changes by Package", as_string=False)

for sub in subs:
    total= issue_details[sub]
    pr = pull_details[sub]
    
    row = [sub, activity[sub], len(total), len(pr)]
    table.append(row)
    #line = "\n<a name=\"{sub}\"></a>".format(sub=sub)
    #lines.append(line)
    #line = "### {sub}".format(sub=sub)
    #lines.append(line)
    lines.extend(get_tag(sub.lower(), "###", as_string=False))
    for issue in total:
        url = issue['html_url']
        title = issue['title']
        number = issue['number']
        line = "* [#{number}:]({url}) {title} ".format(title=title,
                                                     number=number,
                                                     url=url)
        lines.append(line)



In [102]:
table

[['libpysal', 352, 99, 0],
 ['access', 137, 6, 5],
 ['esda', 181, 43, 26],
 ['giddy', 111, 28, 0],
 ['inequality', 0, 0, 0],
 ['pointpats', 100, 9, 7],
 ['segregation', 62, 18, 0],
 ['spaghetti', 562, 96, 0],
 ['mgwr', 13, 5, 4],
 ['spglm', 6, 4, 3],
 ['spint', 14, 2, 2],
 ['spreg', 182, 32, 17],
 ['spvcm', 0, 0, 0],
 ['tobler', 207, 54, 34],
 ['mapclassify', 175, 52, 30],
 ['splot', 30, 4, 4],
 ['spopt', 276, 97, 62],
 ['pysal', 142, 74, 0]]

In [103]:
os.chdir(CWD)

import pandas

In [104]:
df = pandas.DataFrame(table, columns=['package', 'commits', 'total issues', 'pulls'])

In [105]:
df.head()

Unnamed: 0,package,commits,total issues,pulls
0,libpysal,352,99,0
1,access,137,6,5
2,esda,181,43,26
3,giddy,111,28,0
4,inequality,0,0,0


In [106]:
df.sort_values(['commits','pulls'], ascending=False)\
  .to_html('./commit_table.html', index=None)

In [107]:
df.sum()

package         libpysalaccessesdagiddyinequalitypointpatssegr...
commits                                                      2550
total issues                                                  623
pulls                                                         194
dtype: object

In [108]:
contributor_table = pandas.DataFrame.from_dict(counters).fillna(0).astype(int).T

In [109]:
contributor_table.to_html('./contributor_table.html')

In [110]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits')

Unnamed: 0,commits
Andrew Annex,3
Arfon Smith,1
Bryan Bennett,1
Dani Arribas-Bel,41
Dependabot[Bot],45
Eli Knaap,214
Elliott Sales De Andrade,5
James Gaboardi,963
Jamie Saxon,41
Jcs Laptop,1


In [111]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits').to_html('./commits_by_person.html')

In [112]:
totals

Serge Rey                   427
James Gaboardi              963
Serge                        35
Dependabot[Bot]              45
Martin Fleischmann           28
Stefanie Lumnitz             39
Mgeeeek                       7
Elliott Sales De Andrade      5
Dani Arribas-Bel             41
Levi John Wolf              154
Bryan Bennett                 1
Jeffery Sauer                 2
Wagner                        2
Jkoschinsky                   6
Jamie Saxon                  41
Vidal-Anguiano               32
Wei Kang                    119
Eli Knaap                   214
Sugam Srivastava              4
Knaaptime                     5
Omar                          2
Taylor Oshan                 19
Ziqi Li                       9
Pedro Amaral                 68
Pabloestradac                14
Pablo Estrada                23
Pedrovma                      9
Pattyf                        1
Jcs Laptop                    1
Arfon Smith                   1
Xin (Selena) Feng            21
Xin Feng

In [113]:
n_commits = df.commits.sum()
n_issues = df['total issues'].sum()
n_pulls = df.pulls.sum()

In [114]:
n_commits

2550

In [115]:
#Overall, there were 719 commits that closed 240 issues, together with 105 pull requests across 12 packages since our last release on 2017-11-03.
#('{0} Here is a really long '
#           'sentence with {1}').format(3, 5))
line = ('Overall, there were {n_commits} commits that closed {n_issues} issues,'  
    ' together with {n_pulls} pull requests since our last release' 
        ' on {since_date}.\n'.format(n_commits=n_commits, n_issues=n_issues,
        n_pulls=n_pulls, since_date = start_date))

In [116]:
line

'Overall, there were 2550 commits that closed 623 issues, together with 194 pull requests since our last release on 2020-02-09.\n'

## append html files to end of changes.md with tags for toc

In [134]:
with open('changes.md', 'w') as fout:
    fout.write(line)
    fout.write("\n".join(lines))
    fout.write(get_tag("Contributors"))
    fout.write("\n\nMany thanks to all of the following individuals who contributed to this release:\n\n")
    
    
    
    totals = contributor_table.sum(axis=0).T
    contributors = totals.index.values
    contributors.sort()
    contributors = contributors.tolist() 
    contributors = [ f'\n - {contributor}' for contributor in contributors]
    fout.write("".join(contributors))
    


In [130]:
contributors = totals.index.values
contributors.sort()
contributors.tolist()

['Andrew Annex',
 'Arfon Smith',
 'Bryan Bennett',
 'Dani Arribas-Bel',
 'Dependabot[Bot]',
 'Eli Knaap',
 'Elliott Sales De Andrade',
 'James Gaboardi',
 'Jamie Saxon',
 'Jcs Laptop',
 'Jeffery Sauer',
 'Jkoschinsky',
 'Knaaptime',
 'Levi John Wolf',
 'Martin Fleischmann',
 'Mgeeeek',
 'Omar',
 'Pablo Estrada',
 'Pabloestradac',
 'Pattyf',
 'Pedro Amaral',
 'Pedrovma',
 'Rwei5',
 'Serge',
 'Serge Rey',
 'Stefanie Lumnitz',
 'Sugam Srivastava',
 'Taylor Oshan',
 'Vidal-Anguiano',
 'Wagner',
 'Wei Kang',
 'Xin (Selena) Feng',
 'Xin Feng',
 'Ziqi Li']

In [123]:
with open('changes.md', 'w') as fout:
    fout.write(line)
    fout.write("\n".join(lines))
    fout.write(get_tag("Summary Statistics"))
    
    with open('commit_table.html') as table:
        table_lines = table.readlines()
        title = "Package Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))
                
    with open('commits_by_person.html') as table:
        table_lines = table.readlines()
        title = "Contributor Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))
        
    with open('contributor_table.html') as table:
        table_lines = table.readlines()
        title = "Contributor by Package Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))