## Setup

In [1]:
import pandas
import plotly.plotly as plot
import cufflinks
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks
init_notebook_mode(connected=True)

## Raw Data

In [2]:
data = pandas.read_csv('./commit_history.csv')
data['date'] = pandas.to_datetime(data['date'])
data = data.loc[~data.message.str.startswith('Merge pull request')]
data

Unnamed: 0,date,author,committer,message,lines,insertions,deletions,net
0,2017-10-10 12:41:50,Eelco Dolstra,Eelco Dolstra,linux: 4.9.53 -> 4.9.54\n,4,2,2,0
1,2017-10-10 12:26:24,Eelco Dolstra,Eelco Dolstra,Enable command-not-found\n\n5a5db609e5bd83bc58...,9,8,1,7
2,2017-10-10 12:05:00,Eelco Dolstra,Eelco Dolstra,gdb: Remove --with-separate-debug-dir\n\nWe do...,5,1,4,-3
3,2017-10-10 12:04:57,Eelco Dolstra,Eelco Dolstra,Set $NIX_DEBUG_INFO_DIRS when environment.enab...,7,4,3,1
4,2017-10-10 11:18:57,Gabriel Ebner,Gabriel Ebner,isabelle: 2016-1 -> 2017\n,14,9,5,4
5,2017-10-10 11:18:38,Peter Hoeg,Peter Hoeg,netalyzr: use makeWrapper i/o custom script\n,14,5,9,-4
6,2017-10-10 09:21:58,Vincent Laporte,Vincent Laporte,antlr4: enable more platforms\n,2,1,1,0
7,2017-10-10 07:51:49,volth,vbgl,haxe: 3.4.3 -> 3.4.4,4,2,2,0
8,2017-10-10 06:55:21,Peter Hoeg,Peter Hoeg,vivaldi: do not build broken proprietary codec...,2,1,1,0
9,2017-10-10 06:55:21,Peter Hoeg,Peter Hoeg,vivaldi: 1.12.995.36 -> 1.12.995.38\n,4,2,2,0


## Commits / Year

In [3]:
data_by_year = data.groupby(data.date.dt.year)
commits_per_year = data_by_year.size()
iplot(commits_per_year.iplot(asFigure=True, dimensions=(750, 500), kind="bar", yTitle="Number of Commits", xTitle="Year"))

## Percentage of change of number of commits compared to previous year 

In [4]:
iplot(commits_per_year.pct_change().iplot(asFigure=True, dimensions=(750, 500), kind='bar', yTitle="Percentage of Change", xTitle="Year"))

## Commiters / Year

In [5]:
committers_per_year = data_by_year.author.nunique()
iplot(committers_per_year.iplot(asFigure=True, dimensions=(750, 500), kind="bar", yTitle="Number of Commiters", xTitle="Year"))

## Percentage of change of number of committers compared to previous year

In [6]:
iplot(committers_per_year.pct_change().iplot(asFigure=True, dimensions=(750, 500), kind="bar", yTitle="Percentage of Change", xTitle="Year"))

## Commits/Committer per Year

In [7]:
c = committers_per_year.to_frame(name='committers').join(commits_per_year.to_frame(name='commits'))
c['commits_per_committer'] = c['commits']/c['committers']

In [8]:
iplot(c.loc[:, 'commits_per_committer'].iplot(asFigure=True, dimensions=(750, 500), kind="bar"))

## Number of Commits/Author/Year

In [9]:
data_by_author_year = data.groupby([data.date.dt.year, data.author])
commits_per_author_year = data_by_author_year.size().unstack(0).fillna(0)
commits_per_author_year

date,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
(cdep)illabout,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0
0ida,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
0w0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
0xABAB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
239,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
2chilled,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
3noch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
8573,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
=,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,35.0,1.0
A.J.Rouvoet,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0


## Number of Commits per Year by a specific Author

In [10]:
iplot(commits_per_author_year.loc['Eelco Dolstra'].iplot(asFigure=True, dimensions=(750, 500), kind="bar"))

## Committer Stats

In [11]:
data.groupby(data.author).agg({
    'date': ['size'],
    'lines': ['sum', 'mean', 'min', 'max'],
    'insertions': ['sum', 'mean', 'min', 'max'],
    'deletions': ['sum', 'mean', 'min', 'max'],
    'net': ['sum', 'mean', 'min', 'max'],
}).sort_values([('date', 'size')], ascending=False).iloc[:20,:]

Unnamed: 0_level_0,date,lines,lines,lines,lines,insertions,insertions,insertions,insertions,deletions,deletions,deletions,deletions,net,net,net,net
Unnamed: 0_level_1,size,sum,mean,min,max,sum,mean,min,max,sum,mean,min,max,sum,mean,min,max
author,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Peter Simons,13385,3834202,286.455136,0,1218550,2108492,157.526485,0,549015,1725710,128.928651,0,1107576,382782,28.597833,-996602,458689
Eelco Dolstra,11660,3297106,282.770669,0,519024,2021107,173.336792,0,509309,1275999,109.433877,0,112356,745108,63.902916,-112092,499594
Vladimír Čunát,3688,9291585,2519.410249,1,1315539,5104957,1384.20743,0,770790,4186628,1135.20282,0,1122040,918329,249.00461,-996649,578417
Michael Raskin,3573,309102,86.510495,0,95813,217617,60.905961,0,60832,91485,25.604534,0,34981,126132,35.301427,-14960,25851
Lluís Batlle i Rossell,2895,496479,171.495337,0,128256,271105,93.645941,0,119110,225374,77.849396,0,117971,45731,15.796546,-107686,112796
William A. Kennington III,2360,1949455,826.040254,1,650043,1556432,659.505085,0,554356,393023,166.535169,0,95687,1163409,492.969915,-35533,458669
Thomas Tuegel,2262,958149,423.584881,1,106870,553874,244.860301,0,60194,404275,178.72458,0,46676,149599,66.135721,-11575,43756
Ludovic Courtès,2006,58328,29.07677,0,2748,42634,21.25324,0,2747,15694,7.823529,0,1227,26940,13.429711,-1227,2746
Shea Levy,1992,307826,154.531124,1,63486,199330,100.065261,0,43693,108496,54.465863,0,19793,90834,45.599398,-3686,23900
Nikolay Amiantov,1831,188701,103.058984,1,30614,126301,68.979246,0,24467,62400,34.079738,0,8574,63901,34.899508,-7377,18320


## Commiter Stats per Year

In [12]:
data_by_author_year.agg({
    'date': ['count'],
    'lines': ['sum', 'mean', 'min', 'max'],
    'insertions': ['sum', 'mean', 'min', 'max'],
    'deletions': ['sum', 'mean', 'min', 'max'],
    'net': ['sum', 'mean', 'min', 'max'],
})

Unnamed: 0_level_0,Unnamed: 1_level_0,date,lines,lines,lines,lines,insertions,insertions,insertions,insertions,deletions,deletions,deletions,deletions,net,net,net,net
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum,mean,min,max,sum,mean,min,max,sum,mean,min,max,sum,mean,min,max
date,author,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2003,Eelco Dolstra,214,14510,67.803738,0,3176,8890,41.542056,0,667,5620,26.261682,0,3176,3270,15.280374,-3176,645
2003,Eelco Visser,18,1468,81.555556,0,316,1215,67.500000,0,243,253,14.055556,0,73,962,53.444444,-55,225
2003,Martin Bravenboer,1,28,28.000000,28,28,28,28.000000,28,28,0,0.000000,0,0,28,28.000000,28,28
2004,Armijn Hemel,82,1789,21.817073,0,305,1585,19.329268,0,305,204,2.487805,0,41,1381,16.841463,-2,305
2004,Eelco Dolstra,292,20758,71.089041,1,1283,13076,44.780822,0,899,7682,26.308219,0,807,5394,18.472603,-574,714
2004,Eelco Visser,13,4735,364.230769,2,1504,3203,246.384615,0,1500,1532,117.846154,0,1483,1671,128.538462,-1483,1496
2004,Martin Bravenboer,84,2426,28.880952,0,147,2017,24.011905,0,143,409,4.869048,0,64,1608,19.142857,-64,139
2004,Merijn de Jonge,1,3,3.000000,3,3,2,2.000000,2,2,1,1.000000,1,1,1,1.000000,1,1
2004,Rob Vermaas,13,181,13.923077,2,77,99,7.615385,1,27,82,6.307692,0,56,17,1.307692,-35,27
2005,Armijn Hemel,337,12802,37.988131,0,2788,10943,32.471810,0,2788,1859,5.516320,0,748,9084,26.955490,-644,2788


## Number of commits per Day

In [13]:
daily = data.set_index('date').groupby(pandas.TimeGrouper(freq='D')).size()
monthly_mean = daily.groupby(pandas.TimeGrouper(freq='M')).mean()
monthly_mean.index = monthly_mean.index.strftime('%Y/%m')

In [14]:
iplot(monthly_mean.iplot(asFigure=True, dimensions=(750, 500)))

In [15]:
iplot(monthly_mean[-36:].iplot(asFigure=True, dimensions=(750, 500), vline=['2017/09', '2017/03', '2016/09', '2016/03', '2015/09']))