# Commits

In [1]:
# pandas supplies the DataFrame / groupby / resample operations used in every cell below.
import pandas
# NOTE(review): the alias `plot` bound here is rebound by the
# `from plotly.offline import ... plot` line below, so this binding is shadowed.
import plotly.plotly as plot
# NOTE(review): `cufflinks` is never referenced by name in the cells shown;
# presumably it is imported for the `.iplot()` method the cells call on pandas
# objects -- confirm.
import cufflinks
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.figure_factory as ff
# Initialise plotly's notebook mode once, before any of the iplot() calls below.
init_notebook_mode(connected=True)

## Raw Data

In [2]:
# Load the exported commit log and parse the `date` column into timestamps so the
# `.dt` accessor and resampling used by later cells work.
data = pandas.read_csv('./commit_history.csv')
data['date'] = pandas.to_datetime(data['date'])
# Exclude commits whose message starts with 'Merge'.
# na=False makes a missing message count as "not a merge"; without it the mask
# would contain NaN for such rows and the `~` inversion below would raise.
is_merge_commit = data.message.str.startswith('Merge', na=False)
data = data.loc[~is_merge_commit]
data

Unnamed: 0,date,author,committer,message,lines,insertions,deletions,net
4,2017-10-25 10:36:30,volth,Franz Pletz,vuze: init at 5750 (#30557)\n\n,31,31,0,31
6,2017-10-25 10:26:13,Tobias Happ,Franz Pletz,phpPackages.couchbase: remove warnings on php ...,23,22,1,21
8,2017-10-25 10:11:49,Joerg Thalheim,Joerg Thalheim,cockroachdb: install bash completion into .bin\n,43,21,22,-1
11,2017-10-25 08:19:24,zraexy,zraexy,zanshin: don't use stdenv\n,6,3,3,0
18,2017-10-25 02:54:38,Vincent Laporte,Vincent Laporte,ocamlPackages.wasm: 0.13 -> 1.0\n,4,2,2,0
19,2017-10-25 02:44:04,Joerg Thalheim,Joerg Thalheim,xf86-video-intel: 2017-04-18 -> 2017-10-19\n,6,3,3,0
21,2017-10-25 01:09:29,Tom Doggett,Tom Doggett,rtv: 1.18.0 -> 1.19.0\n\nUpdated version and G...,4,2,2,0
26,2017-10-25 00:04:16,Roman Kuznetsov,Roman Kuznetsov,dotnet-sdk : init at 2.0.3\n,56,56,0,56
27,2017-10-24 23:40:31,Dmitry Kalinkin,Dmitry Kalinkin,"Revert ""vc: Broken on i686""\n\nNot broken\n\nT...",2,1,1,0
28,2017-10-24 23:24:08,Joerg Thalheim,Joerg Thalheim,bcc: fix install\n,2,1,1,0


## Commits / Year

In [3]:
# Group the commits by the calendar year of their `date`; the grouping object is
# reused by later cells.
commit_year = data.date.dt.year
data_by_year = data.groupby(commit_year)

# Number of rows (commits) in each year, drawn as a bar chart.
commits_per_year = data_by_year.size()
commits_per_year_figure = commits_per_year.iplot(
    asFigure=True,
    dimensions=(750, 500),
    kind="bar",
    yTitle="Number of Commits",
    xTitle="Year",
)
iplot(commits_per_year_figure)

## Percentage change in the number of commits compared to the previous year

In [4]:
# Year-over-year change in the number of commits.
# pct_change() returns a fraction (0.25 means +25%), so scale by 100 to match the
# "Percentage of Change" axis label. The first year has no predecessor and is NaN.
commits_pct_change = commits_per_year.pct_change() * 100
iplot(commits_pct_change.iplot(asFigure=True, dimensions=(750, 500), kind='bar', yTitle="Percentage of Change", xTitle="Year"))

## Committers / Year

In [5]:
# Number of distinct `author` values in each year.
# NOTE(review): this counts the `author` column, not the separate `committer`
# column, even though the result is named and labelled "committers" -- confirm
# that this is the intended definition.
committers_per_year = data_by_year['author'].nunique()
# Axis label spelling corrected ("Commiters" -> "Committers").
iplot(committers_per_year.iplot(asFigure=True, dimensions=(750, 500), kind="bar", yTitle="Number of Committers", xTitle="Year"))

## Percentage change in the number of committers compared to the previous year

In [6]:
# Year-over-year change in the number of distinct authors.
# pct_change() returns a fraction (0.25 means +25%), so scale by 100 to match the
# "Percentage of Change" axis label. The first year has no predecessor and is NaN.
committers_pct_change = committers_per_year.pct_change() * 100
iplot(committers_pct_change.iplot(asFigure=True, dimensions=(750, 500), kind="bar", yTitle="Percentage of Change", xTitle="Year"))

## Commits/Committer per Year

In [7]:
# Put the per-year author count and commit count side by side (both are indexed
# by year), then derive the average number of commits per author for each year.
c = committers_per_year.to_frame(name='committers').join(commits_per_year.to_frame(name='commits'))
c['commits_per_committer'] = c['commits'] / c['committers']
# Axis titles added so this chart is labelled like the other bar charts in the notebook.
iplot(c.loc[:, 'commits_per_committer'].iplot(asFigure=True, dimensions=(750, 500), kind="bar", yTitle="Commits per Committer", xTitle="Year"))

## Number of Commits/Author/Year

In [8]:
# Group commits by (year, author); the grouping object is reused by a later cell.
data_by_author_year = data.groupby([data.date.dt.year, data.author])
# Pivot the year level (level 0) into columns: one row per author, one column per year.
# fill_value=0 fills author/year pairs with no commits directly, so the counts
# stay integers instead of being upcast to float by a NaN -> fillna(0) round trip.
commits_per_author_year = data_by_author_year.size().unstack(0, fill_value=0)
commits_per_author_year

date,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
(cdep)illabout,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0
0ida,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
0w0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
0xABAB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
239,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
2chilled,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
3noch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
8573,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
=,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,35.0,1.0
A.J.Rouvoet,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0


## Number of Commits per Year by a specific Author

In [9]:
# Select one author's row from the author-by-year table (a Series indexed by year)
# and draw it as a bar chart.
selected_author_commits = commits_per_author_year.loc['Eelco Dolstra']
selected_author_figure = selected_author_commits.iplot(
    asFigure=True,
    dimensions=(750, 500),
    kind="bar",
)
iplot(selected_author_figure)

## Committer Stats

In [10]:
# Per-author summary: number of commits plus sum/mean/min/max of each line-count
# column, limited to the 20 authors with the most commits.
author_stat_spec = {'date': ['size']}
for stat_column in ('lines', 'insertions', 'deletions', 'net'):
    author_stat_spec[stat_column] = ['sum', 'mean', 'min', 'max']

author_stats = data.groupby(data.author).agg(author_stat_spec)
# ('date', 'size') is the commit-count column of the resulting two-level header.
author_stats.sort_values([('date', 'size')], ascending=False).head(20)

Unnamed: 0_level_0,date,lines,lines,lines,lines,insertions,insertions,insertions,insertions,deletions,deletions,deletions,deletions,net,net,net,net
Unnamed: 0_level_1,size,sum,mean,min,max,sum,mean,min,max,sum,mean,min,max,sum,mean,min,max
author,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Peter Simons,13333,3364290,252.328058,0,1218550,1799005,134.928748,0,549015,1565285,117.39931,0,1107576,233720,17.529438,-996602,458689
Eelco Dolstra,11538,1303305,112.957618,0,112620,529009,45.849281,0,16346,774296,67.108338,0,112356,-245287,-21.259057,-112092,14716
Michael Raskin,3567,165573,46.417998,0,15140,125497,35.182787,0,15140,40076,11.235212,0,14966,85421,23.947575,-14960,15140
Vladimír Čunát,3084,196765,63.801881,1,30612,133427,43.264267,0,30612,63338,20.537613,0,9859,70089,22.726654,-3589,30612
Lluís Batlle i Rossell,2878,470099,163.342252,0,128256,251198,87.28214,0,119110,218901,76.060111,0,117971,32297,11.222029,-107686,112796
William A. Kennington III,2273,70944,31.211615,1,3541,40788,17.944567,0,3541,30156,13.267048,0,2720,10632,4.677519,-2714,3541
Thomas Tuegel,2134,692008,324.277413,1,106870,400773,187.803655,0,60194,291235,136.473758,0,46676,109538,51.329897,-11575,43756
Ludovic Courtès,2004,58177,29.030439,0,2748,42565,21.24002,0,2747,15612,7.790419,0,1227,26953,13.449601,-1227,2746
Nikolay Amiantov,1834,112588,61.389313,1,17197,66038,36.007634,0,8623,46550,25.381679,0,8574,19488,10.625954,-7377,7403
Shea Levy,1709,132510,77.536571,1,14133,83218,48.693973,0,6535,49292,28.842598,0,8564,33926,19.851375,-2995,3265


## Committer Stats per Year

In [11]:
# Same summary as the per-author table, but for every (year, author) group:
# a count of `date` values plus sum/mean/min/max of each line-count column.
author_year_stat_spec = {'date': ['count']}
for stat_column in ('lines', 'insertions', 'deletions', 'net'):
    author_year_stat_spec[stat_column] = ['sum', 'mean', 'min', 'max']

data_by_author_year.agg(author_year_stat_spec)

Unnamed: 0_level_0,Unnamed: 1_level_0,date,lines,lines,lines,lines,insertions,insertions,insertions,insertions,deletions,deletions,deletions,deletions,net,net,net,net
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum,mean,min,max,sum,mean,min,max,sum,mean,min,max,sum,mean,min,max
date,author,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2003,Eelco Dolstra,214,14510,67.803738,0,3176,8890,41.542056,0,667,5620,26.261682,0,3176,3270,15.280374,-3176,645
2003,Eelco Visser,18,1468,81.555556,0,316,1215,67.500000,0,243,253,14.055556,0,73,962,53.444444,-55,225
2003,Martin Bravenboer,1,28,28.000000,28,28,28,28.000000,28,28,0,0.000000,0,0,28,28.000000,28,28
2004,Armijn Hemel,82,1789,21.817073,0,305,1585,19.329268,0,305,204,2.487805,0,41,1381,16.841463,-2,305
2004,Eelco Dolstra,292,20758,71.089041,1,1283,13076,44.780822,0,899,7682,26.308219,0,807,5394,18.472603,-574,714
2004,Eelco Visser,13,4735,364.230769,2,1504,3203,246.384615,0,1500,1532,117.846154,0,1483,1671,128.538462,-1483,1496
2004,Martin Bravenboer,84,2426,28.880952,0,147,2017,24.011905,0,143,409,4.869048,0,64,1608,19.142857,-64,139
2004,Merijn de Jonge,1,3,3.000000,3,3,2,2.000000,2,2,1,1.000000,1,1,1,1.000000,1,1
2004,Rob Vermaas,13,181,13.923077,2,77,99,7.615385,1,27,82,6.307692,0,56,17,1.307692,-35,27
2005,Armijn Hemel,337,12802,37.988131,0,2788,10943,32.471810,0,2788,1859,5.516320,0,748,9084,26.955490,-644,2788


## Mean number of commits per day, by month

In [12]:
# Count commits in each calendar day (days without commits count as 0).
commits_by_date = data.set_index('date')
daily = commits_by_date.resample('D').size()

# Average the daily counts within each monthly bin, then relabel the index as
# 'YYYY/MM' strings for the plot axis.
monthly_mean = daily.resample('M').mean()
month_labels = monthly_mean.index.strftime('%Y/%m')
monthly_mean.index = month_labels

monthly_mean_figure = monthly_mean.iplot(asFigure=True, dimensions=(750, 500))
iplot(monthly_mean_figure)

In [13]:
# Months marked with a vertical line on the chart.
# NOTE(review): presumably these are release months -- confirm what they denote.
marked_months = ['2017/09', '2017/03', '2016/09', '2016/03', '2015/09']

# Restrict the series to its last 36 entries (months) and plot it with the markers.
recent_monthly_mean = monthly_mean.iloc[-36:]
iplot(recent_monthly_mean.iplot(asFigure=True, dimensions=(750, 500), vline=marked_months))