In [1]:
import pandas
import plotly.express as px
from IPython.display import display, Markdown

# Load the `cypher` IPython extension; it provides the %cypher / %%cypher magics used by the cells below.
%load_ext cypher
# Point the magic at the Neo4j HTTP endpoint.
# NOTE(review): the URI appears to embed user name and password in plain text -- consider
# supplying it from the environment instead of committing it with the notebook.
%config CypherMagic.uri='http://neo4j:neo@localhost:7474/db/data'

# Experten-Analyse anhand der Git-Historie

## Fragestellung
1. Welche Entwickler können als Experten der fachlichen Komponenten identifiziert werden?
2. Wie verteilt sich das Wissen über die fachlichen Komponenten auf die Entwickler? 
    * Tragen einzelne Entwickler eher zu vielen oder eher nur zu einer einzigen fachlichen Komponente etwas bei? 
    * Gibt es Komponenten, an denen praktisch nur ein Entwickler arbeitet, sodass das Wissen stark konzentriert ist?

## Datenquelle
* Java-Strukturen der Spring-Data-MongoDB-Anwendung mittels jQAssistant gescannt und in Neo4j abfragbar
* Git-Historie mittels jQAssistant gescannt und in Neo4j abfragbar


* Identifikation der fachlichen Komponenten im Source Code (siehe 0)
* Matching zwischen Entwicklern und fachlichen Komponenten

## Annahmen
* Viele Commits eines Entwicklers sind gleichbedeutend mit vielen Beiträgen (?) (Anzahl der geänderten Zeilen eines Commits wird im jQAssistant nicht erfasst)
* **TODO**: "Committer" und "Author" eines Commits manchmal verschieden 
    * Author hat inhaltliche Änderungen gemacht. Committer kann abweichen, z.B. beim Mergen oder wenn Author keine Commit-Rechte hat.
    * Hier erstmal nur Betrachtung des Authors.

In [2]:
%%cypher
// Number of commits whose committer differs from the author
MATCH (n:Commit) 
WHERE n.committer <> n.author
RETURN count(n)

1 rows affected.


count(n)
1729


In [3]:
%%cypher
// Committers who committed changes authored by somebody else
// (distinct committer strings; the same person can appear with several e-mail addresses)
MATCH (n:Commit) 
WHERE n.committer <> n.author 
RETURN DISTINCT n.committer

18 rows affected.


n.committer
Christoph Strobl <cstrobl@vmware.com>
Mark Paluch <mpaluch@vmware.com>
GitHub <noreply@github.com>
Mark Paluch <mpaluch@pivotal.io>
Jens Schauder <jschauder@vmware.com>
Christoph Strobl <cstrobl@pivotal.io>
Greg Turnquist <gturnquist@pivotal.io>
Jens Schauder <jschauder@pivotal.io>
Oliver Drotbohm <odrotbohm@pivotal.io>
Oliver Gierke <ogierke@pivotal.io>


Ohne Duplikate (8 Committer)
* Christoph Strobl
* Jens Schauder
* Jon Brisbin
* Greg Turnquist
* Mark Paluch
* Oliver Drotbohm 
* Oliver Gierke
* Thomas Darimont


## Validierung
* Zur Auswertung der Fragestellung betrachten wir
    * Anzahl Commits der Entwickler, die Änderungen in den fachlichen Komponenten enthalten
    * Anzahl der geänderten Dateien und Art der Modifikation, die Entwickler durch die Commits vornehmen
    * Zeitraum, über den Entwickler die Commits erstellt haben

## Implementierung

* Prüfung der Author-Knoten nach Duplikaten

In [4]:
%%cypher
// Check whether the same author name occurs with different e-mail addresses:
// compare the number of distinct names with the number of distinct e-mail addresses.
MATCH (author:Author) 
RETURN count(DISTINCT author.name) AS AuthorNames, count(DISTINCT author.email) AS AuthorEmails

1 rows affected.


AuthorNames,AuthorEmails
150,150


* Es gibt genauso viele unterschiedliche E-Mail-Adressen wie Namen, also keine Namen mit mehreren E-Mail-Adressen.
* Händische Prüfung ergibt folgende (vermutete) Duplikate:
    * `Mark Pollack` `mark.pollack@springsource.com` und `mpollack` `mpollack@vmware.com`
    * `Greg Turnquist` `gturnquist@vmware.com` und `Greg L. Turnquist` `gturnquist@pivotal.io`
    * `owen.qqq` `owen.qqq@kakaocommerce.com` und `owen-q` `owen.q.dev@gmail.com`


In [5]:
%%cypher
// Clean-up of duplicate author nodes (manual post-processing).
// Each list entry is [name to keep, e-mail of the author node to keep, e-mail of the duplicate node].
// This query modifies the graph: it renames the kept node, re-links commits and deletes the duplicate.
WITH [
  ["Mark Pollack", "mark.pollack@springsource.com", "mpollack@vmware.com"],
  ["Greg Turnquist", "gturnquist@vmware.com", "gturnquist@pivotal.io"],
  ["owen.qqq", "owen.qqq@kakaocommerce.com", "owen.q.dev@gmail.com"]
] AS authors
UNWIND authors AS duplicateAuthor
// Both nodes must exist; once a duplicate has been deleted this MATCH yields no rows for that entry.
MATCH (author:Author{email: duplicateAuthor[1]}),
      (duplicate:Author{email: duplicateAuthor[2]})
SET author.name = duplicateAuthor[0]      
WITH author, duplicate
// Attach every commit of the duplicate to the kept author, then remove the duplicate node.
// NOTE(review): a duplicate without any COMMITTED relationship produces no row here and
// would therefore not be deleted -- confirm that this cannot occur in the scanned data.
MATCH (duplicate)-[:COMMITTED]->(c:Commit)
MERGE (author)-[:COMMITTED]->(c)
DETACH DELETE duplicate
RETURN author.name AS AuthorName, author.email AS AuthorMail, count(DISTINCT duplicate) AS Duplicates

0 rows affected.


AuthorName,AuthorMail,Duplicates


## Ergebnisse

### Entwickler und Anzahl Commits - allgemein

In [6]:
%%cypher
// Total number of author nodes
MATCH  (author:Author)
RETURN count(author) AS AuthorCount

1 rows affected.


AuthorCount
150


In [57]:
authorsByCommitCount = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit), \
                                 (c)-[:CONTAINS_CHANGE]->(:Change)-[:MODIFIES]->(file:File) \
                           WHERE NOT c:Merge AND NOT a.name STARTS WITH "Spring" \
                           WITH a, count(DISTINCT c) AS Commits \
                           WHERE Commits > 1 \
                           RETURN a.name as Entwickler, Commits \
                           ORDER BY Commits DESC LIMIT 10
                            

authorsByCommitCount_df = authorsByCommitCount.get_dataframe()
fig = px.fig = px.bar(authorsByCommitCount_df, x='Entwickler', y='Commits', title='Entwickler mit den meisten Commits')
fig.show()

10 rows affected.


In [24]:
commitCountByAuthor = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit), \
                                 (c)-[:CONTAINS_CHANGE]->(:Change)-[:MODIFIES]->(file:File) \
                           WHERE NOT c:Merge AND NOT a.name STARTS WITH 'Spring' \
                           RETURN a.name as Entwickler, count(DISTINCT c) AS Commits \
                           ORDER BY Commits DESC LIMIT 13
                            

commitCountByAuthor_df = commitCountByAuthor.get_dataframe()

fig = px.pie(commitCountByAuthor_df, values='Commits', names='Entwickler', title='Verteilung der Commits auf Entwickler (mit min. 0,2% Anteil an allen Commits)')
fig.show()

13 rows affected.


In [9]:
# Gruppierung Entwickler nach Anzahl Commits
authorsByCommitCount = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit), \
                                 (c)-[:CONTAINS_CHANGE]->(:Change)-[:MODIFIES]->(file:File) \
                           WHERE NOT c:Merge \
                           RETURN a.name as Entwickler, count(DISTINCT c) AS Commits

authorsByCommitCount_df = authorsByCommitCount.get_dataframe()

count_1_to_4 = 0
count_5_to_9 = 0
count_10_to_99 = 0
count__100_to_499 = 0
count__500_to_inf = 0
for i, row in authorsByCommitCount_df.iterrows():
    if row['Commits'] >= 500:
        count__500_to_inf = count__500_to_inf + 1
    elif row['Commits'] >= 100:
        count__100_to_499 = count__100_to_499 + 1
    elif row['Commits'] >= 10:
        count_10_to_99 = count_10_to_99 + 1
    elif row['Commits'] >= 5:
        count_5_to_9 = count_5_to_9 + 1
    elif row['Commits'] >= 1:
        count_1_to_4 = count_1_to_4 + 1

data = dict()
data['commit_count_intervals'] = ['1 bis 4', '5 bis 9', '10 bis 99', '100 bis 499', 'ab 500']
data['author_count'] = [count_1_to_4, count_5_to_9, count_10_to_99, count__100_to_499, count__500_to_inf]

authorCountGroupedByCommitCount_df = pandas.DataFrame(data=data)
fig = px.fig = px.bar(authorCountGroupedByCommitCount_df, x='commit_count_intervals', y='author_count', 
                      title='Gruppierung der Entwickler nach Anzahl ihrer Commits',
                      labels={'commit_count_intervals':'Anzahl der Commits', 'author_count': 'Anzahl der Entwickler'})
fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1, opacity=0.6)
fig.show()


149 rows affected.


### Entwickler und Anzahl Commits - bezogen auf fachliche Komponenten

In [10]:
%%cypher
// Number of non-merge commits per author and bounded context.
// A commit counts for a bounded context when one of its changes points to a Git file that is
// the source of a Java type contained in that context.
MATCH (a:Author)-[:COMMITTED]->(c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File),
      (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext)
WHERE NOT c:Merge
RETURN bC.name AS BoundedContext, a.name AS Author, count(DISTINCT c) AS Commits
ORDER BY BoundedContext, Commits Desc

262 rows affected.


BoundedContext,Author,Commits
aggregation,Christoph Strobl,146
aggregation,Mark Paluch,119
aggregation,Thomas Darimont,39
aggregation,Spring Operator,20
aggregation,Oliver Gierke,13
aggregation,Oliver Drotbohm,11
aggregation,Eddú Meléndez,4
aggregation,Matt Morrissette,3
aggregation,Gustavo de Geus,3
aggregation,Christian Ivan,3


In [25]:
%%cypher
// Author with the most non-merge commits for each bounded context
MATCH    (c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File),
         (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext),
         (a:Author)-[:COMMITTED]->(c)
WHERE    NOT c:Merge
WITH     bC.name AS BoundedContext, a.name AS Author, count(DISTINCT c) AS Commits
ORDER BY BoundedContext, Commits Desc
// Keep only the first collected author per context; this relies on collect() seeing the rows
// in the order established by the ORDER BY above.
WITH     BoundedContext, collect(Author)[..1] AS TopAuthor
// Turn the one-element list back into a plain value.
UNWIND   TopAuthor AS Author
RETURN   BoundedContext, Author

19 rows affected.


BoundedContext,Author
aggregation,Christoph Strobl
config,Mark Paluch
convert,Christoph Strobl
core,Christoph Strobl
geo,Mark Paluch
gridfs,Mark Paluch
index,Christoph Strobl
mapping,Christoph Strobl
mapreduce,Mark Paluch
messaging,Mark Paluch


In [12]:
%%cypher
// Per bounded context: the author with the most non-merge commits and that commit count
MATCH (a:Author)-[:COMMITTED]->(c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File),
      (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext)
WHERE NOT c:Merge
WITH     bC.name AS BoundedContext, a.name AS Author, count(DISTINCT c) AS Commits
ORDER BY BoundedContext, Commits Desc 
// Both lists are cut to their first element, so the two UNWINDs below yield exactly one row
// per context (a longer slice would produce a cross product of authors and counts).
WITH   BoundedContext, collect(Commits)[..1] AS CommitCountByTopAuthorList, collect(Author)[..1] AS TopAuthorList
UNWIND CommitCountByTopAuthorList AS CommitCountByTopAuthor
UNWIND TopAuthorList AS TopAuthor
RETURN BoundedContext, TopAuthor, CommitCountByTopAuthor

19 rows affected.


BoundedContext,TopAuthor,CommitCountByTopAuthor
aggregation,Christoph Strobl,146
config,Mark Paluch,46
convert,Christoph Strobl,257
core,Christoph Strobl,231
geo,Mark Paluch,23
gridfs,Mark Paluch,41
index,Christoph Strobl,73
mapping,Christoph Strobl,57
mapreduce,Mark Paluch,22
messaging,Mark Paluch,23


In [26]:

bcCommitAndAuthorCount = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File), \
                                (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext) \
                                WHERE NOT c:Merge \
                                RETURN 	 bC.name AS BoundedContext, count(DISTINCT c) AS TotalCommitCount, count(DISTINCT a.name) AS AuthorCount \
                                ORDER BY TotalCommitCount DESC


df = bcCommitAndAuthorCount.get_dataframe()

fig = px.line(df, x='BoundedContext', y=['AuthorCount', 'TotalCommitCount'], height=1500, width=1000,
              labels={'value': 'Anzahl Commits bzw. Entwickler', 'variable': ''},
              title='Gegenüberstellung: Anzahl Commits und Anzahl beteiligter Entwickler in einer Komponente', markers=True)
fig.data[0].name = 'Anzahl beteiligter Entwickler'
fig.data[1].name = 'Anzahl Commits'
fig.show()

19 rows affected.


In [27]:
# Vergleich Commit-Anteile des Top-Entwicklers and aller anderen Entwickler
# BC, CommitCountByBC, TopAuthor, CommitCountByTopAuthor, CommitRatio
commitRatioForBc = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File), \
                                (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext) \
                                WHERE NOT c:Merge \
                                WITH bC.name AS BoundedContext, count(DISTINCT c) AS TotalCommitCount, a.name AS Author \
                                ORDER BY TotalCommitCount DESC \
                                WITH DISTINCT BoundedContext, sum(TotalCommitCount) AS CommitCountByBC, collect(Author)[..1] AS TopAuthorList, collect(TotalCommitCount)[..1] AS CommitCountByTopAuthorList \
                                UNWIND CommitCountByTopAuthorList AS CommitCountByTopAuthor \
                                UNWIND TopAuthorList AS TopAuthor \
                                RETURN BoundedContext, CommitCountByBC, TopAuthor, CommitCountByTopAuthor, (100*CommitCountByTopAuthor/CommitCountByBC) AS CommitRatio \
                                ORDER BY BoundedContext

commitRatioForBc_query_df = commitRatioForBc.get_dataframe()
print(commitRatioForBc)

commit_ratio_other_authors_list = []
for i, row in commitRatioForBc_query_df.iterrows():
    commit_ratio_other_authors = 100 - row['CommitRatio']
    commit_ratio_other_authors_list.append(commit_ratio_other_authors)

commitRatioTopAuthor_data = dict()
commitRatioTopAuthor_data['bounded_context'] = commitRatioForBc_query_df['BoundedContext']
commitRatioTopAuthor_data['commit_ratio_top_author'] = commitRatioForBc_query_df['CommitRatio']
commitRatioTopAuthor_data['commit_ratio_other_authors'] = commit_ratio_other_authors_list


commitRatioForBc_df = pandas.DataFrame(data=commitRatioTopAuthor_data)
fig = px.bar(commitRatioForBc_df, x='bounded_context', y=['commit_ratio_top_author', 'commit_ratio_other_authors'],
             title='Anteil des Top-Comitters an allen Commits einer fachlichen Komponete', 
             labels={'bounded_context': 'Bounded Context', 'value': 'Commit-Anteile in %', 'variable': ''},
             color_discrete_map={'commit_ratio_top_author': 'rgb(42, 105, 137)', 'commit_ratio_other_authors': 'rgb(157, 203, 225)'})
fig.update_traces(marker_line_color='rgb(42, 105, 137)', marker_line_width=1, opacity=0.7)
fig.data[0].name = 'Commit-Anteil des Top-Entwicklers'
fig.data[1].name = 'Commit-Anteil aller anderen Entwickler'


fig.show()

19 rows affected.
+----------------+-----------------+------------------+------------------------+-------------+
| BoundedContext | CommitCountByBC |    TopAuthor     | CommitCountByTopAuthor | CommitRatio |
+----------------+-----------------+------------------+------------------------+-------------+
|  aggregation   |       371       | Christoph Strobl |          146           |      39     |
|     config     |       180       |   Mark Paluch    |           46           |      25     |
|    convert     |       690       | Christoph Strobl |          257           |      37     |
|      core      |       725       | Christoph Strobl |          231           |      31     |
|      geo       |        61       |   Mark Paluch    |           23           |      37     |
|     gridfs     |       108       |   Mark Paluch    |           41           |      37     |
|     index      |       200       | Christoph Strobl |           73           |      36     |
|    mapping     |       206    

In [15]:
# Vergleich Commit-Anteile der Top-2-Entwicklers and aller anderen Entwickler
# BC, CommitCountByBC, TopAuthor, CommitCountByTopAuthor, CommitRatio
commitRatioForBc = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File), \
                                (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext) \
                                WHERE NOT c:Merge \
                                WITH bC.name AS BoundedContext, count(DISTINCT c) AS TotalCommitCount, a.name AS Author \
                                ORDER BY TotalCommitCount DESC \
                                RETURN BoundedContext, sum(TotalCommitCount) AS CommitCountByBC, collect(Author)[..3] AS Top3Authors, collect(TotalCommitCount)[..3] AS CommitCountByTop3Authors ORDER BY BoundedContext
        
commitRatioForBc_query_df = commitRatioForBc.get_dataframe()

author_list = []
commit_ratio_top_authors_list = []
commit_ratio_other_authors_list = []
for i, row in commitRatioForBc_query_df.iterrows():
    authors = row['Top3Authors']
    for author in authors:
        if author not in author_list:
            author_list.append(author)
    
    commit_sum_top_authors = sum(row['CommitCountByTop3Authors'])
    commit_ratio_top_authors = int(round(100 * commit_sum_top_authors / row['CommitCountByBC']))
    commit_ratio_other_authors = 100 - commit_ratio_top_authors
    commit_ratio_other_authors_list.append(commit_ratio_other_authors)
    commit_ratio_top_authors_list.append(commit_ratio_top_authors)

commitRatioTop3Authors_data = dict()
commitRatioTop3Authors_data['bounded_context'] = commitRatioForBc_query_df['BoundedContext']
commitRatioTop3Authors_data['commit_ratio_top_authors'] = commit_ratio_top_authors_list
commitRatioTop3Authors_data['commit_ratio_other_authors'] = commit_ratio_other_authors_list

commitRatioForBc_df = pandas.DataFrame(data=commitRatioTop3Authors_data)
fig = px.bar(commitRatioForBc_df, x='bounded_context', y=['commit_ratio_top_authors', 'commit_ratio_other_authors'],
             title='Anteil der Top-3-Entwickler an allen Commits einer fachlichen Komponete', 
             labels={'bounded_context': 'Bounded Context', 'value': 'Commit-Anteile in %', 'variable': ''},
             color_discrete_map={'commit_ratio_top_authors': 'rgb(42, 105, 137)', 'commit_ratio_other_authors': 'rgb(157, 203, 225)'})
fig.update_traces(marker_line_color='rgb(42, 105, 137)', marker_line_width=1, opacity=0.7)
fig.data[0].name = 'Commit-Anteil der Top-3-Entwickler'
fig.data[1].name = 'Commit-Anteil aller anderen Entwickler'
fig.show()

display(Markdown('#### Alle Namen der Top-Entwickler: '))
for author in author_list:
    display(Markdown(f'{author}'))

19 rows affected.


#### Alle Namen der Top-Entwickler: 

Christoph Strobl

Mark Paluch

Thomas Darimont

Oliver Gierke

Spring Operator

Oliver Drotbohm

### Zu wie vielen Komponenten tragen einzelne Entwickler bei?
* Sind die Entwickler, die zu den meisten Komponenten etwas beigetragen haben, identisch mit denen, die die meisten Commits erstellt haben?

In [28]:
# Entwickler, die zu den meisten Komponenten etwas beigetagen haben

bcCountByAuthor_query = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File), \
            (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext) \
            WHERE NOT c:Merge AND a.name <> "Spring Operator" \
            WITH bC.name AS BoundedContext, a.name AS Author, count(DISTINCT c) AS Commits \
            WITH collect(BoundedContext) AS BoundedContextsListForCount, Author \
            UNWIND BoundedContextsListForCount AS BoundedContextForCount \
            WITH  Author, count(BoundedContextForCount) AS BoundedContextCount \
            WHERE BoundedContextCount > 1 \
            RETURN Author, BoundedContextCount \
            ORDER BY BoundedContextCount DESC

bcCountByAuthor_query_df = bcCountByAuthor_query.get_dataframe()

fig = px.fig = px.bar(bcCountByAuthor_query_df, x='Author', y='BoundedContextCount', 
                      labels={'Author': 'Entwickler', 'BoundedContextCount': 'Anzahl der fachlichen Komponenten'},
                      title='Entwickler und Anzahl der fachlichen Komponenten, zu denen sie etwas beigetragen haben')
fig.update_traces(marker_color='rgb(179, 204, 204)', marker_line_color='rgb(92, 138, 138)',
                  marker_line_width=1, opacity=0.6)
fig.show()


37 rows affected.


* Die beiden Top-Entwickler (Christoph Strobl und Mark Paluch) haben in allen 19 Komponenten Beiträge geleistet.
* Die weiteren vorderen Plätze in den Top 5 decken sich ungefähr mit den Top-Entwicklern nach Commit-Anzahl.
* Ausreißer:
    * Thomas Darimont hat Beiträge zu 13 Komponenten geleistet, aber nur 3,6% aller Commits beigetragen.
    * Thomas Risberg hat 4,4% aller Commits beigetragen, aber nur in 3 fachlichen Komponenten.

In [93]:
# Relation between an author's relative commit count and relative bounded-context count.
# Inputs: commitCountByAuthor_df (columns Entwickler, Commits) and
#         bcCountByAuthor_query_df (columns Author, BoundedContextCount) from earlier cells.

# Normalisation constants.
# NOTE(review): hard-coded; presumably the highest commit count of a single author and the
# number of bounded contexts (the component queries return 19 rows) -- confirm, or derive
# them from the data so they cannot go stale.
MAX_COMMIT_COUNT = 1574
MAX_BC_COUNT = 19

bc_count_by_author = dict(zip(bcCountByAuthor_query_df['Author'],
                              bcCountByAuthor_query_df['BoundedContextCount']))

# One entry per author of the commit ranking. Authors missing from the bounded-context
# result (that query only returns authors with more than one context) get bcCount 1.
author_commitCount_map = {
    row['Entwickler']: {
        'commitCount': row['Commits'],
        'bcCount': bc_count_by_author.get(row['Entwickler'], 1),
    }
    for _, row in commitCountByAuthor_df.iterrows()
}

author_list = list(author_commitCount_map)
relative_commit_count_list = [
    counts['commitCount'] / MAX_COMMIT_COUNT for counts in author_commitCount_map.values()
]
relative_bc_count_list = [
    counts['bcCount'] / MAX_BC_COUNT for counts in author_commitCount_map.values()
]

# The former `commit_bc_count_ratio_list.sort(reverse=True)` was removed: that name is never
# defined in this notebook, so the cell raised NameError on a fresh kernel.
commitBcRation_data = {
    'author': author_list,
    'relative_commit_count': relative_commit_count_list,
    'relative_bc_count': relative_bc_count_list,
}

commitBcRation_df = pandas.DataFrame(data=commitBcRation_data)

# Plain assignment: `fig = px.fig = px.bar(...)` additionally set an attribute on the
# plotly.express module by accident.
fig = px.bar(commitBcRation_df, x='author', y=['relative_commit_count', 'relative_bc_count'], barmode='group',
             color_discrete_map={'relative_commit_count': 'rgb(92, 138, 138)', 'relative_bc_count': 'rgb(179, 204, 204)'},
             labels={'author': 'Top 13 Entwickler mit > 0,2%-Anteil an allen Commits', 'value': 'Relative Anzahl', 'variable': ''},
             title='Verhältnis relative Anzahl der Komponenten zu relativer Anzahl der Commits')
fig.data[0].name = 'Relative Anzahl Commits'
fig.data[1].name = 'Relative Anzahl Komponenten'
fig.update_traces(opacity=0.6)
fig.show()
    

* Wenn rel. Commit-Anzahl deutlich niedriger als rel. Komponenten-Anzahl: 
    * Entwickler hat in vielen Komponenten ein bisschen was beigetragen
* Wenn rel. Commit-Anzahl über rel. Komponenten-Anzahl:
    * Entwickler hat in wenigen Komponenten verhältnismäßig viel beigetragen

### Betrachtung Anzahl und Art der geänderten Dateien der Commits

### Betrachtung über Zeitspanne

## Nächste Schritte

In [18]:
%%cypher
// List of authors per bounded context (non-merge commits only).
// Rows are ordered by commit count (descending) before being collected into the list.
MATCH    (c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File),
         (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext),
         (a:Author)-[:COMMITTED]->(c)
WHERE    NOT c:Merge
WITH     bC.name AS BoundedContext, a.name AS Author, count(DISTINCT c) AS Commits
ORDER BY BoundedContext, Commits Desc
WITH     BoundedContext, collect(Author) AS Authors
RETURN   BoundedContext, Authors

19 rows affected.


BoundedContext,Authors
aggregation,"['Christoph Strobl', 'Mark Paluch', 'Thomas Darimont', 'Spring Operator', 'Oliver Gierke', 'Oliver Drotbohm', 'Eddú Meléndez', 'Matt Morrissette', 'Gustavo de Geus', 'Christian Ivan', 'Sergey Shcherbakov', 'Alessio Fachechi', 'Shashank Sharma', 'Jens Schauder', 'Yadhukrishna S Pai', 'Jérome GUYON', 'Sebastian Herold', 'Tobias Trelle', 'Nikolai Bogdanov']"
config,"['Mark Paluch', 'Oliver Gierke', 'Christoph Strobl', 'Spring Operator', 'Thomas Darimont', 'Oliver Drotbohm', 'Thomas Dudouet', 'Zied Yaich', 'Stephen Tyler Conrad', 'Viktor Khoroshko', 'Mark Pollack', 'Martin Baumgartner', 'Jens Schauder', 'Maciej Walkowiak', 'Ryan Tenney', 'John Blum', 'Mike Saavedra']"
convert,"['Christoph Strobl', 'Oliver Gierke', 'Mark Paluch', 'Oliver Drotbohm', 'Thomas Darimont', 'Spring Operator', 'Thiago Diniz da Silveira', 'Kevin Dosey', 'Ken Dombeck', 'Patryk Wąsik', 'Heesu Jung', 'Christian Ivan', 'David Julia', 'Jordi Llach Fernandez', 'Divya Srivastava', 'Roman Puchkovskiy', 'Jens Schauder']"
core,"['Christoph Strobl', 'Mark Paluch', 'Oliver Gierke', 'Oliver Drotbohm', 'Spring Operator', 'Thomas Darimont', 'Mark Pollack', 'Sebastien Deleuze', 'Laszlo Csontos', 'Michal Vich', 'Eddú Meléndez', 'Mikhail Mikhaylenko', 'Ken Dombeck', 'Borislav Rangelov', 'Martin Macko', 'Brice Vandeputte', 'Jens Schauder', 'Domenique Tilleuil', 'Amol Nayak', 'Ivan Sopov', 'Tobias Trelle', 'Philipp Schneider', 'Cimon Lucas (LCM)', 'Ryan Cloherty', 'nkey', 'Jacob Botuck', 'Sola', 'Komi Serge Innocent', 'kostya05983', 'wonwoo', 'Mathieu Ouellet', 'Roman Puchkovskiy', 'Yadhukrishna S Pai', 'Martin Baumgartner', 'Niko Schmuck', 'Sebastian Herold', 'Andreas Zink', 'Mainder Singh', 'Jan Kronquist', 'A. B. M. Kowser Patwary', 'Ilho Ahn', 'eric', 'Christoph Leiter', 'Chuong Ngo', 'Patryk Wąsik', 'Michael Simons', 'GotoFinal', 'abarkan', 'Juergen Zimmermann', 'Minsu']"
geo,"['Mark Paluch', 'Spring Operator', 'Christoph Strobl', 'larsw', 'Thomas Darimont', 'Oliver Drotbohm', 'Bjorn Harvold', 'Oliver Gierke']"
gridfs,"['Mark Paluch', 'Christoph Strobl', 'Spring Operator', 'Oliver Gierke', 'Hartmut Lang', 'Thomas Darimont', 'Oliver Drotbohm', 'Nick Stolwijk', 'konradend', 'Niklas Helge Hanft', 'Mathieu Ouellet', 'Denis Zavedeev', 'Martin Baumgartner', 'Philipp Schneider']"
index,"['Christoph Strobl', 'Mark Paluch', 'Oliver Gierke', 'Spring Operator', 'Oliver Drotbohm', 'Thomas Darimont', 'Martin Macko', 'Eddú Meléndez', 'Dave Perryman', 'Mark Pollack', 'Jens Schauder', 'Laurent Canet', 'Johno Crawford', 'Jordi Llach Fernandez', 'Philipp Schneider', 'Thomas Risberg']"
mapping,"['Christoph Strobl', 'Mark Paluch', 'Oliver Gierke', 'Oliver Drotbohm', 'Spring Operator', 'Thomas Darimont', 'Eddú Meléndez', 'BraveLeeLee', 'Gatto', 'Kim Toms', 'Martin Baumgartner', 'Roman Puchkovskiy', 'Patryk Wąsik', 'Divya Srivastava', 'Michael Simons', 'Maciej Walkowiak', 'Christoph Leiter']"
mapreduce,"['Mark Paluch', 'Spring Operator', 'Christoph Strobl', 'Oliver Gierke', 'Mark Pollack', 'Oliver Drotbohm', 'Thomas Darimont', 'Jens Schauder']"
messaging,"['Mark Paluch', 'Christoph Strobl', 'Spring Operator']"
