In [2]:
import pygal as pg
from string import Template
from IPython.core.display import display, HTML

%load_ext cypher
%config CypherMagic.uri='http://neo4j:neo@localhost:7474/db/data'

In [3]:
# HTML page skeleton used to embed a rendered pygal SVG chart in the notebook.
# The two scripts provide pygal's interactive tooltips; the single placeholder
# `{rendered_chart}` is filled in via `base_html.format(rendered_chart=...)`.
base_html = """
<!DOCTYPE html>
<html>
  <head>
  <script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/svg.jquery.js"></script>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"></script>
  </head>
  <body>
    <figure>
      {rendered_chart}
    </figure>
  </body>
</html>
"""

# Analysethema

## Fragestellung

Was möchte ich erfahren?

## Datenquelle

* Java-Strukturen des Shopizer-Systems mittels jQAssistant gescannt und in Neo4j abfragbar
* Git-Historie des Shopizer-Systems mittels jQAssistant gescannt und in Neo4j abfragbar


* Identifikation der fachlichen Komponenten im Source Code notwendig (siehe 01)
* Matching zwischen Entwicklern und fachlichen Komponenten

## Annahmen

## Validierung

## Implementierung

In [5]:
%%cypher
// List authors with name and e-mail address (capped at 20 rows)
MATCH (a:Author)
RETURN a.name  AS Name,
       a.email AS EMail
LIMIT 20

20 rows affected.


Name,EMail
Jens Schauder,jschauder@vmware.com
Christoph Strobl,cstrobl@vmware.com
Mark Paluch,mpaluch@vmware.com
Greg L. Turnquist,gturnquist@vmware.com
Oliver Drotbohm,odrotbohm@vmware.com
David Julia,dajulia3@gmail.com
Gatto,roggatto@inmetrics.com.br
larsw,lars@sral.org
Divya Srivastava,divya.jnu08@gmail.com
divya_jnu08,divya@virtueanalytics.com


In [6]:
%%cypher
// Total number of nodes labelled Author
MATCH  (author:Author)
RETURN count(author)

1 rows affected.


count(author)
152


In [16]:
commitsPerAuthor = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit), \
                                 (c)-[:CONTAINS_CHANGE]->(:Change)-[:MODIFIES]->(file:File) \
                           WHERE NOT c:Merge \
                           WITH a, count(DISTINCT c) AS Commits \
                           WHERE Commits > 1 \
                           RETURN a.name as Entwickler, Commits \
                           ORDER BY Commits DESC LIMIT 10
                            

commitsPerAuthor_df = commitsPerAuthor.get_dataframe()

#Visualisierung

bar_chart = pg.Bar(show_legend=True, human_readable=True, 
fill=True, legend_at_bottom=True, legend_at_bottom_columns=2)
bar_chart.title = 'Entwickler mit den meisten Commits'
for index, row in commitsPerAuthor_df.iterrows():
     bar_chart.add(row['Entwickler'],[{"value": row['Commits']}])
display(HTML(base_html.format(rendered_chart=bar_chart.render(is_unicode=True))))

10 rows affected.


## Ergebnisse

In [6]:
%%cypher
// Distinct non-merge commits per author within each bounded context,
// listed per context with the most active authors first
MATCH (author:Author)-[:COMMITTED]->(commit:Commit)-[:CONTAINS_CHANGE]->(:Change)-->(file:Git:File),
      (file)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(context:BoundedContext)
WHERE NOT commit:Merge
RETURN context.name           AS BoundedContext,
       author.name            AS Author,
       count(DISTINCT commit) AS Commits
ORDER BY BoundedContext ASC, Commits DESC

142 rows affected.


BoundedContext,Author,Commits
aggregation,Christoph Strobl,146
aggregation,Mark Paluch,119
aggregation,Thomas Darimont,39
aggregation,Spring Operator,20
aggregation,Oliver Gierke,13
aggregation,Oliver Drotbohm,11
aggregation,Eddú Meléndez,4
aggregation,Matt Morrissette,3
aggregation,Gustavo de Geus,3
aggregation,Christian Ivan,3


In [5]:
%%cypher
// Top committer per bounded context: rank authors by distinct non-merge
// commits, then keep only the first author of each context
MATCH    (commit:Commit)-[:CONTAINS_CHANGE]->(:Change)-->(file:Git:File),
         (file)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(context:BoundedContext),
         (author:Author)-[:COMMITTED]->(commit)
WHERE    NOT commit:Merge
WITH     context.name           AS BoundedContext,
         author.name            AS Author,
         count(DISTINCT commit) AS Commits
ORDER BY BoundedContext ASC, Commits DESC
// Slice to a list of at most one element; UNWIND turns it back into a row
WITH     BoundedContext, collect(Author)[0..1] AS Leaders
UNWIND   Leaders AS Author
RETURN   BoundedContext, Author

13 rows affected.


BoundedContext,Author
aggregation,Christoph Strobl
convert,Christoph Strobl
geo,Mark Paluch
index,Christoph Strobl
mapping,Christoph Strobl
mapreduce,Mark Paluch
messaging,Mark Paluch
query,Christoph Strobl
schema,Mark Paluch
script,Spring Operator


In [13]:
%%cypher
// Bounded context, its top author and that author's number of distinct
// non-merge commits
MATCH (a:Author)-[:COMMITTED]->(c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File),
      (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext)
WHERE NOT c:Merge
WITH     bC.name AS BoundedContext, a.name AS Author, count(DISTINCT c) AS Commits
ORDER BY BoundedContext, Commits DESC
// Collect author and count together as one map per row so the two values can
// never be paired up wrongly (separate collect() calls skip nulls independently),
// and take the first entry instead of unwinding two single-element lists.
WITH   BoundedContext, head(collect({author: Author, commits: Commits})) AS Top
RETURN BoundedContext,
       Top.author  AS TopAuthor,
       Top.commits AS CommitCountByTopAuthor

13 rows affected.


BoundedContext,TopAuthor,CommitCountByTopAuthor
aggregation,Christoph Strobl,146
convert,Christoph Strobl,257
geo,Mark Paluch,23
index,Christoph Strobl,73
mapping,Christoph Strobl,57
mapreduce,Mark Paluch,22
messaging,Mark Paluch,23
query,Christoph Strobl,182
schema,Mark Paluch,13
script,Spring Operator,19


In [7]:
%%cypher
// List of committers per bounded context, collected after sorting by
// descending commit count within each context
MATCH    (commit:Commit)-[:CONTAINS_CHANGE]->(:Change)-->(file:Git:File),
         (file)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(context:BoundedContext),
         (author:Author)-[:COMMITTED]->(commit)
WHERE    NOT commit:Merge
WITH     context.name           AS BoundedContext,
         author.name            AS Author,
         count(DISTINCT commit) AS Commits
ORDER BY BoundedContext ASC, Commits DESC
RETURN   BoundedContext, collect(Author) AS Authors

13 rows affected.


BoundedContext,Authors
aggregation,"['Christoph Strobl', 'Mark Paluch', 'Thomas Darimont', 'Spring Operator', 'Oliver Gierke', 'Oliver Drotbohm', 'Eddú Meléndez', 'Matt Morrissette', 'Gustavo de Geus', 'Christian Ivan', 'Sergey Shcherbakov', 'Shashank Sharma', 'Sebastian Herold', 'Jens Schauder', 'Yadhukrishna S Pai', 'Alessio Fachechi', 'Nikolai Bogdanov', 'Jérome GUYON', 'Tobias Trelle']"
convert,"['Christoph Strobl', 'Mark Paluch', 'Oliver Gierke', 'Oliver Drotbohm', 'Thomas Darimont', 'Spring Operator', 'Thiago Diniz da Silveira', 'Ken Dombeck', 'Kevin Dosey', 'Patryk Wąsik', 'Heesu Jung', 'Christian Ivan', 'David Julia', 'Jordi Llach Fernandez', 'Roman Puchkovskiy', 'Divya Srivastava', 'Jens Schauder']"
geo,"['Mark Paluch', 'Spring Operator', 'Christoph Strobl', 'larsw', 'Thomas Darimont', 'Oliver Drotbohm', 'Bjorn Harvold', 'Oliver Gierke']"
index,"['Christoph Strobl', 'Mark Paluch', 'Oliver Gierke', 'Spring Operator', 'Oliver Drotbohm', 'Thomas Darimont', 'Martin Macko', 'Eddú Meléndez', 'mpollack', 'Dave Perryman', 'Jens Schauder', 'Laurent Canet', 'Philipp Schneider', 'Thomas Risberg', 'Johno Crawford', 'Jordi Llach Fernandez']"
mapping,"['Christoph Strobl', 'Oliver Gierke', 'Mark Paluch', 'Oliver Drotbohm', 'Spring Operator', 'Thomas Darimont', 'Eddú Meléndez', 'BraveLeeLee', 'Gatto', 'Kim Toms', 'Patryk Wąsik', 'Divya Srivastava', 'Martin Baumgartner', 'Maciej Walkowiak', 'Roman Puchkovskiy', 'Christoph Leiter', 'Michael Simons']"
mapreduce,"['Mark Paluch', 'Spring Operator', 'Christoph Strobl', 'Oliver Gierke', 'mpollack', 'Oliver Drotbohm', 'Thomas Darimont', 'Jens Schauder']"
messaging,"['Mark Paluch', 'Christoph Strobl', 'Spring Operator']"
query,"['Christoph Strobl', 'Mark Paluch', 'Oliver Gierke', 'Oliver Drotbohm', 'Thomas Darimont', 'Spring Operator', 'Eddú Meléndez', 'Thomas Risberg', 'Edward Prentice', 'Yoann de Martino', 'alex-on-java', 'Christian Ivan', 'Clément Petit', 'John Willemin', 'Victor', 'Sebastien Deleuze', 'ddebray', 'Artyom Gabeev', 'Patryk Wąsik', 'Pavel Vodrážka', 'Andrey Bloschetsov', 'Andrew Duncan', 'Philipp Schneider', 'Maciej Walkowiak', 'Sébastien Deleuze', 'Jens Schauder', 'Becca Gaspard', 'Alexey Plotnik', 'Andreas Zink', 'Ziemowit Stolarczyk', 'divya_jnu08', 'mpollack', 'abarkan', 'Tjeu Kayim', 'owen-q']"
schema,"['Mark Paluch', 'Christoph Strobl', 'Spring Operator', 'Michal Kurcius']"
script,"['Spring Operator', 'Mark Paluch', 'Christoph Strobl', 'Oliver Drotbohm']"


In [8]:
%%cypher
// Number of committers (distinct author names) per bounded context,
// sorted by descending count
MATCH    (c:Commit)-[:CONTAINS_CHANGE]->(:Change)-[]->(f:Git:File),
         (f)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(bC:BoundedContext),
         (a:Author)-[:COMMITTED]->(c)
WHERE    NOT c:Merge
// One row per (bounded context, author name) pair; no intermediate sort is
// needed because the rows are only counted afterwards.
WITH DISTINCT bC.name AS BoundedContext, a.name AS Author
WITH     BoundedContext, count(Author) AS AuthorCount
RETURN   BoundedContext, AuthorCount
// Secondary sort key makes the order of contexts with equal counts deterministic.
ORDER BY AuthorCount DESC, BoundedContext ASC

13 rows affected.


BoundedContext,AuthorCount
query,35
aggregation,19
convert,17
mapping,17
index,16
geo,8
mapreduce,8
spel,5
schema,4
script,4


In [14]:
%%cypher
// Number of distinct non-merge commits per bounded context, largest first
MATCH (author:Author)-[:COMMITTED]->(commit:Commit)-[:CONTAINS_CHANGE]->(:Change)-->(file:Git:File),
      (file)<-[:HAS_SOURCE]-(:Type:Java)<-[:CONTAINS]-(context:BoundedContext)
WHERE NOT commit:Merge
RETURN context.name           AS BoundedContext,
       count(DISTINCT commit) AS TotalCommitCount
ORDER BY TotalCommitCount DESC

13 rows affected.


BoundedContext,TotalCommitCount
convert,690
query,567
aggregation,371
mapping,206
index,200
mapreduce,77
spel,68
geo,61
messaging,45
script,35


In [None]:
%%cypher
// TODO: this cell contains no query yet. Intended result columns: bounded context,
// commit count, top author, commit count by top author, commit share of top author

## Nächste Schritte