# System performance analysis

## 1. Database connection
Establish a connection to the Neo4j database that holds the performance data.

In [191]:
import py2neo

import os

# Connection settings come from the environment so that real credentials do
# not have to be stored in the notebook. Each variable falls back to the
# value that used to be hard-coded here, so an unconfigured local setup
# connects exactly as before.
graph = py2neo.Graph(
    bolt=True,
    host=os.environ.get('NEO4J_HOST', 'localhost'),
    user=os.environ.get('NEO4J_USER', 'neo4j'),
    password=os.environ.get('NEO4J_PASSWORD', 'neo4j'),
)


## 2. Cypher query
Cypher queries are executed to get CPU and memory utilization data. The results are stored in dataframes.

In [192]:
import pandas as pd

# CPU samples: one record per CPU and timestamp for host 'middletier2',
# restricted to the Directory node whose fileName matches the regex below.
cpu_query = """
MATCH (n:Directory)-[:CONTAINS]->(c:CpuUtilizationRecord { hostname: 'middletier2' })
WHERE n.fileName =~ '.*/1-MemoryLeak-5/kieker-logs/kieker-20150820-064855519-UTC-middletier2-KIEKER'
RETURN c.timestamp, c.cpuID, c.totalUtilization
ORDER BY c.timestamp
"""
cpu_records = graph.run(cpu_query).data()
# Map the raw Cypher result keys to readable column names.
cpu_columns = {
    "c.timestamp": "Timestamp",
    "c.cpuID": "CPU ID",
    "c.totalUtilization": "CPU Utilization",
}
df = pd.DataFrame(cpu_records).rename(columns=cpu_columns)

# Memory samples for the same host and directory; utilization is computed in
# the query as memUsed / memTotal.
mem_query = """
MATCH (n:Directory)-[:CONTAINS]->(m:MemSwapUsageRecord { hostname: 'middletier2' })
WHERE n.fileName =~ '.*/1-MemoryLeak-5/kieker-logs/kieker-20150820-064855519-UTC-middletier2-KIEKER'
RETURN m.timestamp, toFloat(m.memUsed) / toFloat(m.memTotal)
ORDER BY m.timestamp
"""
mem_records = graph.run(mem_query).data()
mem_columns = {
    "m.timestamp": "Timestamp",
    "toFloat(m.memUsed) / toFloat(m.memTotal)": "Memory Utilization",
}
df_mem = pd.DataFrame(mem_records).rename(columns=mem_columns)

## 3. Analysis and Visualization
The data is analyzed, merged and visualized. The timestamps differ only in the microseconds.

They are rounded to the nearest second to allow the merge of the dataframes.

In [193]:
# Round the CPU timestamps to whole seconds so they can be matched against the
# memory samples, then average the utilization of all CPUs per second.
df['Timestamp'] = pd.to_datetime(df['Timestamp']).dt.round('1s')
df['CPU ID'] = pd.to_numeric(df['CPU ID'])
# The averaged 'CPU ID' carries no meaning, so it is dropped. The keyword form
# is required: passing the axis positionally (`drop('CPU ID', 1)`) raises a
# TypeError on pandas >= 2.0.
df = df.groupby(['Timestamp']).mean().drop(columns='CPU ID')

# Same rounding for the memory samples; index them by timestamp.
df_mem['Timestamp'] = pd.to_datetime(df_mem['Timestamp']).dt.round('1s')
df_mem = df_mem.set_index('Timestamp')

# Both frames are now indexed by 'Timestamp'; inner-join them on that level.
df = df.merge(df_mem, on='Timestamp')

In [194]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0_level_0,CPU Utilization,Memory Utilization
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-08-20 06:48:56,0.940196,0.40565
2015-08-20 06:49:06,0.185722,0.408538
2015-08-20 06:49:16,0.041046,0.410017
2015-08-20 06:49:26,0.006507,0.410202
2015-08-20 06:49:36,0.009493,0.410079


In [195]:
from IPython.display import display, HTML

# HTML skeleton for embedding a rendered pygal chart in the notebook output.
# `{rendered_chart}` is the single placeholder and is filled via `str.format`.
# Both helper scripts are loaded over HTTPS: a plain `http://` script is
# blocked as mixed content when the notebook itself is served over HTTPS.
# Each `src` attribute is closed by exactly one quote so the tags stay
# well-formed.
base_html = """
<!DOCTYPE html>
<html>
  <head>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/javascripts/svg.jquery.js"></script>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"></script>
  </head>
  <body>
    <figure>
      {rendered_chart}
    </figure>
  </body>
</html>
"""

In [196]:
import pygal as pg

# Line chart with one series per measurement, in the row order of `df`.
line_chart = pg.Line()
line_chart.title = 'System Measurements'
# Fixed y-axis ticks from 0.1 to 1.0 in steps of 0.1.
line_chart.y_labels = .1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0
# The columns are already pandas Series, so they are passed directly.
line_chart.add('Core Utilization', df['CPU Utilization'])
# 'Memory Utilization' holds memUsed / memTotal, a ratio rather than a
# percentage, so the legend label does not claim "%".
line_chart.add('Memory Utilization', df['Memory Utilization'])

# Render the chart as a unicode SVG string and embed it in the HTML skeleton.
display(HTML(base_html.format(rendered_chart=line_chart.render(is_unicode=True))))
