In [1]:
# configure spark variables
from pyspark.context import SparkContext
from pyspark.sql.context import SQLContext
from pyspark.sql.session import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
    
# Reuse the context that is already running when this cell is executed again:
# a second bare SparkContext() in the same kernel raises
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate()
sqlContext = SQLContext(sc)
spark = SparkSession(sc)

# load up other dependencies
import base64
import glob
import re
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

22/03/24 02:08:38 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [2]:
import os
# Directory that receives every exported figure (JPEG/SVG).
# exist_ok=True makes the cell safe to re-run without a separate,
# race-prone existence check.
os.makedirs("images", exist_ok=True)

import os
import glob
import plotly.express as px
from plotly.subplots import make_subplots
from pyspark.sql.types import StructType,StructField, StringType, IntegerType

In [3]:
# Path of the generated HTML report; the later cells append to this file.
html_output_file = '../index.html'

# Mode 'w' truncates any previous report, so this cell has to run before the
# cells that append sections and figures.
with open(html_output_file, 'w') as f:
    f.write('<h1>Open GiLAN Testbed Results</h1>')
    # Headings are now closed with </h3> / </h4>; the original emitted a
    # second opening tag instead, leaving every heading unterminated.
    f.write('<h3> The graphs summarise the NFV performance metrics </h3>')
    # Table of contents: each href targets the id of a section heading
    # written further down the report.
    f.write('<h4><a href="#cpu-metrics"> CPU </a> </h4>')
    f.write('<h4><a href="#memory-metrics"> Memory </a> </h4>')
    f.write('<h4><a href="#filesystem-metrics"> Filesystem </a> </h4>')
    f.write('<h4><a href="#disk-metrics"> Disk I/O </a> </h4>')
    f.write('<h4><a href="#networking-metrics"> Networking </a> </h4>')
    f.write('<h4><a href="#flame-graphs"> Flame Graphs to analyse code paths </a> </h4>')

In [4]:
# Append the CPU section heading; its id is the target of the "#cpu-metrics" link.
with open(html_output_file, mode='a') as report:
    report.write('<h2 id="cpu-metrics"> CPU Metrics </h2>')

In [5]:
# Read the runqlat results of every packet rate and turn the "data" array
# into one row per histogram bucket.
runqlat_buckets = (
    spark.read.option("basePath", "../ansible/.results")
    .json("../ansible/.results/kpps=*/tool=runqlat")
    .select("kpps", "ts", "val_type", "tool", explode("data").alias("dataExplode"))
)

# Each bucket is labelled "<interval-start>-<interval-end>" for the x axis;
# kpps is cast to string before being used as the bar colour.
runqlat_df = runqlat_buckets.select(
    col("kpps").cast(StringType()),
    "ts",
    "val_type",
    "tool",
    "dataExplode.*",
    concat_ws('-', col('dataExplode.interval-start'), col('dataExplode.interval-end')).alias("interval"),
)

runqlat_axis_labels = {
    "kpps": "Packets Per Second (thousand)",
    "count": "Number of Processes",
    "interval": "Latency (usecs)",
}
runqlat_fig = px.bar(
    runqlat_df.toPandas(),
    x="interval",
    y="count",
    color="kpps",
    barmode="group",
    title="Time each of the processes spends waiting for its turn on CPU",
    labels=runqlat_axis_labels,
)

# Show inline, export a static image and append the interactive chart to the report.
runqlat_fig.show()
runqlat_fig.write_image("images/runqlat.jpeg")
with open(html_output_file, 'a') as report:
    report.write(runqlat_fig.to_html(full_html=False, include_plotlyjs='cdn'))

                                                                                

In [6]:
# Link from the CPU section to the element with id "profile".
with open(html_output_file, mode='a') as report:
    report.write('<div><a href="#profile">See code paths that are consuming CPU resources</a></div>')

In [7]:
# Link from the CPU section to the element with id "offcputime".
# (Link text fixed: "casing" -> "causing".)
with open(html_output_file, 'a') as f:
    f.write('<div><a href="#offcputime">See code paths causing processes to go Off-CPU</a></div>')

In [8]:
# Append the memory section heading; its id is the target of the "#memory-metrics" link.
with open(html_output_file, mode='a') as report:
    report.write('<h2 id="memory-metrics"> Memory Metrics </h2>')

In [9]:
# Read the sys_enter_brk results of every packet rate.
data_df = spark.read.option("basePath", "../ansible/.results").json(
"../ansible/.results/kpps=*/tool=sys_enter_brk-kernel")

# Long-format layout built below: one row per (kpps, process) with its count.
# 'process' is filled with a column *name* (a string literal), so it is
# declared as a string rather than an integer.
schema = StructType([
  StructField('type', StringType(), True),
  StructField('kpps', StringType(), True),
  StructField('count', IntegerType(), True),
  StructField('process', StringType(), True),
  ])

#Create empty DataFrame directly.
sys_enter_brk_df = spark.createDataFrame([], schema)

# Every field of the "data.@" struct becomes its own top-level column.
columns = data_df.select("data.@.*").columns
data_df = data_df.select("*", "data.@.*").drop("data")

for c in columns:
  try:
    # Unpivot: the column name goes to 'process', its value to 'count'.
    df = data_df.select("type", "kpps", col(c).alias('count')).withColumn('process', lit(c))
    sys_enter_brk_df = sys_enter_brk_df.union(df)
  except Exception as exc:
    # Best effort: skip the column but report why it failed. Catching
    # Exception (not a bare except) lets KeyboardInterrupt through.
    print("Failed to find {}: {}".format(c, exc))

sys_enter_brk_fig = px.sunburst(sys_enter_brk_df.toPandas(), path=['kpps', 'process'], values='count',
                  color='process', hover_data=['count'],
                  title="Processes responsible for heap extension")
sys_enter_brk_fig.show()
sys_enter_brk_fig.write_image("images/sys_enter_brk.jpeg")
with open(html_output_file, 'a') as f:
    f.write(sys_enter_brk_fig.to_html(full_html=False, include_plotlyjs='cdn'))


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [10]:
# Links from the memory section to the elements with ids "page_fault-user"
# and "page_fault-kernel".
page_fault_links = (
    '<div><a href="#page_fault-user">See user code path responsible for page faults</a></div>',
    '<div><a href="#page_fault-kernel">See kernel code path responsible for page faults</a></div>',
)
with open(html_output_file, mode='a') as report:
    for link_html in page_fault_links:
        report.write(link_html)

In [11]:
# Append the filesystem section heading; its id is the target of the
# "#filesystem-metrics" link.
with open(html_output_file, mode='a') as report:
    report.write('<h2 id="filesystem-metrics"> Filesystem Metrics </h2>')

In [12]:
# Read the vfsstat results of every packet rate.
vfsstat_reader = spark.read.option("basePath", "../ansible/.results")
vfsstat_df = vfsstat_reader.json("../ansible/.results/kpps=*/tool=vfsstat")

# Grouped bars: one group per packet rate, one bar per counter column.
vfsstat_counters = ["WRITE", "READ", "OPEN", "FSYNC", "CREATE"]
vfsstat_axis_labels = {
    "kpps": "Packets Per Second (thousand)",
    "value": "Count",
    "variable": "VFS operation",
}
vfsstat_fig = px.bar(
    vfsstat_df.toPandas(),
    x="kpps",
    y=vfsstat_counters,
    barmode='group',
    title="High-level workload characterization of virtual file system operations",
    labels=vfsstat_axis_labels,
)
vfsstat_fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# Show inline, export a static image and append the interactive chart to the report.
vfsstat_fig.show()
vfsstat_fig.write_image("images/vfsstat.jpeg")
with open(html_output_file, 'a') as report:
    report.write(vfsstat_fig.to_html(full_html=False, include_plotlyjs='cdn'))


In [13]:
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
# Read the vfssize results of every packet rate.
data_df = spark.read.option("basePath", "../ansible/.results").json("../ansible/.results/kpps=*/tool=vfssize")

# Long-format layout built below: one row per histogram bucket.
schema = StructType([
  StructField('type', StringType(), True),
  StructField('kpps', StringType(), True),
  StructField('count', IntegerType(), True),
  StructField('min', IntegerType(), True),
  StructField('max', IntegerType(), True),
  StructField('device', StringType(), True),
  StructField('program', StringType(), True),
  StructField('filename', StringType(), True),
  ])

#Create empty DataFrame directly.
vfssize_df = spark.createDataFrame([], schema)

# Every field of the "data.@" struct becomes its own column. Dots are replaced
# in the column names so they can be referenced as plain column names below.
columns = data_df.select("data.@.*").columns
columns = [c.replace('.', '_') for c in columns]
data_df = data_df.select("*", "data.@.*").drop("data")
data_df = data_df.toDF(*(c.replace('.', '_') for c in data_df.columns))
for c in columns:
  try:
    # The column name is split on ',' into program (first part) and
    # filename (second part); the whole name is kept in 'device'.
    df = (data_df.select("*", explode(c).alias("dataExplode"))
          .select("type", "kpps", "dataExplode.*")
          .withColumn('device', lit(c))
          .withColumn('program', lit(c.split(',')[0]))
          .withColumn('filename', lit(c.split(',')[1])))
    # Align by column NAME, not position: union() matches positionally, and
    # Spark's inferred JSON structs list their fields alphabetically
    # (count, max, min), which would put max under 'min' and vice versa.
    # NOTE(review): assumes the bucket fields are named count/min/max - confirm.
    vfssize_df = vfssize_df.unionByName(df)
  except Exception as exc:
    # Best effort: skip the column but report why it failed.
    print("Failed to find {}: {}".format(c, exc))

vfssize_df = vfssize_df.select("*", concat_ws(' - ',col('min'),col('max')).alias("interval"))

# Collect once; the three figures below share the same data, so there is no
# need to run the Spark job three times.
vfssize_pdf = vfssize_df.toPandas()

vfssize_labels = {
    "kpps": "Packets Per Second (thousand)",
    "count": "Number of I/O operations",
    "interval": "I/O Size (bytes)"
}

vfssize_fig_1 = px.scatter(vfssize_pdf, x="interval", y="kpps",
             size="count", color="program",
             title="The frequency of I/O per process name per I/O size",
             labels=vfssize_labels)
vfssize_fig_1.show()
vfssize_fig_1.write_image("images/vfssize-process.jpeg")
vfssize_fig_2 = px.scatter(vfssize_pdf, x="interval", y="kpps",
             size="count", color="filename",
             title="The frequency of I/O per filename per I/O size",
             labels=vfssize_labels)
vfssize_fig_2.show()
vfssize_fig_2.write_image("images/vfssize-filename.jpeg")
# Title fixed: the original reused the run-queue latency title although this
# chart plots the number of I/O operations per I/O size.
vfssize_fig_3 = px.bar(vfssize_pdf, x="interval", y="count", color="kpps", barmode="group",
        title="The frequency of I/O per I/O size",
        labels=vfssize_labels)
vfssize_fig_3.show()
vfssize_fig_3.write_image("images/vfssize.jpeg")
with open(html_output_file, 'a') as f:
    f.write(vfssize_fig_1.to_html(full_html=False, include_plotlyjs='cdn'))
    f.write(vfssize_fig_2.to_html(full_html=False, include_plotlyjs='cdn'))
    f.write(vfssize_fig_3.to_html(full_html=False, include_plotlyjs='cdn'))

22/03/24 02:08:58 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
22/03/24 02:09:12 WARN DAGScheduler: Broadcasting large task binary with size 1117.8 KiB
                                                                                

22/03/24 02:09:24 WARN DAGScheduler: Broadcasting large task binary with size 1117.8 KiB
                                                                                

22/03/24 02:09:32 WARN DAGScheduler: Broadcasting large task binary with size 1117.8 KiB
                                                                                

In [14]:
# Read the filetop results of every packet rate.
filetop_df = spark.read.option("basePath", "../ansible/.results").json(
"../ansible/.results/kpps=*/tool=filetop")
# Collect once; both sunbursts are drawn from the same data, so the Spark job
# does not need to run twice.
filetop_pdf = filetop_df.toPandas()
# Reads: sectors sized by R_Kb, READS shown on hover.
filetop_read_fig = px.sunburst(filetop_pdf, path=['kpps', 'FILE'], values='R_Kb',
                  color='FILE', hover_data=['READS'],
                  title="Most frequently read filenames - including TCP sockets",
                  labels={
                    "kpps": "Packets Per Second (thousand)",
                    "READS": "Number of file reads",
                    "FILE": "File names"
                })
filetop_read_fig.show()
filetop_read_fig.write_image("images/filetop-reads.jpeg")
# Writes: sectors sized by W_Kb, WRITES shown on hover. Labels added so this
# chart is annotated the same way as the reads chart.
filetop_write_fig = px.sunburst(filetop_pdf, path=['kpps', 'FILE'], values='W_Kb',
                  color='FILE', hover_data=['WRITES'],
                  title="Most frequently written filenames - including TCP sockets",
                  labels={
                    "kpps": "Packets Per Second (thousand)",
                    "WRITES": "Number of file writes",
                    "FILE": "File names"
                })
filetop_write_fig.show()
filetop_write_fig.write_image("images/filetop-writes.jpeg")
with open(html_output_file, 'a') as f:
    f.write(filetop_read_fig.to_html(full_html=False, include_plotlyjs='cdn'))
    f.write(filetop_write_fig.to_html(full_html=False, include_plotlyjs='cdn'))


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.




The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [15]:
# Read the cachestat results of every packet rate.
cachestat_reader = spark.read.option("basePath", "../ansible/.results")
cachestat_df = cachestat_reader.json("../ansible/.results/kpps=*/tool=cachestat")

# Grouped bars: one group per packet rate, one bar per counter column.
cachestat_counters = ["HITS", "MISSES", "DIRTIES"]
cachestat_axis_labels = {
    "kpps": "Packets Per Second (thousand)",
    "value": "Count",
    "variable": "Cache Operation",
}
cachestat_fig = px.bar(
    cachestat_df.toPandas(),
    x="kpps",
    y=cachestat_counters,
    barmode='group',
    title="Page cache hit vs miss ratio at different packet rate",
    labels=cachestat_axis_labels,
)
cachestat_fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# Show inline, export a static image and append the interactive chart to the report.
cachestat_fig.show()
cachestat_fig.write_image("images/cachestat.jpeg")
with open(html_output_file, 'a') as report:
    report.write(cachestat_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [16]:
# Append the disk I/O section heading; its id is the target of the "#disk-metrics" link.
with open(html_output_file, mode='a') as report:
    report.write('<h2 id="disk-metrics"> Disk I/O Metrics </h2>')

In [17]:
# Read the biolatency results of every packet rate and turn the "data" array
# into one row per histogram bucket.
biolatency_buckets = (
    spark.read.option("basePath", "../ansible/.results")
    .json("../ansible/.results/kpps=*/tool=biolatency")
    .select("kpps", "ts", "val_type", "tool", explode("data").alias("dataExplode"))
)

# Each bucket is labelled "<interval-start>-<interval-end>" for the x axis;
# kpps is cast to string before being used as the bar colour.
biolatency_df = biolatency_buckets.select(
    col("kpps").cast(StringType()),
    "ts",
    "val_type",
    "tool",
    "dataExplode.*",
    concat_ws('-', col('dataExplode.interval-start'), col('dataExplode.interval-end')).alias("interval"),
)

biolatency_axis_labels = {
    "kpps": "Packets Per Second (thousand)",
    "count": "Number of I/O",
    "interval": "Latency (usecs)",
}
biolatency_fig = px.bar(
    biolatency_df.toPandas(),
    x="interval",
    y="count",
    color="kpps",
    barmode="group",
    title="Block I/O device latency",
    labels=biolatency_axis_labels,
)

# Show inline, export a static image and append the interactive chart to the report.
biolatency_fig.show()
biolatency_fig.write_image("images/biolatency.jpeg")
with open(html_output_file, 'a') as report:
    report.write(biolatency_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [18]:
# Append the networking section heading; its id is the target of the
# "#networking-metrics" link. (Heading text fixed: "Netowrking" -> "Networking".)
with open(html_output_file, 'a') as f:
    f.write('<h2 id="networking-metrics"> Networking Metrics </h2>')

In [19]:
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
# Read the netsize results of every packet rate.
data_df = spark.read.option("basePath", "../ansible/.results").json("../ansible/.results/kpps=*/tool=netsize")

# Long-format layout built below: one row per histogram bucket.
schema = StructType([
  StructField('type', StringType(), True),
  StructField('kpps', StringType(), True),
  StructField('count', IntegerType(), True),
  StructField('min', IntegerType(), True),
  StructField('max', IntegerType(), True),
  StructField('device', StringType(), True),
  StructField('action', StringType(), True),
  ])

#Create empty DataFrame directly.
netsize_df = spark.createDataFrame([], schema)

# "data" is nested two levels deep: action -> device -> array of buckets.
types = data_df.select("data.*").columns
data_df = data_df.select("*", "data.*").drop("data")
for t in types: # for each action -> e.g., recv_bytes
  columns = data_df.select("{}.*".format(t)).columns
  for c in columns: # for each device -> e.g., eth0
      df = (data_df.select("type", "kpps", "tool", explode("{}.{}".format(t, c)).alias("dataExplode"))
            .select("type", "kpps", "dataExplode.*")
            .withColumn('device', lit(c))
            .withColumn('action', lit(t)))
      # Align by column NAME, not position: union() matches positionally, and
      # Spark's inferred JSON structs list their fields alphabetically
      # (count, max, min), which would put max under 'min' and vice versa.
      # NOTE(review): assumes the bucket fields are named count/min/max - confirm.
      netsize_df = netsize_df.unionByName(df)

netsize_df = netsize_df.select("*", concat_ws(' - ',col('min'),col('max')).alias("interval"))
netsize_fig = px.bar(netsize_df.toPandas(), x="interval", y="count", color="kpps", barmode="group", facet_col="action",
        title="Packet Distribution",
        labels={
                    "kpps": "Packets Per Second (thousand)",
                    "count": "Number of Packets",
                    "interval": "Packet Size (bytes)"
                })
netsize_fig.show()
netsize_fig.write_image("images/netsize.jpeg")
with open(html_output_file, 'a') as f:
    f.write(netsize_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [20]:
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
# Read the nettxlat-dev results of every packet rate.
data_df = spark.read.option("basePath", "../ansible/.results").json("../ansible/.results/kpps=*/tool=nettxlat-dev")

# Long-format layout built below: one row per histogram bucket.
schema = StructType([
  StructField('type', StringType(), True),
  StructField('kpps', StringType(), True),
  StructField('count', IntegerType(), True),
  StructField('min', IntegerType(), True),
  StructField('max', IntegerType(), True),
  StructField('device', StringType(), True),
  ])

#Create empty DataFrame directly.
nettxlat_df = spark.createDataFrame([], schema)

# Every field of the "data.@us" struct becomes its own column.
columns = data_df.select("data.@us.*").columns
data_df = data_df.select("*", "data.@us.*").drop("data")

for c in columns: # for each device -> e.g., eth0
    df = (data_df.select("type", "kpps", "tool", explode(c).alias("dataExplode"))
          .select("type", "kpps", "dataExplode.*")
          .withColumn('device', lit(c)))
    # Align by column NAME, not position: union() matches positionally, and
    # Spark's inferred JSON structs list their fields alphabetically
    # (count, max, min), which would put max under 'min' and vice versa.
    # NOTE(review): assumes the bucket fields are named count/min/max - confirm.
    nettxlat_df = nettxlat_df.unionByName(df)

nettxlat_df = nettxlat_df.select("*", concat_ws(' - ',col('min'),col('max')).alias("interval"))
nettxlat_fig = px.bar(nettxlat_df.toPandas(), x="interval", y="count", color="kpps", barmode="group", facet_row="device",
        title="Latency of the device queue",
        labels={
                    "kpps": "Packets Per Second (thousand)",
                    "count": "Number of Packets",
                    "interval": "Latency (usecs)"
                })
nettxlat_fig.show()
nettxlat_fig.write_image("images/nettxlat.jpeg")
with open(html_output_file, 'a') as f:
    f.write(nettxlat_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [21]:
# Read the skblife results of every packet rate.
data_df = spark.read.option("basePath", "../ansible/.results").json("../ansible/.results/kpps=*/tool=skblife")

# Layout of the plotted frame. 'type' is declared as a string here, matching
# how the other cells of this notebook declare the same column.
schema = StructType([
  StructField('kpps', StringType(), True),
  StructField('type', StringType(), True),
  StructField('count', IntegerType(), True),
  StructField('min', IntegerType(), True),
  StructField('max', IntegerType(), True),
  StructField('interval', StringType(), True),
  ])

#Create empty DataFrame directly.
skblife_df = spark.createDataFrame([], schema)

# One row per bucket of the "@skb_residency_nsecs" array; columns are then
# selected by name in the schema's order, so the positional union lines up.
data_df = data_df.select("*", explode('data.@skb_residency_nsecs').alias("dataExplode")).select("type", "kpps", "dataExplode.*")
data_df = data_df.select(col("kpps").cast(StringType()), "type", "count", "min", "max", concat_ws(' - ',col('min'),col('max')).alias("interval"))
skblife_df = skblife_df.union(data_df)
# Axis unit fixed: the source field is named "@skb_residency_nsecs", so the
# bucket bounds are labelled nsecs instead of usecs.
# NOTE(review): unit taken from the field name - confirm against the tracing script.
skblife_fig = px.bar(skblife_df.toPandas(), x="interval", y="count", color="kpps", barmode="group",
        title="Distribution of lifespan or age of the kernel buffers",
        labels={
                    "kpps": "Packets Per Second (thousand)",
                    "count": "Number of kernel buffers",
                    "interval": "Lifetime/Age (nsecs)"
                })
skblife_fig.show()
skblife_fig.write_image("images/skblife.jpeg")
with open(html_output_file, 'a') as f:
    f.write(skblife_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [22]:
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
# Read the sormem results of every packet rate.
data_df = spark.read.option("basePath", "../ansible/.results").json("../ansible/.results/kpps=*/tool=sormem")

# Long-format layout built below: one row per histogram bucket.
schema = StructType([
  StructField('type', StringType(), True),
  StructField('kpps', StringType(), True),
  StructField('count', IntegerType(), True),
  StructField('min', IntegerType(), True),
  StructField('max', IntegerType(), True),
  StructField('action', StringType(), True),
  ])

#Create empty DataFrame directly.
sormem_df = spark.createDataFrame([], schema)

# Every field of the "data" struct becomes its own column; its name is kept
# in 'action' and used as the facet column of the chart.
columns = data_df.select("data.*").columns
data_df = data_df.select("*", "data.*").drop("data")
for c in columns:
    df = (data_df.select("*", explode(c).alias("dataExplode"))
          .select("type", "kpps", "dataExplode.*")
          .withColumn('action', lit(c)))
    # Align by column NAME, not position: union() matches positionally, and
    # Spark's inferred JSON structs list their fields alphabetically
    # (count, max, min), which would put max under 'min' and vice versa.
    # NOTE(review): assumes the bucket fields are named count/min/max - confirm.
    sormem_df = sormem_df.unionByName(df)

sormem_df = sormem_df.select("*", concat_ws(' - ',col('min'),col('max')).alias("interval"))
sormem_fig = px.bar(sormem_df.toPandas(), x="interval", y="count", color="kpps", barmode="group", facet_col="action",
        title="The number of packets and allocated size of the socket buffers and their limits",
        labels={
                    "kpps": "Packets Per Second (thousand)",
                    "count": "Number of Sock RCV calls",
                    "interval": "Buffer Size"
                })
sormem_fig.show()
sormem_fig.write_image("images/sormem.jpeg")
with open(html_output_file, 'a') as f:
    f.write(sormem_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [23]:
# Read the hardirqs-count results of every packet rate.
hardirqs_count_df = spark.read.option("basePath", "../ansible/.results").json(
"../ansible/.results/kpps=*/tool=hardirqs-count")
import plotly.express as px
# One bar per packet rate, coloured by the "hardirq" column.
# (Legend label fixed: "Interupts" -> "Interrupts".)
hardirqs_count_fig = px.bar(hardirqs_count_df.toPandas(), x="kpps", y="count", color="hardirq",
            title="Total time spent servicing hard IRQs (hard interrupts)",
            labels={
                    "kpps": "Packets Per Second (thousand)",
                    "count": "Time (usecs)",
                    "hardirq": "Hardware Interrupts"
                })
hardirqs_count_fig.show()
hardirqs_count_fig.write_image("images/hardirqs-count.jpeg")
with open(html_output_file, 'a') as f:
    f.write(hardirqs_count_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [24]:
# Read the hardirqs-dist results of every packet rate and turn the "data"
# array into one row per histogram bucket.
hardirqs_dist_buckets = (
    spark.read.option("basePath", "../ansible/.results")
    .json("../ansible/.results/kpps=*/tool=hardirqs-dist")
    .select("kpps", "ts", "val_type", "tool", explode("data").alias("dataExplode"))
)

# Each bucket is labelled "<interval-start>-<interval-end>" for the x axis;
# kpps is cast to string before being used as the bar colour.
hardirqs_dist_df = hardirqs_dist_buckets.select(
    col("kpps").cast(StringType()),
    "ts",
    "val_type",
    "tool",
    "dataExplode.*",
    concat_ws('-', col('dataExplode.interval-start'), col('dataExplode.interval-end')).alias("interval"),
)
import plotly.express as px

hardirqs_dist_axis_labels = {
    "kpps": "Packets Per Second (thousand)",
    "count": "Number of Hardware IRQs",
    "interval": "Time (usecs)",
}
hardirqs_dist_fig = px.bar(
    hardirqs_dist_df.toPandas(),
    x="interval",
    y="count",
    color="kpps",
    barmode="group",
    title="The number of Hardware IRQ per duration distribution",
    labels=hardirqs_dist_axis_labels,
)

# Show inline, export a static image and append the interactive chart to the report.
hardirqs_dist_fig.show()
hardirqs_dist_fig.write_image("images/hardirqs-dist.jpeg")
with open(html_output_file, 'a') as report:
    report.write(hardirqs_dist_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [25]:
# Read the softirqs-count results of every packet rate.
softirqs_count_df = spark.read.option("basePath", "../ansible/.results").json(
"../ansible/.results/kpps=*/tool=softirqs-count")
import plotly.express as px
# One bar per packet rate, coloured by the "softirq" column.
# The legend label is keyed on "softirq" to match that column; the original
# keyed it on "hardirq", so the label was never applied to this chart.
softirqs_count_fig = px.bar(softirqs_count_df.toPandas(), x="kpps", y="count", color="softirq",
            title="Total time spent servicing soft IRQs",
            labels={
                    "kpps": "Packets Per Second (thousand)",
                    "count": "Time (usecs)",
                    "softirq": "Software Interrupts"
                })
softirqs_count_fig.show()
softirqs_count_fig.write_image("images/softirqs-count.jpeg")
with open(html_output_file, 'a') as f:
    f.write(softirqs_count_fig.to_html(full_html=False, include_plotlyjs='cdn'))

In [26]:
# Read the softirqs-dist results of every packet rate and turn the "data"
# array into one row per histogram bucket.
softirqs_dist_buckets = (
    spark.read.option("basePath", "../ansible/.results")
    .json("../ansible/.results/kpps=*/tool=softirqs-dist")
    .select("kpps", "ts", "val_type", "tool", explode("data").alias("dataExplode"))
)

# Each bucket is labelled "<interval-start>-<interval-end>" for the x axis;
# kpps is cast to string before being used as the bar colour.
softirqs_dist_df = softirqs_dist_buckets.select(
    col("kpps").cast(StringType()),
    "ts",
    "val_type",
    "tool",
    "dataExplode.*",
    concat_ws('-', col('dataExplode.interval-start'), col('dataExplode.interval-end')).alias("interval"),
)
# Print the resulting column layout as cell output.
softirqs_dist_df.printSchema()
import plotly.express as px

softirqs_dist_axis_labels = {
    "kpps": "Packets Per Second (thousand)",
    "count": "Number of Soft IRQs",
    "interval": "Time (usecs)",
}
softirqs_dist_fig = px.bar(
    softirqs_dist_df.toPandas(),
    x="interval",
    y="count",
    color="kpps",
    barmode="group",
    title="The number of SOFTIRQ per duration distribution",
    labels=softirqs_dist_axis_labels,
)

# Show inline, export a static image and append the interactive chart to the report.
softirqs_dist_fig.show()
softirqs_dist_fig.write_image("images/softirqs-dist.jpeg")
with open(html_output_file, 'a') as report:
    report.write(softirqs_dist_fig.to_html(full_html=False, include_plotlyjs='cdn'))

root
 |-- kpps: string (nullable = true)
 |-- ts: string (nullable = true)
 |-- val_type: string (nullable = true)
 |-- tool: string (nullable = true)
 |-- count: long (nullable = true)
 |-- interval-end: long (nullable = true)
 |-- interval-start: long (nullable = true)
 |-- interval: string (nullable = false)



In [27]:
# Append the flame graph section heading; its id is the target of the
# "#flame-graphs" link.
with open(html_output_file, mode='a') as report:
    report.write('<h2 id="flame-graphs"> Flame Graphs for analysing code paths </h2>')

In [28]:
import base64

# Render an On-CPU flame graph for every .txt file under tool=profile and
# inline the resulting SVG (base64 encoded) into the report.
file_list = glob.glob('../ansible/.results/kpps=*/tool=profile/*.txt')
with open(html_output_file, 'a') as f:
    f.write('<div id="profile">')
    for file in file_list:
        try:
            # The packet rate is taken from the "kpps=<value>" path segment.
            kpps = re.search('../ansible/.results/kpps=(.+?)/tool=profile/(.+?).txt', file).group(1)
            svg_path = "images/{}kpps_profile.svg".format(kpps)
            # Arguments are passed as a list and stdin/stdout are wired to
            # files, so neither the path nor the title is parsed by a shell.
            # check=True turns a failing flamegraph.pl into an exception
            # instead of silently embedding an empty image.
            with open(file, "rb") as stacks, open(svg_path, "wb") as svg:
                subprocess.run(
                    ["../FlameGraph/flamegraph.pl", "--color=java",
                     "--title=On-CPU Time Flame Graph for {} kpps".format(kpps)],
                    stdin=stacks, stdout=svg, check=True)
            with open(svg_path, "rb") as image_file:
                kpps_profile = str(base64.b64encode(image_file.read()),'utf-8')
            # The element id carries the packet rate; the original hard-coded
            # "1kpps_profile" for every image, producing duplicate ids.
            f.write('<img id="{}kpps_profile" src="data:image/svg+xml;base64,{}" />'.format(kpps, kpps_profile))
        except AttributeError:
            # re.search() returned None: the path did not match the pattern.
            print("Error creating profile svg files")
        except (OSError, subprocess.CalledProcessError) as exc:
            # Best effort: report the failure and continue with the next file.
            print("Error creating profile svg files: {}".format(exc))

    f.write('</div>')



In [29]:
# Render an Off-CPU flame graph for every .txt file under tool=offcputime and
# inline the resulting SVG (base64 encoded) into the report.
with open(html_output_file, 'a') as f:
    f.write('<div id="offcputime">')
    file_list = glob.glob('../ansible/.results/kpps=*/tool=offcputime/*.txt')
    for file in file_list:
        try:
            # The packet rate is taken from the "kpps=<value>" path segment.
            kpps = re.search('../ansible/.results/kpps=(.+?)/tool=offcputime/(.+?).txt', file).group(1)
            svg_path = "images/{}kpps_offcputime.svg".format(kpps)
            # Arguments are passed as a list and stdin/stdout are wired to
            # files, so neither the path nor the title is parsed by a shell.
            # check=True turns a failing flamegraph.pl into an exception
            # instead of silently embedding an empty image.
            with open(file, "rb") as stacks, open(svg_path, "wb") as svg:
                subprocess.run(
                    ["../FlameGraph/flamegraph.pl", "--hash", "--bgcolors=blue",
                     "--title=Off-CPU Time Flame Graph for {} kpps".format(kpps)],
                    stdin=stacks, stdout=svg, check=True)
            with open(svg_path, "rb") as image_file:
                kpps_offcputime = str(base64.b64encode(image_file.read()),'utf-8')
            f.write('<img src="data:image/svg+xml;base64,{}" />'.format(kpps_offcputime))
        except AttributeError:
            # re.search() returned None: the path did not match the pattern.
            print("Error creating offcputime svg files")
        except (OSError, subprocess.CalledProcessError) as exc:
            # Best effort: report the failure and continue with the next file.
            print("Error creating offcputime svg files: {}".format(exc))
    f.write('</div>')

In [30]:

# Render a page-fault flame graph for every .txt file under
# tool=page_fault-user and inline the resulting SVG (base64 encoded) into the
# report.
file_list = glob.glob('../ansible/.results/kpps=*/tool=page_fault-user/*.txt')
with open(html_output_file, 'a') as f:
    f.write('<div id="page_fault-user">')
    for file in file_list:
        try:
            # The packet rate is taken from the "kpps=<value>" path segment.
            kpps = re.search('../ansible/.results/kpps=(.+?)/tool=page_fault-user/(.+?).txt', file).group(1)
            svg_path = "images/{}kpps_page_fault-user.svg".format(kpps)
            # Arguments are passed as a list and stdin/stdout are wired to
            # files, so neither the path nor the title is parsed by a shell.
            # check=True turns a failing flamegraph.pl into an exception
            # instead of silently embedding an empty image.
            with open(file, "rb") as stacks, open(svg_path, "wb") as svg:
                subprocess.run(
                    ["../FlameGraph/flamegraph.pl", "--hash", "--width=800",
                     "--title=Page Fault Flame Graph for {} kpps".format(kpps),
                     "--colors=java", "--bgcolor=green"],
                    stdin=stacks, stdout=svg, check=True)
            with open(svg_path, "rb") as image_file:
                kpps_page_fault_user = str(base64.b64encode(image_file.read()),'utf-8')
            f.write('<img src="data:image/svg+xml;base64,{}" />'.format(kpps_page_fault_user))
        except AttributeError:
            # re.search() returned None: the path did not match the pattern.
            print("Error creating page_fault-user svg files")
        except (OSError, subprocess.CalledProcessError) as exc:
            # Best effort: report the failure and continue with the next file.
            print("Error creating page_fault-user svg files: {}".format(exc))
    f.write('</div>')

In [31]:
# Render a kernel page-fault flame graph for every .txt file under
# tool=page_fault-kernel and inline the resulting SVG (base64 encoded) into
# the report.
file_list = glob.glob('../ansible/.results/kpps=*/tool=page_fault-kernel/*.txt')
with open(html_output_file, 'a') as f:
    f.write('<div id="page_fault-kernel">')
    for file in file_list:
        try:
            # The packet rate is taken from the "kpps=<value>" path segment.
            kpps = re.search('../ansible/.results/kpps=(.+?)/tool=page_fault-kernel/(.+?).txt', file).group(1)
            svg_path = "images/{}kpps_page_fault-kernel.svg".format(kpps)
            # Arguments are passed as a list and stdin/stdout are wired to
            # files, so neither the path nor the title is parsed by a shell.
            # check=True turns a failing flamegraph.pl into an exception
            # instead of silently embedding an empty image.
            with open(file, "rb") as stacks, open(svg_path, "wb") as svg:
                subprocess.run(
                    ["../FlameGraph/flamegraph.pl", "--hash", "--width=800",
                     "--title=Kernel Page Fault Flame Graph for {} kpps".format(kpps),
                     "--colors=java", "--bgcolor=green"],
                    stdin=stacks, stdout=svg, check=True)
            with open(svg_path, "rb") as image_file:
                kpps_page_fault_kernel = str(base64.b64encode(image_file.read()),'utf-8')
            f.write('<img src="data:image/svg+xml;base64,{}" />'.format(kpps_page_fault_kernel))
        except AttributeError:
            # re.search() returned None: the path did not match the pattern.
            print("Error creating page_fault-kernel svg files")
        except (OSError, subprocess.CalledProcessError) as exc:
            # Best effort: report the failure and continue with the next file.
            print("Error creating page_fault-kernel svg files: {}".format(exc))
    f.write('</div>')

In [32]:
# import base64

# with open('images/1kpps_profile.svg', "rb") as image_file:
#     kpps_profile = str(base64.b64encode(image_file.read()),'utf-8')

# with open('p_graph.html', 'a') as f:
#     f.write('<h1>Open GiLAN Testbed Results</h1>')
#     f.write('<div><a href="#1kpps_profile">See code path</a></div>')
#     f.write('<img id="1kpps_profile" src="data:image/svg+xml;base64,{}" />'.format(kpps_profile))
#     f.write(biolatency_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(skblife_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(filetop_read_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(filetop_write_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(netsize_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(runqlat_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(cachestat_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(sormem_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(vfsstat_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(hardirqs_count_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(hardirqs_dist_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(softirqs_count_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(softirqs_dist_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(nettxlat_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(vfsstat_fig.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(vfssize_fig_1.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(vfssize_fig_2.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(vfssize_fig_3.to_html(full_html=False, include_plotlyjs='cdn'))
#     f.write(sys_enter_brk_fig.to_html(full_html=False, include_plotlyjs='cdn'))
    # f.write(vfsstat_fig.to_html(full_html=False, include_plotlyjs='cdn'))