In [1]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install pandas matplotlib plotly pyspark
!{sys.executable} -m pip install -U kaleido
!{sys.executable} -m pip install nbformat>4.2.0



# Purpose

This notebooks graphs the performance results of 5G core networks. The traffic for the 5G core networks is generated using a [5G core traffic generator](https://github.com/tariromukute/core-tg). The performance results are collected using a bcc and bpftrace tools.

In [2]:
# configure spark variables
from pyspark.context import SparkContext
from pyspark.sql.context import SQLContext
from pyspark.sql.session import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
    
sc = SparkContext()
sqlContext = SQLContext(sc)
spark = SparkSession(sc)

# load up other dependencies
import re
import pandas as pd

import glob
import matplotlib.pyplot as plt
# import numpy as np

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/08/05 16:49:16 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [25]:
import os
if not os.path.exists("images"):
    os.mkdir("images")

import os
import glob
import plotly.express as px
from plotly.subplots import make_subplots
# from pyspark.sql.types import StructType,StructField, StringType, IntegerType
# from pyspark.sql.functions import expr
basePath = "../results"
results_path = "../docs/results"

In [5]:
# General chaterisation
import plotly; print(plotly.__version__)

5.23.0


In [6]:
import subprocess
import os

# Helper functions
def remove_noise_processes(df, field, values):
    a = df.loc[df[field].isin(values)].index.array.tolist()
    df.drop(a, inplace=True)
    return df

def pivot_dataframe_to_gnuplot_format(df, values, index='ues', columns='cn'):

    # Group the DataFrame by 'Country' and 'Year'
    grouped_data = df.groupby([index, columns]).sum()

    # Pivot the resulting grouped data
    pivoted_df = grouped_data.pivot_table(index=index, columns=columns, values=values).reset_index()

    return pivoted_df

def draw_gnuplot_linepoints(df, name, title, xlabel, ylabel):
    df.to_csv(f'gnuplot/{name}.csv', index=False)
    print(df.columns)
    # Write the Gnuplot script
    with open(f'gnuplot/{name}.gnu', 'w') as f:
        f.write('set style data linespoints\n')
        f.write('set term png\n')
        f.write(f"set output '{name}.png'\n")
        # f.write('set key outside left bottom horizontal spacing 1 width 2 height 1.5\n')
        f.write('set key noenhanced\n')
        f.write('set key top left\n')
        f.write('set key autotitle columnhead\n')
        f.write("set datafile separator ','\n")
        f.write(f'set title "{title}"\n')
        f.write('set grid xtics ytics mytics\n')
        f.write(f'set xlabel "{xlabel}"\n')
        f.write(f'set ylabel "{ylabel}"\n')
        f.write(' # Create theme \n \
        dpi = 600 ## dpi (variable) \n \
        width = 164.5 ## mm (variable) \n \
        height = 100 ## mm (variable) \n \
        \n \
        in2mm = 25.4 # mm (fixed) \n \
        pt2mm = 0.3528 # mm (fixed) \n \
        \n \
        mm2px = dpi/in2mm \n \
        ptscale = pt2mm*mm2px \n \
        round(x) = x - floor(x) < 0.5 ? floor(x) : ceil(x) \n \
        wpx = round(width * mm2px) \n \
        hpx = round(height * mm2px) \n \
        \n \
        set terminal pngcairo size wpx,hpx fontscale ptscale/1.4 linewidth ptscale pointscale ptscale \n \
        \n \
        colors = "blue red green brown black magenta orange purple sienna1 slategray tan1 yellow turquoise orchid khaki" \n ')
        f.write(f'plot for [i=2:{len(df.columns)}] "{name}.csv" u 1:i t columnhead lc rgb word(colors, i-1)')

    # Run the Gnuplot script
    
    # Relative path of the desired working directory
    relative_dir_path = 'gnuplot'
    
    # Get the absolute path of the working directory
    curr_dir = os.getcwd()
    
    # Create the full path to the desired directory
    my_dir_path = os.path.join(curr_dir, relative_dir_path)
    
    subprocess.call(['gnuplot', '-p', f'{name}.gnu'],  cwd=my_dir_path)
    # Change current working directory to 'mydir'
    # os.chdir('./gnuplot')

    # # Execute command 'mycommand' in the new directory
    # os.system('gnuplot -p cn_perf_ue_avg_exp.gnu')

    # display the image on the screen
    from IPython.display import Image
    Image(filename=f'gnuplot/{name}.png')

labels = {
    "ues": "Number of UEs",
    "time (ms)": "Time (ms)",
    "syscall": "System calls",
    "count": "Number of calls",
    "avg": "Average time per syscall (ms)",
    "avg_duration": "Time (ms)",
    "cn": "Core network",
    "rct": "Rate of change of N# of calls"
}

sunburst_colors = {
    'free5gc': 'red',
    'oai': 'blue',
    'open5gs': 'green'
}

noise_processes_excl_db = ['python3', 'systemd', 'snapd', 'sshd', 'sudo', 'multipathd', 'systemd-logind', 'systemd-timesyn', 'systemd-resolve', 'systemd-udevd', 'systemd-network', 'systemctl', 'accounts-daemon', 'dbus-daemon', '[unknown]']
noise_processes = noise_processes_excl_db + ['mongod', 'mysqld']

In [45]:
import pandas as pd 

perf_total_df = pd.read_csv(f"{basePath}/cn_perf_time_to_completion.csv")

title='Total time taken to complete Registration and De-registration procedures'
perf_total_fig = px.line(perf_total_df, x="ues", y=["free5gc", "open5gs", "oai"],
                title=title, markers=True, labels={'variable': 'Core network', 'value': 'Time (s)'})
perf_total_html = perf_total_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/cn_perf_time_to_completion.html", 'w+') as f:
    f.write(perf_total_html)

perf_avg_df = pd.read_csv(f"{basePath}/cn_perf_ue_avg_exp.csv")

title='Average time taken to complete Registration and De-registration procedures'
perf_avg_fig = px.line(perf_avg_df, x="ues", y=["free5gc", "open5gs", "oai"],
                title=title, markers=True)

perf_avg_html = perf_avg_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/cn_perf_ue_avg_exp.html", 'w+') as f:
    f.write(perf_avg_html)

In [26]:
""" This shows how the usage syscalls change as the load changes. 
(a) The number of syscalls as the traffic load increases
(b) The time spent executing syscalls as the traffic increases
(c) The average time spent per syscall as the traffic increases
This can tell us:
1. How the core network is architectures to respond to increasing load
2. Comparing can tell us the core network that sends more time on syscalls. We can use that to corellate to the performance of the core network
3. We have the details on overal performance of the core networks, we can look at the results that correlate the performance
4. Is there a general trend to syscalls that can show well architected e.g., the latency should increase as load increase etc.
If there is an ideal trend or correlation, does it match the trend of the core networks and correlate to the performance we are seeing
"""

top_n = 5
syscount_df = spark.read.option("basePath", basePath).json(
f"{basePath}/cn=*/ues=*/tool=syscount")

df_syscount = syscount_df.toPandas().groupby(['cn', 'ues']).agg({ 'count': 'sum', 'time (ms)': 'sum' }).reset_index()

df_syscount['avg'] = (df_syscount['time (ms)'] / df_syscount['count'])

title='Syscalls across the system (by latency)'
syscount_fig = px.line(df_syscount, x="ues", y="time (ms)", color="cn", labels=labels,
                title=title, markers=True)
# syscount_fig.show()
syscount_fig.write_image("./plotly/syscount_latency.jpeg")
syscount_html = syscount_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/syscount_latency.html", 'w+') as f:
    f.write(syscount_html)

gnuplot_df = pivot_dataframe_to_gnuplot_format(df_syscount, 'time (ms)')
draw_gnuplot_linepoints(gnuplot_df, name='syscount_latency', title=title,
                        xlabel='Number of UEs', ylabel=labels['time (ms)'])

title=f'Syscalls across the system (by number of calls)'
sysprocess_count_fig = px.line(df_syscount, x="ues", y="count", color="cn", labels=labels,
                hover_data=["count", "time (ms)"],
                title=title, markers=True)
# sysprocess_count_fig.show()
sysprocess_count_fig.write_image("plotly/syscount_count.jpeg")
sysprocess_count_html = sysprocess_count_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/syscount_count.html", 'w+') as f:
    f.write(sysprocess_count_html)

gnuplot_df = pivot_dataframe_to_gnuplot_format(df_syscount, 'count')
draw_gnuplot_linepoints(gnuplot_df, name='syscount_count', title=title,
                        xlabel='Number of UEs', ylabel=labels['count'])

title=f'Syscalls across the system (by average latency)'
syscount_avg_fig = px.line(df_syscount, x="ues", y="avg", color="cn", labels=labels,
                hover_data=["count", "time (ms)"],
                title=title, markers=True)
# sysprocess_count_fig.show()
syscount_avg_fig.write_image("plotly/syscount_avg.jpeg")
syscount_avg_html = syscount_avg_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/syscount_avg.html", 'w+') as f:
    f.write(syscount_avg_html)

gnuplot_df = pivot_dataframe_to_gnuplot_format(df_syscount, 'avg')
draw_gnuplot_linepoints(gnuplot_df, name='syscount_avg', title=title,
                        xlabel='Number of UEs', ylabel=labels['avg'])


Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')


In [28]:
""" Show the system calls that are called for each core network. This is aggregated for all UEs and not broken down by UE

This can tell us:
1. The system calls mostly involved and their proportional sizes per core network
2. The core network with mostly system calls active and thier proportional sizes 
"""

top_n = 5

syscount_df = spark.read.option("basePath", basePath).json(
f"{basePath}/cn=*/ues=*/tool=syscount")

df_syscount = syscount_df.toPandas().groupby(['cn', 'syscall']).agg({ 'count': 'sum', 'time (ms)': 'sum' }).reset_index()

title="Syscalls per core network (by latency)"
sunburst_fig = px.sunburst(df_syscount, path=['cn', 'syscall'], values='time (ms)',
            color_discrete_map=sunburst_colors,
            color='cn', hover_data=['time (ms)'],
            title=title)

sunburst_fig.update_traces(textinfo="label+percent root")
# sunburst_fig.show()
sunburst_fig.write_image(f"plotly/grouped_syscount_latency.jpeg")
sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/grouped_syscount_latency.html", 'w+') as f:
    f.write(sunburst_html)

title="Syscalls per core network (by number of calls)"
sunburst_fig = px.sunburst(df_syscount, path=['cn', 'syscall'], values='count',
            color_discrete_map=sunburst_colors,
            color='cn', hover_data=['time (ms)'],
            title=title)

sunburst_fig.update_traces(textinfo="label+percent root")
sunburst_fig.show()
sunburst_fig.write_image(f"plotly/grouped_syscount_count.jpeg")
sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/grouped_syscount_count.html", 'w+') as f:
    f.write(sunburst_html)


In [9]:
""" A tabular view with ratios of the most sum of (latency per syscall, count per syscall and average latency of syscall). The tabular view will
1. Show us for each core network what is the ratio of a syscall precense over the other e.g., recvfrom has 4x more latency than sendto
2. Across core networks, we can compare the ratio of presence of a syscall e.g., free5gc invokes recvfrom 4x more than open5gs
3. For grouped syscalls, we can tell which flavor a given call network uses more e.g., for multiplexing syscalls, we can may see that free5gc uses
select more than epoll_wait and infer based on the relative performance of them
4. In addition to (3), for different core networks we can see that e.g., free5gc use select which is 4x more that epoll_wait being used by open5gs.
Tying this with the theory of the syscall we may be able to get the reasons for difference in performance

"""

' A tabular view with ratios of the most sum of (latency per syscall, count per syscall and average latency of syscall). The tabular view will\n1. Show us for each core network what is the ratio of a syscall precense over the other e.g., recvfrom has 4x more latency than sendto\n2. Across core networks, we can compare the ratio of presence of a syscall e.g., free5gc invokes recvfrom 4x more than open5gs\n3. For grouped syscalls, we can tell which flavor a given call network uses more e.g., for multiplexing syscalls, we can may see that free5gc uses\nselect more than epoll_wait and infer based on the relative performance of them\n4. In addition to (3), for different core networks we can see that e.g., free5gc use select which is 4x more that epoll_wait being used by open5gs.\nTying this with the theory of the syscall we may be able to get the reasons for difference in performance\n\n'

In [29]:
""" The top X active processes per core network
We can look at:
1. The composition of the core network, the system calls that run or maintain the system
2. It can tell us what the system spends most of it's time on
3. For these we can see if they syscalls follow the 'ideal trend' of responding to traffic load

"""
top_n = 6

def top_processes(df, field):
    label_maxes = df.groupby(['comm'])[field].sum().sort_values(ascending=False)

    # Select the top n labels with the highest y-values
    top_labels = label_maxes.head(top_n).index.tolist()

    return top_labels

sysprocess_df = spark.read.option("basePath", basePath).json(
f"{basePath}/cn=*/ues=*/tool=sysprocess")

df_sysprocess = sysprocess_df.toPandas()
df_process = remove_noise_processes(df_sysprocess, 'comm', noise_processes_excl_db)
df_process['avg'] = (df_process['time (ms)'] / df_process['count'])

top_labels = top_processes(df_process, 'time (ms)')

sunburst_fig = px.sunburst(df_process[df_process['comm'].isin(top_labels)], path=['cn', 'comm'], values='time (ms)',
            color_discrete_map=sunburst_colors,
            color='cn', hover_data=['time (ms)'],
            title=f"Processes making syscall (by latency)")
sunburst_fig.update_traces(textinfo="label+percent root")
# sunburst_fig.show()
sunburst_fig.write_image(f"plotly/grouped_sysprocess_latency.jpeg")
sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/grouped_sysprocess_latency.html", 'w+') as f:
    f.write(sunburst_html)

top_labels = top_processes(df_process, 'count')

sunburst_fig = px.sunburst(df_process[df_process['comm'].isin(top_labels)], path=['cn', 'comm'], values='count',
            color_discrete_map=sunburst_colors,
            color='cn', hover_data=['time (ms)'],
            title=f"Processes making syscall (by number of calls)")
sunburst_fig.update_traces(textinfo="label+percent root")
# sunburst_fig.show()
sunburst_fig.write_image(f"plotly/grouped_sysprocess_count.jpeg")
sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/grouped_sysprocess_count.html", 'w+') as f:
    f.write(sunburst_html)

top_labels = top_processes(df_process, 'avg')

sunburst_fig = px.sunburst(df_process[df_process['comm'].isin(top_labels)], path=['cn', 'comm'], values='avg',
            color_discrete_map=sunburst_colors,
            color='cn', hover_data=['time (ms)'],
            title=f"Processes making syscall (by average latency)")
sunburst_fig.update_traces(textinfo="label+percent root")
# sunburst_fig.show()
sunburst_fig.write_image(f"plotly/grouped_sysprocess_avg.jpeg")
sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/grouped_sysprocess_avg.html", 'w+') as f:
    f.write(sunburst_html)

# Create line graphs for the all process per core network
top_labels = top_processes(df_process, 'count')
df_cn_process = remove_noise_processes(df_sysprocess, 'comm', noise_processes)
df_cn_process = df_cn_process.groupby(['ues', 'cn']).agg({ 'count': 'sum', 'time (ms)': 'sum'}).reset_index()

title=f'Core networks: Top {top_n} active processes making syscall (by number of calls)'
sysprocess_fig = px.line(df_cn_process.sort_values('ues'),
        x="ues", y="count", color="cn",
        hover_data=["count", "time (ms)"],
        labels=labels,
        title=title,
        markers=True)
# sysprocess_fig.show()
sysprocess_fig.write_image(f"plotly/core_network_sum_sysprocess_count.jpeg")
sysprocess_html = sysprocess_fig.to_html(full_html=False, include_plotlyjs='cdn')
with open(f"{results_path}/core_network_sum_sysprocess_count.html", 'w+') as f:
    f.write(sysprocess_html)

gnuplot_df = pivot_dataframe_to_gnuplot_format(df_cn_process, 'count', index='ues', columns='cn')
draw_gnuplot_linepoints(gnuplot_df, name=f'core_network_sum_sysprocess_count', title=title,
                xlabel='Number of UEs', ylabel=labels['count'])


# html += '</br>' + "<h4>Performance per process</h4>"
# For the active processes remove databases
df_process = remove_noise_processes(df_sysprocess, 'comm', noise_processes)
df_process['avg'] = (df_process['time (ms)'] / df_process['count'])

grouped_data = df_process.groupby(['cn'])
for group_name, group_df in grouped_data:

     top_labels = top_processes(group_df, 'count')

     title=f'{group_name[0]}: Top {top_n} active processes making syscall (by number of calls)'
     sysprocess_fig = px.line(group_df[group_df['comm'].isin(top_labels)].sort_values('ues'),
                x="ues", y="count", color="comm",
                hover_data=["count", "time (ms)"],
                labels=labels,
                title=title,
                markers=True)
    #  sysprocess_fig.show()
     sysprocess_fig.write_image(f"plotly/{group_name[0]}_sysprocess_count.jpeg")
     sysprocess_html = sysprocess_fig.to_html(full_html=False, include_plotlyjs='cdn')
     with open(f"{results_path}/{group_name[0]}_sysprocess_count.html", 'w+') as f:
        f.write(sysprocess_html)

     gnuplot_df = pivot_dataframe_to_gnuplot_format(group_df[group_df['comm'].isin(top_labels)], 'count', index='ues', columns='comm')
     draw_gnuplot_linepoints(gnuplot_df, name=f'{group_name[0]}_sysprocess_count', title=title,
                        xlabel='Number of UEs', ylabel=labels['count'])
     

Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'amf', 'ausf', 'nrf', 'pcf', 'udm', 'udr'], dtype='object', name='comm')
Index(['ues', 'amf', 'ausf', 'nrf', 'smf', 'udm', 'udr'], dtype='object', name='comm')
Index(['ues', 'open5gs-amfd', 'open5gs-ausfd', 'open5gs-nrfd', 'open5gs-scpd',
       'open5gs-udmd', 'open5gs-udrd'],
      dtype='object', name='comm')


In [30]:
""" For each of the syscalls, we plot for the core networks, the amount of times and occurances. 

This can tell us:
1. How the core network are architected to respond to load for different operations e.g., how their socket read logic is implemented
to work and how that responses to change in traffic load
2. Relative to other core network which syscalls it uses the most. For example this can tell us the syscall that has the most differentiating factor, 
e.g., if all syscalls are relatively the same and there is a huge difference for sched_yield, then it is likely the differentiating syscall or design

"""
import pandas as pd
import plotly.graph_objs as go


def grouped_syscall_stats(df_sysprocess, syscall, writer=None):

    cn_df = df_sysprocess.groupby(['cn', 'ues']).agg({ 'count': 'sum', 'time (ms)': 'sum' }).reset_index()

    cn_df['avg'] = (cn_df['time (ms)'] / cn_df['count'])

    title=f'Core network syscall {syscall} (by latency)'
    sysprocess_count_fig = px.line(cn_df.sort_values('ues'),
                    x="ues", y="time (ms)", color="cn", 
                    hover_data=["count", "time (ms)"],
                    labels=labels,
                    title=title,
                    markers=True)
    # sysprocess_count_fig.show()
    sysprocess_count_fig.write_image(f"plotly/core_network_on_{syscall}_latency.jpeg")

    gnuplot_df = pivot_dataframe_to_gnuplot_format(cn_df, 'count', index='ues', columns='cn')
    draw_gnuplot_linepoints(gnuplot_df, name=f'core_network_on_{syscall}_latency', title=title,
                        xlabel='Number of UEs', ylabel=labels['time (ms)'])
    
    title=f'Core network syscall {syscall} (by number of calls)'
    sysprocess_count_fig = px.line(cn_df.sort_values('ues'),
                    x="ues", y="count", color="cn",
                    hover_data=["count", "time (ms)"],
                    labels=labels,
                    title=title,
                    markers=True)
    # sysprocess_count_fig.show()
    sysprocess_count_fig.write_image(f"plotly/core_network_on_{syscall}_count.jpeg")
    sysprocess_count_html = sysprocess_count_fig.to_html(full_html=False, include_plotlyjs='cdn')
    with open(f"{results_path}/core_network_on_{syscall}_count.html", 'w+') as f:
        f.write(sysprocess_count_html)

    gnuplot_df = pivot_dataframe_to_gnuplot_format(cn_df, 'count', index='ues', columns='cn')
    draw_gnuplot_linepoints(gnuplot_df, name=f'core_network_on_{syscall}_count', title=title,
                        xlabel='Number of UEs', ylabel=labels['count'])

    title=f'Core network syscall {syscall} (by average latency)'
    sysprocess_count_fig = px.line(cn_df.sort_values('ues'),
                    x="ues", y="avg", color="cn",
                    hover_data=["count", "time (ms)"],
                    labels=labels,
                    title=title,
                    markers=True)
    # sysprocess_count_fig.show()
    sysprocess_count_fig.write_image(f"plotly/core_network_on_{syscall}_avg.jpeg")
    sysprocess_count_html = sysprocess_count_fig.to_html(full_html=False, include_plotlyjs='cdn')
    with open(f"{results_path}/core_network_on_{syscall}_avg.html", 'w+') as f:
        f.write(sysprocess_count_html)

    gnuplot_df = pivot_dataframe_to_gnuplot_format(cn_df, 'avg', index='ues', columns='cn')
    draw_gnuplot_linepoints(gnuplot_df, name=f'core_network_on_{syscall}_avg', title=title,
                        xlabel='Number of UEs', ylabel=labels['avg'])


    return cn_df


def grouped_processes_stats(df_sysprocess, writer=None):

    comm_df = df_sysprocess.groupby(['cn', 'comm']).agg({ 'count': 'sum', 'time (ms)': 'sum' }).reset_index()

    comm_df['avg'] = (comm_df['time (ms)'] / comm_df['count'])

    sunburst_fig = px.sunburst(comm_df, path=['cn', 'comm'], values='time (ms)',
                  color_discrete_map=sunburst_colors,
                  color='cn', hover_data=['count'],
                  title=f"Processes making {syscall} syscall (by latency)")
    sunburst_fig.update_traces(textinfo="label+percent root")
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/grouped_sysprocess_on_{syscall}_latency.jpeg")
    sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
    with open(f"{results_path}/grouped_sysprocess_on_{syscall}_latency.html", 'w+') as f:
        f.write(sunburst_html)

    sunburst_fig = px.sunburst(comm_df, path=['cn', 'comm'], values='count',
                  color_discrete_map=sunburst_colors,
                  color='cn', hover_data=['time (ms)'],
                  title=f"Processes making {syscall} syscall (by number of calls)")
    sunburst_fig.update_traces(textinfo="label+percent root")
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/grouped_sysprocess_on_{syscall}_count.jpeg")
    sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
    with open(f"{results_path}/grouped_sysprocess_on_{syscall}_count.html", 'w+') as f:
        f.write(sunburst_html)

    sunburst_fig = px.sunburst(comm_df, path=['cn', 'comm'], values='avg',
                  color_discrete_map=sunburst_colors,
                  color='cn', hover_data=['time (ms)'],
                  title=f"Processes making {syscall} syscall (by average latency)")
    sunburst_fig.update_traces(textinfo="label+percent root")
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/grouped_sysprocess_on_{syscall}_avg.jpeg")
    sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
    with open(f"{results_path}/grouped_sysprocess_on_{syscall}_avg.html", 'w+') as f:
        f.write(sunburst_html)

    return comm_df

def grouped_syscall_types(syscalls, syscall_type):

    df = pd.DataFrame()
    for syscall in syscalls:
        sysprocess_df = spark.read.option("basePath", basePath).json(
            f"{basePath}/cn=*/ues=*/tool=sysprocess_{syscall}")
        df1 = sysprocess_df.toPandas()
        df1['syscall'] = syscall
        df = pd.concat([df, df1])

    df = df.reset_index(drop=True)
    df_syscall = remove_noise_processes(df, 'comm', noise_processes)
    syscall_df = df_syscall.groupby(['cn', 'syscall', 'comm']).agg({ 'count': 'sum', 'time (ms)': 'sum' }).reset_index()

    syscall_df['avg'] = (syscall_df['time (ms)'] / syscall_df['count'])

    sunburst_fig = px.sunburst(syscall_df, path=['cn', 'syscall', 'comm'], values='time (ms)',
                  color_discrete_map=sunburst_colors,
                  color='cn', hover_data=['count'],
                #   title=f"Core networks making {syscall_type} syscall (by latency)"
                  )
    sunburst_fig.update_traces(textinfo="label+percent root")
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/grouped_systypes_on_{syscall_type.replace('/', '')}_latency.jpeg")
    sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
    with open(f"{results_path}/grouped_systypes_on_{syscall_type.replace('/', '')}_latency.html", 'w+') as f:
        f.write(sunburst_html)

    sunburst_fig = px.sunburst(syscall_df, path=['cn', 'syscall', 'comm'], values='count',
                  color_discrete_map=sunburst_colors,
                  color='cn', hover_data=['time (ms)'],
                #   title=f"Core networks making {syscall_type} syscall (by number of calls)"
                  )
    sunburst_fig.update_traces(textinfo="label+percent root")
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/grouped_systypes_on_{syscall_type.replace('/', '')}_count.jpeg")
    sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
    with open(f"{results_path}/grouped_systypes_on_{syscall_type.replace('/', '')}_count.html", 'w+') as f:
        f.write(sunburst_html)


    sunburst_fig = px.sunburst(syscall_df, path=['cn', 'syscall', 'comm'], values='avg',
                  color='cn', hover_data=['time (ms)'],
                #   title=f"Core networks making {syscall_type} syscall (by average latency)"
                  )
    sunburst_fig.update_traces(textinfo="label+percent root")
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/grouped_systypes_on_{syscall_type.replace('/', '')}_avg.jpeg")
    sunburst_html = sunburst_fig.to_html(full_html=False, include_plotlyjs='cdn')
    with open(f"{results_path}/grouped_systypes_on_{syscall_type.replace('/', '')}_avg.html", 'w+') as f:
        f.write(sunburst_html)

    return None

def compute_grouped_stats(syscall, summary_df):
    html = ""
    sysprocess_df = spark.read.option("basePath", basePath).json(
    f"{basePath}/cn=*/ues=*/tool=sysprocess_{syscall}")

    df_sysprocess = sysprocess_df.toPandas()
    df1 = df_sysprocess
    # df1 = remove_noise_processes(df_sysprocess, 'comm', noise_processes)
    syscall_df, lhtml = grouped_syscall_stats(df1, syscall, writer)
    html += '</br>' + lhtml   

    comm_df, lhtml = grouped_processes_stats(df1, writer)
    html += '</br>' + lhtml   

    # Get the summary
    df2 = comm_df.groupby(['cn']).agg({ 'count': 'sum', 'time (ms)': 'sum', 'avg': 'sum' }).reset_index()
    
    df2['syscall'] = syscall

    summary_df = pd.concat([summary_df, df2])
    summary_df = summary_df.reset_index(drop=True)
    return None

# writer = pd.ExcelWriter('ActiveProcessesPerSyscall-WithoutNoiseProcesses.xlsx', engine='xlsxwriter')
writer = None
noise_processes = ['python3', 'systemd', 'snapd', 'sshd', 'sudo', 'multipathd', 'systemd-logind', 'systemd-timesyn', 'systemd-resolve', 'systemd-udevd', 'systemd-network', 'systemctl', 'accounts-daemon', 'dbus-daemon', 'mongod', 'mysqld', '[unknown]']

io_multiplex_syscalls = ['epoll_wait', 'poll', 'ppoll', 'epoll_pwait', 'select']
grouped_syscall_types(io_multiplex_syscalls, 'IO Multiplexing')
print("Syscalls for io multiplexing")
# Run for each syscall
grouped_io_df = pd.DataFrame(columns=['cn', 'count', 'time (ms)', 'avg', 'syscall'])
for syscall in io_multiplex_syscalls:
    compute_grouped_stats(syscall, grouped_io_df)  

grouped_io_df = pd.DataFrame(columns=['cn', 'count', 'time (ms)', 'avg', 'syscall'])
socket_files_syscalls = ['read', 'write']
grouped_syscall_types(socket_files_syscalls, 'Files')
print("Syscalls for read or write for files operations")
for syscall in socket_files_syscalls:
    compute_grouped_stats(syscall, grouped_io_df)

grouped_io_df = pd.DataFrame(columns=['cn', 'count', 'time (ms)', 'avg', 'syscall'])
socket_read_syscalls = [ 'recvmsg', 'recvfrom']
grouped_syscall_types(socket_read_syscalls, 'Receive')
print("Syscalls for socket read operations")
for syscall in socket_read_syscalls:
    compute_grouped_stats(syscall, grouped_io_df)

grouped_io_df = pd.DataFrame(columns=['cn', 'count', 'time (ms)', 'avg', 'syscall'])
socket_write_syscalls = ['sendto', 'sendmsg' ]
grouped_syscall_types(socket_write_syscalls, 'Send')
print("Syscalls for socket write operations")
for syscall in socket_write_syscalls:
    compute_grouped_stats(syscall, grouped_io_df)


grouped_io_df = pd.DataFrame(columns=['cn', 'count', 'time (ms)', 'avg', 'syscall'])
time_syscalls = ['clock_nanosleep', 'nanosleep']
grouped_syscall_types(time_syscalls, 'Time')
print("Syscalls for process time operations")
for syscall in time_syscalls:
    compute_grouped_stats(syscall, grouped_io_df)


grouped_io_df = pd.DataFrame(columns=['cn', 'count', 'time (ms)', 'avg', 'syscall'])
locks_syscalls = ['futex']
grouped_syscall_types(locks_syscalls, 'Locks')
print("Syscalls for locks operations")
for syscall in locks_syscalls:
    compute_grouped_stats(syscall, grouped_io_df)

grouped_io_df = pd.DataFrame(columns=['cn', 'count', 'time (ms)', 'avg', 'syscall'])
control_syscalls = ['sched_yield']
grouped_syscall_types(control_syscalls, 'Control operations')

for syscall in control_syscalls:
    compute_grouped_stats(syscall, grouped_io_df)



Syscalls for io multiplexing
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Syscalls for read or write for files operations
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Syscalls for socket read operations
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Syscalls for socket write operations
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Syscalls for process time operations
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Syscalls for locks operations
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Syscalls for control operations
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')
Index(['ues', 'free5gc', 'oai', 'open5gs'], dtype='object', name='cn')



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [14]:
""" Show the characterisation of each process. We show for each process the stats for grouped syscalls
e.g., for opne5gs-amfd we show results for the multiplexing system calls

For each of the core networks, we show their processes and then
1. The syscalls active for the process by the frequency - line graph as UE increase and sunburst
2. The syscalls active for the process by the latency - line graph as UE increase and sunburst
3. The syscalls active for the process by the average - line graph as UE increase and sunburst
"""

import re

noise_processes = ['python3', 'systemd', 'snapd', 'sshd', 'sudo', 'multipathd', 'systemd-logind', 'systemd-timesyn', 'systemd-resolve', 'systemd-udevd', 'systemd-network', 'systemctl', 'accounts-daemon', 'dbus-daemon', 'mongod', 'mysqld', '[unknown]']

noise_processes = noise_processes + ['rsyslogd', 'systemd-journal', 'irqbalance', 'fwupd']

def grouped_process_and_syscall_types(syscalls, syscall_type, core_network):
    df = pd.DataFrame()
    for syscall in syscalls:
        sysprocess_df = spark.read.option("basePath", basePath).json(
            f"{basePath}/cn={core_network}/ues=*/tool=sysprocess_{syscall}")
        df1 = sysprocess_df.toPandas()
        df1['syscall'] = syscall
        df = pd.concat([df, df1])

    df = df.reset_index(drop=True)
    df_syscall = remove_noise_processes(df, 'comm', noise_processes)
    syscall_df = df_syscall.groupby(['comm', 'ues', 'syscall']).agg({ 'count': 'sum', 'time (ms)': 'sum' }).reset_index()

    syscall_df['avg'] = (syscall_df['time (ms)'] / syscall_df['count'])

    # Sunburst summarising
    title=f"{core_network}: {syscall_type} syscalls (by latency)"
    file_name = re.sub(r'[^\w\s]','_', title).replace(' ', '_')
    sunburst_fig = px.sunburst(syscall_df, path=['comm', 'ues', 'syscall'], values='time (ms)',
                  color='comm', hover_data=['count'],
                  title=title
                  )
    sunburst_fig.update_traces(textinfo="label+percent root")
    sunburst_fig.update_traces(sort=False, selector=dict(type='sunburst')) 
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/{file_name}.jpeg")

    # Line graph
    for process_name, process_syscall_df in syscall_df.groupby('comm'):
        title=f"{core_network}: {syscall_type} syscalls by {process_name} (by latency)"
        sysprocess_count_fig = px.line(process_syscall_df.sort_values('ues'),
                        x="ues", y="time (ms)", color="syscall", 
                        facet_row="comm",
                        hover_data=["count", "time (ms)"],
                        labels=labels,
                        title=title,
                        markers=True)
        # sysprocess_count_fig.show()
        file_name = re.sub(r'[^\w\s]','_', title).replace(' ', '_')
        sysprocess_count_fig.write_image(f"plotly/{file_name}.jpeg")

        gnuplot_df = pivot_dataframe_to_gnuplot_format(process_syscall_df, 'time (ms)', 'ues', 'syscall')
        draw_gnuplot_linepoints(gnuplot_df, name=file_name, title=title,
                        xlabel='Number of UEs', ylabel=labels['time (ms)'])
    
    title=f"{core_network}: {syscall_type} syscalls (by number of calls)"
    file_name = re.sub(r'[^\w\s]','_', title).replace(' ', '_')
    sunburst_fig = px.sunburst(syscall_df, path=['comm', 'ues', 'syscall'], values='count',
                  color='comm', hover_data=['time (ms)'],
                  title=title
                  )
    sunburst_fig.update_traces(textinfo="label+percent root")
    sunburst_fig.update_traces(sort=False, selector=dict(type='sunburst'))
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/{file_name}.jpeg")

    # Line graph
    for process_name, process_syscall_df in syscall_df.groupby('comm'):
        title=f"{core_network}: {syscall_type} syscalls by {process_name} (by number of calls)"
        sysprocess_count_fig = px.line(process_syscall_df.sort_values('ues'),
                        x="ues", y="count", color="syscall", 
                        facet_row="comm",
                        hover_data=["count", "time (ms)"],
                        labels=labels,
                        title=title,
                        markers=True)
        # sysprocess_count_fig.show()
        file_name = re.sub(r'[^\w\s]','_', title).replace(' ', '_')
        sysprocess_count_fig.write_image(f"plotly/{file_name}.jpeg")

        gnuplot_df = pivot_dataframe_to_gnuplot_format(process_syscall_df, 'count', 'ues', 'syscall')
        draw_gnuplot_linepoints(gnuplot_df, name=file_name, title=title,
                        xlabel='Number of UEs', ylabel=labels['count'])

    title=f"{core_network}: {syscall_type} syscalls (by average latency)"
    file_name = re.sub(r'[^\w\s]','_', title).replace(' ', '_')
    sunburst_fig = px.sunburst(syscall_df, path=['comm', 'ues', 'syscall'], values='avg',
                  color='comm', hover_data=['time (ms)'],
                  title=title
                  )
    sunburst_fig.update_traces(textinfo="label+percent root")
    sunburst_fig.update_traces(sort=False, selector=dict(type='sunburst'))
    # sunburst_fig.show()
    sunburst_fig.write_image(f"plotly/{file_name}.jpeg")

    # Line graph
    for process_name, process_syscall_df in syscall_df.groupby('comm'):
        title=f"{core_network}: {syscall_type} syscalls by {process_name} (by average latency)"
        sysprocess_count_fig = px.line(process_syscall_df.sort_values('ues'),
                        x="ues", y="avg", color="syscall", 
                        facet_row="comm",
                        hover_data=["count", "time (ms)"],
                        labels=labels,
                        title=title,
                        markers=True)
        # sysprocess_count_fig.show()
        file_name = re.sub(r'[^\w\s]','_', title).replace(' ', '_')
        sysprocess_count_fig.write_image(f"plotly/{file_name}.jpeg")

        gnuplot_df = pivot_dataframe_to_gnuplot_format(process_syscall_df, 'avg', 'ues', 'syscall')
        draw_gnuplot_linepoints(gnuplot_df, name=file_name, title=title,
                        xlabel='Number of UEs', ylabel=labels['avg'])

io_multiplex_syscalls = ['epoll_wait', 'poll', 'ppoll', 'epoll_pwait', 'select']
socket_files_syscalls = ['read', 'write']
socket_write_syscalls = ['sendto', 'sendmsg']
socket_read_syscalls = [ 'recvmsg', 'recvfrom']
time_syscalls = ['clock_nanosleep', 'nanosleep']
locks_syscalls = ['futex']
control_syscalls = ['sched_yield']

# Free 5GC
grouped_process_and_syscall_types(io_multiplex_syscalls, 'IO Multiplexing', 'free5gc')

grouped_process_and_syscall_types(socket_files_syscalls, 'File operation', 'free5gc')

grouped_process_and_syscall_types(socket_write_syscalls, 'Socket write', 'free5gc')

grouped_process_and_syscall_types(socket_read_syscalls, 'Socket read', 'free5gc')

grouped_process_and_syscall_types(time_syscalls, 'Sleep', 'free5gc')

grouped_process_and_syscall_types(locks_syscalls, 'Resource contention', 'free5gc')

grouped_process_and_syscall_types(control_syscalls, 'Scheduling', 'free5gc')


# Open5gs

grouped_process_and_syscall_types(io_multiplex_syscalls, 'IO Multiplexing', 'open5gs')

grouped_process_and_syscall_types(socket_files_syscalls, 'File operation', 'open5gs')

grouped_process_and_syscall_types(socket_write_syscalls, 'Socket write', 'open5gs')

grouped_process_and_syscall_types(socket_read_syscalls, 'Socket read', 'open5gs')

grouped_process_and_syscall_types(time_syscalls, 'Sleep', 'open5gs')

grouped_process_and_syscall_types(locks_syscalls, 'Resource contention', 'open5gs')

grouped_process_and_syscall_types(control_syscalls, 'Scheduling', 'open5gs')


# OAI

grouped_process_and_syscall_types(io_multiplex_syscalls, 'IO Multiplexing', 'oai')

grouped_process_and_syscall_types(socket_files_syscalls, 'File operation', 'oai')

grouped_process_and_syscall_types(socket_write_syscalls, 'Socket write', 'oai')

grouped_process_and_syscall_types(socket_read_syscalls, 'Socket read', 'oai')

grouped_process_and_syscall_types(time_syscalls, 'Sleep', 'oai')

grouped_process_and_syscall_types(locks_syscalls, 'Resource contention', 'oai')

grouped_process_and_syscall_types(control_syscalls, 'Scheduling', 'oai')



Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype=



Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')




Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')




Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'epoll_pwait'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read',



Index(['ues', 'sendmsg'], dtype='object', name='syscall')
Index(['ues', 'sendmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', 



Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype='object', name='syscall')
Index(['ues', 'nanosleep'], dtype=



Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')




Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')




Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'sched_yield'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')




Index(['ues', 'poll'], dtype='object', name='syscall')




Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')




Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')




Index(['ues', 'poll'], dtype='object', name='syscall')




Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')




Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')




Index(['ues', 'poll'], dtype='object', name='syscall')




Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait', 'poll'], dtype='object', name='syscall')
Index(['ues', 'epoll_wait'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'poll'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='ob



Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read'], dtype='object', name='syscall')




Index(['ues', 'read'], dtype='object', name='syscall')




Index(['ues', 'read'], dtype='object', name='syscall')




Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 



Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues



Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], dtype='object', name='syscall')
Index(['ues', 'recvfrom'], 



Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')




Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')




Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'fut



Index(['ues', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')




Index(['ues', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')




Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read'], dtype='object', name='syscall')
Index(['ues', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'read', 'write'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')




Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')




Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendmsg', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'sendto'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvmsg'], dtype='object', name='syscall')




Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'recvfrom', 'recvmsg'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')




Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'clock_nanosleep'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')




Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')




Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')




Index(['ues', 'futex'], dtype='object', name='syscall')




Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')




Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')




Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')




Index(['ues', 'futex'], dtype='object', name='syscall')
Index(['ues', 'futex'], dtype='object', name='syscall')


In [15]:
sysprocess_scatter_df = spark.read.option("basePath", basePath).json(
    f"{basePath}/cn=*/ues=*/tool=sysprocess")

df_sysprocess_scatter = sysprocess_scatter_df.toPandas()

# Calculate average rate of increase per slot
sorted_df = df_sysprocess_scatter.groupby(['cn', 'comm', 'ues']).first().sort_values(by='ues').reset_index()

sorted_df['diff'] = sorted_df.groupby(['cn', 'comm'])['count'].diff() / sorted_df.groupby(['cn', 'comm'])['ues'].diff()

sum_df = sorted_df.groupby(['cn', 'comm'])['count'].sum().to_frame()
duration_df = sorted_df.groupby(['cn', 'comm'])['time (ms)'].sum().to_frame()
mean_df = sorted_df.groupby(['cn', 'comm'])['diff'].mean().to_frame()


merged_df = pd.merge(sum_df, duration_df, on=['cn', 'comm']) \
             .merge(mean_df.rename(columns={'diff': 'rct'}), on=['cn', 'comm']).reset_index()


merged_df['avg_duration'] = (merged_df['time (ms)'] / 1000000)
merged_df["avg_duration"] = [0.25 if val < 1 else val for val in merged_df['avg_duration']]
merged_df = remove_noise_processes(merged_df, 'comm', noise_processes_excl_db)
fig = px.scatter(merged_df, x="count", y="rct",
	         size="avg_duration", color="cn",
             labels={
                "ues": "Number of UEs",
                "time (ms)": "Time (ms)",
                "syscall": "System calls",
                "count": "Cummulative number of calls",
                "avg": "Average time per syscall (ms)",
                "avg_duration": "Time (ms)",
                "cn": "Core network",
                "rct": "Rate of change of N# of calls"
            },
            #  title="Total number of calls against thier rate of change as the number of UEs generated increases per Core network. Each bubble represents a process.",
                 hover_name="comm", log_x=True, log_y=True, size_max=50)
fig.update_layout(title_font=dict(size=10))
x_avg = merged_df['count'].mean()
y_avg = merged_df['rct'].mean()
fig.add_vline(x=x_avg, line_width=1, opacity=0.5)
fig.add_hline(y=y_avg, line_width=1, opacity=0.5)

# Update layout with new dimensions as percentages
# fig.update_layout(
#     height=600,     
#     width=800
# )

fig.show()
fig.write_image(f"plotly/processes_scatter_summary.jpeg")

In [16]:
# Get the data
syscall_scatter_df = spark.read.option("basePath", basePath).json(
    f"{basePath}/cn=*/ues=*/tool=syscount")

df_syscall_scatter = syscall_scatter_df.toPandas()
df_syscall_scatter['avg'] = ((df_syscall_scatter['time (ms)'] / df_syscall_scatter['count']) / 100)
study_syscalls = ['futex', 'epoll_wait', 'epoll_wait2', 'epoll_pwait', 'epoll_pwait2', 'poll', 'ppoll', 'select', 'nanosleep', 'clock_nanosleep', 'read', 'write', 'recv', 'recvfrom', 'recvmsg', 'recvmmsg', 'send', 'sendto', 'sendmsg', 'sendmmsg', 'sched_yield']
df_syscall_scatter =df_syscall_scatter[df_syscall_scatter["syscall"].isin(study_syscalls)]

# Calculate average rate of increase per slot
sorted_df = df_syscall_scatter.groupby(['cn', 'syscall', 'ues']).first().sort_values(by='ues').reset_index()

sorted_df['diff'] = sorted_df.groupby(['cn', 'syscall'])['count'].diff() / sorted_df.groupby(['cn', 'syscall'])['ues'].diff()

sum_df = sorted_df.groupby(['cn', 'syscall'])['count'].sum().to_frame()
duration_df = sorted_df.groupby(['cn', 'syscall'])['time (ms)'].sum().to_frame()
mean_df = sorted_df.groupby(['cn', 'syscall'])['diff'].mean().to_frame()


merged_df = pd.merge(sum_df, duration_df, on=['cn', 'syscall']) \
             .merge(mean_df.rename(columns={'diff': 'rct'}), on=['cn', 'syscall']).reset_index()

merged_df['avg_duration'] = (merged_df['time (ms)'] / 1000000)
merged_df["avg_duration"] = [0.5 if val < 1 else val for val in merged_df['avg_duration']]
fig = px.scatter(merged_df, x="count", y="rct",
            labels={
                "ues": "Number of UEs",
                "time (ms)": "Time (ms)",
                "syscall": "System calls",
                "count": "Cummulative number of calls",
                "avg": "Average time per syscall (ms)",
                "avg_duration": "Time (ms)",
                "cn": "Core network",
                "rct": "Rate of change of N# of calls"
            },
            # title="Total number of calls against thier rate of change as the number of UEs generated increases per Core network. Each bubble represents a syscall.",
	         size="avg_duration", color="cn",
                hover_name="syscall",
                 log_x=True, log_y=True, size_max=50)
fig.update_layout(title_font=dict(size=10))
x_avg = merged_df['count'].mean()
y_avg = merged_df['rct'].mean()
fig.add_vline(x=x_avg, line_width=1, opacity=0.5)
fig.add_hline(y=y_avg, line_width=1, opacity=0.5)
#label each bubble
# fig.update_traces(textposition='top center')

# fig.update_layout(
#     height=600,     
#     width=800
# )

fig.show()
fig.write_image(f"plotly/syscalls_scatter_summary.jpeg")

In [17]:
# Get the data
syscall_scatter_df = spark.read.option("basePath", basePath).json(
    f"{basePath}/cn=*/ues=*/tool=syscount")

df_syscall_scatter = syscall_scatter_df.toPandas()
df_syscall_scatter['avg'] = ((df_syscall_scatter['time (ms)'] / df_syscall_scatter['count']) / 100)
study_syscalls = ['futex', 'epoll_wait', 'epoll_wait2', 'epoll_pwait', 'epoll_pwait2', 'poll', 'ppoll', 'select', 'nanosleep', 'clock_nanosleep', 'read', 'write', 'recv', 'recvfrom', 'recvmsg', 'recvmmsg', 'send', 'sendto', 'sendmsg', 'sendmmsg', 'sched_yield']
df_syscall_scatter =df_syscall_scatter[df_syscall_scatter["syscall"].isin(study_syscalls)]

# Calculate average rate of increase per slot
sorted_df = df_syscall_scatter.groupby(['cn', 'syscall', 'ues']).first().sort_values(by='ues').reset_index()

sorted_df['diff'] = sorted_df.groupby(['cn', 'syscall'])['count'].diff() / sorted_df.groupby(['cn', 'syscall'])['ues'].diff()

sum_df = sorted_df.groupby(['cn'])['count'].sum().to_frame()
duration_df = sorted_df.groupby(['cn'])['time (ms)'].sum().to_frame()
mean_df = sorted_df.groupby(['cn', 'syscall'])['diff'].mean().to_frame()
mean_2_df = mean_df.groupby(['cn'])['diff'].mean().to_frame()


merged_df = pd.merge(sum_df, duration_df, on=['cn']) \
             .merge(mean_2_df.rename(columns={'diff': 'rct'}), on=['cn']).reset_index()

merged_df.to_csv("merged_df.csv")
merged_df['avg_duration'] = (merged_df['time (ms)'] / 1000000)
merged_df["avg_duration"] = [0.5 if val < 1 else val for val in merged_df['avg_duration']]

fig = px.scatter(merged_df, x="count", y="rct",
            labels=labels,
            # title="Total number of calls against thier rate of change as the number of UEs generated increases per Core network. Each bubble represents a syscall.",
	         size="avg_duration", color="cn",
                # hover_name="syscall",
                 log_x=True, log_y=True, size_max=50)
fig.update_layout(title_font=dict(size=10))
x_avg = merged_df['count'].mean()
y_avg = merged_df['rct'].mean()
fig.add_vline(x=x_avg, line_width=1, opacity=0.5)
fig.add_hline(y=y_avg, line_width=1, opacity=0.5)
#label each bubble
# fig.update_traces(textposition='top center')

# fig.update_layout(
#     height=600,     
#     width=800
# )

# fig.update_layout(
#     yaxis=dict(
#         range=[-50, 200]
#     )
# )

fig.show()
fig.write_image(f"plotly/syscalls_scatter_summary.jpeg")

In [18]:
import re

my_string = "Hello / World: How, Are You?"

new_string = re.sub(r'[^\w\s]+','_', my_string).replace(' ', '_')

print(new_string) # Output: Hello___World__How__Are_You_


Hello___World__How__Are_You_


In [19]:
""" For each syscall look at the processes that are making the calls
(a) Graphs
(b) Tables with the sum per latency, count and average latency
This should give us:
1. An idea of the processes making use of the most relavant syscall or the syscall we are looking at in the study
2. It will give us an ide of the relavance of these processes and making it easier for us to analysis e.g., if the rsyslog system
is the most active process per syscall, we know we need to do further work to disable logs or looking at another logging mechanism
3. 
"""

' For each syscall look at the processes that are making the calls\n(a) Graphs\n(b) Tables with the sum per latency, count and average latency\nThis should give us:\n1. An idea of the processes making use of the most relavant syscall or the syscall we are looking at in the study\n2. It will give us an ide of the relavance of these processes and making it easier for us to analysis e.g., if the rsyslog system\nis the most active process per syscall, we know we need to do further work to disable logs or looking at another logging mechanism\n3. \n'