In [1]:
import numpy as np
import pandas as pd
import matplotlib
import glob as glob
# Blessed build for evaluation is
# http://build.ros2.org/job/Rci__nightly-performance_ubuntu_focal_amd64/97/artifact/ws/test_results/buildfarm_perf_tests/*.csv/*zip*/buildfarm_perf_tests.zip

In [2]:
! wget http://build.ros2.org/job/Rci__nightly-performance_ubuntu_focal_amd64/97/artifact/ws/test_results/buildfarm_perf_tests/*.csv/*zip*/buildfarm_perf_tests.zip
! mv buildfarm_perf_tests.zip ./data/
! unzip ./data/buildfarm_perf_tests.zip -d ./data/ 

--2020-10-19 10:24:54--  http://build.ros2.org/job/Rci__nightly-performance_ubuntu_focal_amd64/97/artifact/ws/test_results/buildfarm_perf_tests/*.csv/*zip*/buildfarm_perf_tests.zip
Resolving build.ros2.org (build.ros2.org)... 13.52.151.147
Connecting to build.ros2.org (build.ros2.org)|13.52.151.147|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘buildfarm_perf_tests.zip’

buildfarm_perf_test     [ <=>                ]  83.19K  --.-KB/s    in 0.08s   

2020-10-19 10:24:54 (1.01 MB/s) - ‘buildfarm_perf_tests.zip’ saved [85189]

Archive:  ./data/buildfarm_perf_tests.zip
replace ./data/overhead_node_test_results_rmw_connext_cpp_async.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [3]:
# First let's try to figure out blocks of data
# I.e. what are the "sets" of files we can process.
out = glob.glob("./data/*.csv")

print("Total Files: {0}".format(len(out)))    
perf_files = [f for f in out if "performance" in f]
print("Performance Files: {0}".format(len(perf_files)))
overhead_files = [f for f in out if "overhead" in f]
print("Overhead Files: {0}".format(len(overhead_files)))
two_files = [f for f in out if "two_process" in f]
print("Two Process Files: {0}".format(len(two_files)))
sync_files = [f for f in out if "_sync" in f]
print("Sync Files: {0}".format(len(sync_files)))
async_files = [f for f in out if "async" in f]
print("Async Files: {0}".format(len(async_files)))
pub_files = [f for f in out if "_pub" in f]
print("pub Files: {0}".format(len(pub_files)))
sub_files = [f for f in out if "_sub" in f]
print("sub Files: {0}".format(len(sub_files)))
node_files = [f for f in out if "node" in f]
print("node Files: {0}".format(len(node_files)))


Total Files: 230
Performance Files: 176
Overhead Files: 54
Two Process Files: 88
Sync Files: 115
Async Files: 115
pub Files: 24
sub Files: 24
node Files: 6


In [4]:
perf_cols = ['mean virtual memory (Mb)',
             'median virtual memory (Mb)',
             'virtual memory (Mb)',
             'mean cpu_usage (%)',
             'median cpu_usage (%)',
             'cpu_usage (%)',
             'mean physical memory (Mb)',
             'median physical memory (Mb)',
             'physical memory (Mb)',
             'mean resident anonymous memory (Mb)',
             'median resident anonymous memory (Mb)',
             'resident anonymous memory (Mb)']

In [9]:
# Take all of the "overhead" files and try to merge them into a single table. 
for p in node_files:
    print(p)

df = pd.read_csv(node_files[0])
df.columns = perf_cols
for p in node_files[1:]:
    temp = pd.read_csv(p)
    temp.columns = perf_cols
    df = df.append(temp)
# parse the filenames and add that data. 
df["config"] = ["_".join(n.strip('./data/overhead_node_test_results_rmw_').strip('.csv').split('_')[1:]) for n in node_files]
df["vendor"] = [n.strip('./data/overhead_node_test_results_rmw_').split('_')[0] for n in node_files]
df = df[df.columns[::-1]]
df["file_name"] = node_files
df.to_csv("node_perf.csv")
print(len(df))
print(df["file_name"])

./data/overhead_node_test_results_rmw_connext_cpp_async.csv
./data/overhead_node_test_results_rmw_cyclonedds_cpp_sync.csv
./data/overhead_node_test_results_rmw_fastrtps_dynamic_cpp_async.csv
./data/overhead_node_test_results_rmw_fastrtps_cpp_sync.csv
./data/overhead_node_test_results_rmw_fastrtps_dynamic_cpp_sync.csv
./data/overhead_node_test_results_rmw_fastrtps_cpp_async.csv
6
0    ./data/overhead_node_test_results_rmw_connext_...
0    ./data/overhead_node_test_results_rmw_cycloned...
0    ./data/overhead_node_test_results_rmw_fastrtps...
0    ./data/overhead_node_test_results_rmw_fastrtps...
0    ./data/overhead_node_test_results_rmw_fastrtps...
0    ./data/overhead_node_test_results_rmw_fastrtps...
Name: file_name, dtype: object


In [None]:
def fname_to_data(fname, head="./data/overhead_test_results_rmw_",tail="_ROS2_pub.csv"):
    """
    Munge a file name into metadata. Pull out the first and seond RMW 
    along with the "flavor" information
    """
    fname = fname.replace(head,"").replace(tail,"")
    parts = fname.split("_rmw_")
    first = parts[0].split("_")
    second = parts[1].split("_")
    # format is rmw _ <name> _ <config> _ rwm _ <name2> _ <config2>
    ret_val = {}
    ret_val["first_rmw"] = first[0]
    ret_val["second_rmw"] = second[0]
    ret_val["first_flavor"] = " ".join(first[1:])
    ret_val["second_flavor"] = " ".join(second[1:])
    return(ret_val)

fname_to_data("./data/overhead_test_results_rmw_fastrtps_cpp_async_rmw_connext_cpp_ROS2_pub.csv")

In [None]:
pub_sub_cols = ['mean virtual memory (Mb)',
                'median virtual memory (Mb)',
                'virtual memory (Mb)',
                'mean cpu_usage (%)',
                'median cpu_usage (%)',
                'cpu_usage (%)',
                'mean physical memory (Mb)',
                'median physical memory (Mb)',
                'physical memory (Mb)',
                'mean resident anonymous memory (Mb)',
                'median resident anonymous memory (Mb)',
                'resident anonymous memory (Mb)',
                'mean latency_mean (ms)',
                'median latency_mean (ms)',
                'Top 5% latency (ms)',
                'max ru_maxrss',
                'mean received',
                'mean sent',
                'sum lost',
                'mean system_cpu_usage (%)',
                'mean system virtual memory (Mb)']

In [None]:
# Pull out data for the pub files and repeat for sub files. 
pub_df = pd.read_csv(pub_files[0])
print(pub_files[0])

print("DF Cols {0} vs known cols {1}".format(len(pub_df.columns),len(pub_sub_cols)))    
# squish all the files into one table
pub_df.columns = pub_sub_cols
for p in pub_files[1:]:
    temp = pd.read_csv(p)
    temp.columns = pub_sub_cols
    pub_df = pub_df.append(temp)
# parse the file names into data and add them back to table. 
flavors = [fname_to_data(flavor) for flavor in pub_files]
pub_df["from_rmw"]= [flavor["first_rmw"] for flavor in flavors]
pub_df["from_rmw_flavor"]= [flavor["first_flavor"] for flavor in flavors]
pub_df["to_rmw"]= [flavor["second_rmw"] for flavor in flavors]
pub_df["to_rmw_flavor"]= [flavor["second_flavor"] for flavor in flavors]
pub_df["file_name"] = pub_files
pub_df = pub_df[pub_df.columns[::-1]]
pub_df.to_csv("pub_perf.csv")
pub_df.head()

In [None]:
# Now repeat for subscribersub_perf.head()
sub_df = pd.read_csv(sub_files[0])
print(sub_files[0])

print("DF Cols {0} vs known cols {1}".format(len(sub_df.columns),len(pub_sub_cols)))    

sub_df.columns = pub_sub_cols
for p in sub_files[1:]:
    temp = pd.read_csv(p)
    temp.columns = pub_sub_cols
    sub_df = sub_df.append(temp)
    
flavors = [fname_to_data(flavor,tail="_ROS2_sub.csv") for flavor in sub_files]
sub_df["from_rmw"]= [flavor["first_rmw"] for flavor in flavors]
sub_df["from_rmw_flavor"]= [flavor["first_flavor"] for flavor in flavors]
sub_df["to_rmw"]= [flavor["second_rmw"] for flavor in flavors]
sub_df["to_rmw_flavor"]= [flavor["second_flavor"] for flavor in flavors]
sub_df["file_name"] = sub_files
sub_df = sub_df[sub_df.columns[::-1]]
sub_df.to_csv("sub_perf.csv")
sub_df.head()

In [None]:
# now aggregate the performance results, there are two types two process and and "results"
two_process_perf = [p for p in perf_files if "two_process" in p]
result_perf_file = [p for p in perf_files if "two_process" not in p]
print("{0} two process files and {1} results files. {2} total files.".format(len(two_process_perf),len(result_perf_file),len(perf_files)))

# From: https://github.com/ahcorde/buildfarm_perf_tests/blob/master/test/test_performance.py.in#L48
perf_col_names = [
    'mean latency_mean (ms)',
    'median latency_mean (ms)',
    '95th Percentile Latency',
    'max ru_maxrss',
    'mean received',
    'mean sent',
    'sum lost',
    'mean cpu_usage (%)',
    '95th Percentile CPU',
    'median cpu_usage (%)',
    'mean data_received (Mb)',
    'median data_received (Mb)',
    '95th Percentile Data Received (Mb)']


In [None]:
def fname_to_rmw_and_data(fname):
    """
    Parse and return file names of the format
    performnace_test_resuts_<optional rmw>_<rmw_name>_<rmw_flavor>_<datatype>.csv
    E.g. 
    ./data/performance_test_results_rmw_fastrtps_dynamic_cpp_async_Array32k.csv
    ./data/performance_test_results_FastRTPS_sync_Array2m.csv
    ./data/performance_test_results_CycloneDDS_sync_Array1k.csv
    """
    fname = fname.replace("./data/performance_test_two_process_results_rmw_","")
    fname = fname.replace("./data/performance_test_two_process_results_","")
    fname = fname.replace("./data/performance_test_results_","")
    
    fname = fname.replace(".csv","")
    parts = fname.split("_");
    ret_val = {}
    ret_val["type"] = parts[-1] # last entry is type, easy
    if(parts[0] == "rmw"):
        parts = parts[1:] # drop the first value if it is RMW
    ret_val["vendor"] = parts[0].lower() # both upper and lower is present
    ret_val["flavor"] = "_".join(parts[1:-1])
    return ret_val 

In [None]:
perf_df = pd.read_csv(result_perf_file[0])
print(result_perf_file[0])

print("DF Cols {0} vs known cols {1}".format(len(perf_df.columns),len(perf_col_names)))

perf_df.columns = perf_col_names

# smush main csv files together
for p in result_perf_file[1:]:
    temp = pd.read_csv(p)
    temp.columns = perf_col_names
    perf_df = perf_df.append(temp)
# parse file names 
fname_data = [fname_to_rmw_and_data(p) for p in result_perf_file]
perf_df["vendor"] = [p["vendor"] for p in fname_data]
perf_df["flavor"] = [p["flavor"] for p in fname_data]
perf_df["data_type"] = [p["type"] for p in fname_data]
perf_df["file_name"] = result_perf_file
perf_df = perf_df[perf_df.columns[::-1]]
perf_df.to_csv("perf_network_results.csv")
perf_df.head()


In [None]:
twop_df = pd.read_csv(two_process_perf[0])
print(two_process_perf[0])

print("DF Cols {0} vs known cols {1}".format(len(twop_df.columns),len(perf_col_names)))

twop_df.columns = perf_col_names

# smush main csv files together
for p in two_process_perf[1:]:
    temp = pd.read_csv(p)
    temp.columns = perf_col_names
    twop_df = twop_df.append(temp)
# parse file names 
fname_data = [fname_to_rmw_and_data(p) for p in two_process_perf]
twop_df["vendor"] = [p["vendor"] for p in fname_data]
twop_df["flavor"] = [p["flavor"] for p in fname_data]
twop_df["data_type"] = [p["type"] for p in fname_data]
twop_df["file_name"] = two_process_perf
twop_df = twop_df[twop_df.columns[::-1]]
twop_df.to_csv("two_process_perf_network_results.csv")
twop_df.head()


In [None]:
total = len(pub_files)+len(sub_files)+len(node_files)+len(two_process_perf)+len(result_perf_file)
print("processed {0} of {1}".format(total,len(glob.glob("./data/*.csv"))))