# Analyze Microbenchmarks

Load, parse and analyze microbenchmarks.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
import os
import json

In [18]:
# Thin out the commit table: drop the row labelled 0, keep every fifth of the
# remaining rows counted from the end, restore the original ordering, and save
# the reduced table to its own file.
commit_rows = pd.read_csv('commitTable.csv', sep=';', index_col=0).drop(0)
every_fifth_from_end = commit_rows[::-5]
commitTable = every_fifth_from_end[::-1]
commitTable.to_csv('commitTable5.csv', sep=';')

## Preprocess and clean data

In [2]:
# Locate the raw results of run "0". glob returns a (possibly empty) list of
# matching paths; os.path.join keeps the path OS independent.
results_pattern = os.path.join("resultsMicro", "runtest", "0", "microbenchResults.csv")
results_file = glob.glob(results_pattern)
if not results_file:
    # Report the path that was searched instead of failing later with an
    # opaque IndexError on results_file[0].
    raise FileNotFoundError("No microbenchmark results found at {!r}".format(results_pattern))

# Column names are supplied explicitly, so pandas treats the first line of the
# file as data rather than as a header.
df = pd.read_csv(results_file[0], names=["run", "Baseline", "path", "name", "system", "invocations", "runtime"], sep=';')

# Rows whose name is the literal "16" are renamed to one of four
# BenchmarkStorageAddRows variants depending on their runtime bucket.
# The buckets are applied in ascending order: once a row is renamed it no
# longer matches "16", so each row lands in the first bucket it fits.
# NOTE(review): presumably "16" is a sub-benchmark label that lost its prefix
# during parsing, and the thresholds separate the variants empirically -- confirm.
for runtime_limit, benchmark_name in [
    (1500, "BenchmarkStorageAddRows1"),
    (10000, "BenchmarkStorageAddRows2"),
    (100000, "BenchmarkStorageAddRows3"),
]:
    df.loc[(df["name"] == "16") & (df["runtime"] < runtime_limit), "name"] = benchmark_name
# Everything still called "16" (runtime >= 100000 or missing) is the last variant.
df.loc[df["name"] == "16", "name"] = "BenchmarkStorageAddRows4"

# Keep only the columns used by the analysis below.
df = df[["run", "path", "name", "system", "runtime"]]

# One frame per system under comparison.
df1 = df[df["system"] == 1]
df2 = df[df["system"] == 2]
df1.head(10)

Unnamed: 0,run,path,name,system,runtime
6,0-0-0,/lib/protoparser/csvimport/parser_timing_test....,BenchmarkRowsUnmarshal-2,1,838.0
7,0-0-0,/lib/protoparser/csvimport/parser_timing_test....,BenchmarkRowsUnmarshal-2,1,843.9
8,0-0-0,/lib/protoparser/csvimport/parser_timing_test....,BenchmarkRowsUnmarshal-2,1,836.0
9,0-0-0,/lib/protoparser/csvimport/parser_timing_test....,BenchmarkRowsUnmarshal-2,1,836.1
10,0-0-0,/lib/protoparser/csvimport/parser_timing_test....,BenchmarkRowsUnmarshal-2,1,834.9
11,0-0-0,/lib/protoparser/csvimport/parser_timing_test....,BenchmarkRowsUnmarshal-2,1,835.3
18,0-0-0,/lib/storage/index_db_timing_test.go/Benchmark...,"""bench-index-db-get-tsids""",1,7066595.0
19,0-0-0,/lib/storage/index_db_timing_test.go/Benchmark...,"""bench-index-db-get-tsids""",1,10174882.0
20,0-0-0,/lib/storage/index_db_timing_test.go/Benchmark...,"""bench-index-db-get-tsids""",1,10034955.0
21,0-0-0,/lib/storage/index_db_timing_test.go/Benchmark...,"""bench-index-db-get-tsids""",1,6931156.0


In [3]:
def aggregate_values(df, expected_runs=30):
    """Summarise the runtimes of every benchmark that has a complete set of runs.

    Rows are grouped by ("path", "name"). Groups whose row count differs from
    ``expected_runs`` are skipped, so incomplete or duplicated measurements do
    not end up in the summary.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "path", "name" and "runtime".
    expected_runs : int, default 30
        Exact number of rows a (path, name) group must have to be included.

    Returns
    -------
    pandas.DataFrame
        One row per retained benchmark, ordered by (path, name), with columns
        'path', 'name', 'median', 'mean' and 'rel_std'. 'rel_std' is the sample
        standard deviation of the runtime expressed as a percentage of the mean.
        The frame is empty (but keeps these columns) when no group qualifies.
    """
    columns = ['path', 'name', 'median', 'mean', 'rel_std']

    # Collect plain records and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0, and growing a frame row by
    # row is quadratic anyway.
    records = []
    for (path, name), group in df.groupby(["path", "name"]):
        if len(group) != expected_runs:
            continue
        runtime = group["runtime"]
        mean = runtime.mean()
        records.append({'path': path,
                        'name': name,
                        'median': runtime.median(),
                        'mean': mean,
                        'rel_std': 100 * runtime.std() / mean})

    # Passing `columns` keeps the schema stable even when `records` is empty.
    return pd.DataFrame(records, columns=columns)


In [4]:
# Per-benchmark summary (median, mean, relative std) of the system-1 runs.
df1_aggregated = aggregate_values(df1)

In [5]:
# Per-benchmark summary (median, mean, relative std) of the system-2 runs.
df2_aggregated = aggregate_values(df2)

In [8]:
# Benchmarks on system 1 whose relative standard deviation exceeds 5 (percent
# of the mean), i.e. the measurements with the largest run-to-run spread.
df1_aggregated.loc[df1_aggregated['rel_std'].gt(5)]

Unnamed: 0,path,name,median,mean,rel_std
9,/lib/decimal/decimal_timing_test.go/BenchmarkF...,BenchmarkFromFloat/0-2,3.3865,3.5483,5.261962
67,/lib/storage/index_db_timing_test.go/Benchmark...,"""bench-index-db-add-tsids""",6939450.0,8120782.0,22.229582
68,/lib/storage/index_db_timing_test.go/Benchmark...,"""bench-index-db-get-tsids""",8747878.0,8711584.0,8.463439
92,/lib/uint64set/uint64set_timing_test.go/Benchm...,BenchmarkIntersectFullOverlap/items_10000-2,6115.0,8230.767,33.487537
93,/lib/uint64set/uint64set_timing_test.go/Benchm...,BenchmarkIntersectFullOverlap/items_100000-2,17639.0,16671.37,13.770035
97,/lib/uint64set/uint64set_timing_test.go/Benchm...,BenchmarkIntersectNoOverlap/items_10000-2,5936.0,6227.6,10.202525
102,/lib/uint64set/uint64set_timing_test.go/Benchm...,BenchmarkIntersectPartialOverlap/items_10000-2,6098.0,6314.5,8.983047
103,/lib/uint64set/uint64set_timing_test.go/Benchm...,BenchmarkIntersectPartialOverlap/items_100000-2,14707.5,14367.87,5.387355
114,/lib/uint64set/uint64set_timing_test.go/Benchm...,BenchmarkMapAddRandomLastBits/lastBits_32-2,5569072.0,5626655.0,5.659037
138,/lib/uint64set/uint64set_timing_test.go/Benchm...,BenchmarkSetAddRandomLastBits/lastBits_32-2,8207494.0,17284390.0,128.764607


In [6]:
# Pair the two systems on the benchmark identity (path, name) instead of on
# row position: a benchmark that is present in only one of the aggregated
# frames would otherwise shift the rows and divide medians of different
# benchmarks. Benchmarks missing on either side are dropped by the inner join.
paired = (
    df1_aggregated[["path", "name", "median"]]
    .rename_axis("row")          # remember df1_aggregated's row labels ...
    .reset_index()
    .merge(
        df2_aggregated[["path", "name", "median"]],
        on=["path", "name"],
        suffixes=("_1", "_2"),
        validate="one_to_one",   # each benchmark may appear once per system
    )
    .set_index("row")            # ... so results stay addressable via df1_aggregated.loc[...]
    .rename_axis(None)
)

# Relative difference of the medians in percent; positive values mean the
# system-1 median runtime is higher than the system-2 median runtime.
rel_speedup = ((paired["median_1"] / paired["median_2"] - 1) * 100).rename("median")
rel_speedup[rel_speedup > 5]

103    53.458890
148     7.814112
151     5.349477
156    40.292096
157     7.209760
Name: median, dtype: float64

In [7]:
# Print the summary row with the same label from both systems side by side;
# the printed path/name show whether that label refers to the same benchmark
# in both frames.
row_label = 105
print(df1_aggregated.loc[row_label], df2_aggregated.loc[row_label])

path       /lib/uint64set/uint64set_timing_test.go/Benchm...
name       BenchmarkIntersectPartialOverlap/items_10000000-2
median                                              933313.5
mean                                           934324.633333
rel_std                                             1.567717
Name: 105, dtype: object path       /lib/uint64set/uint64set_timing_test.go/Benchm...
name       BenchmarkIntersectPartialOverlap/items_10000000-2
median                                              943510.0
mean                                           938351.333333
rel_std                                             2.236638
Name: 105, dtype: object
