## Report FIO results for EBS Benchmark on gp2 and gp3

Scenario (clusters):
- t1: OCP cluster with 1x gp2
- t2: OCP cluster with 2x gp2 (etcd isolated)
- t3: OCP cluster with 1x gp3
- t4: OCP cluster with 2x gp3 (etcd isolated)

This report aggregates the data collected in "battery 2" (named `b2`), which tested all control plane disks in the layouts described above.

The script that creates "battery 2" and collects the data is defined (as a WIP script) [here](https://github.com/mtulio/openshift-cluster-benchmark-lab/blob/init/run-test.sh#L250-L271)

References:
 - [FIO doc](https://fio.readthedocs.io/en/latest/fio_doc.html)
 - This report (notebook): reports/fio-ebs_gp3-b2.ipynb
 - This report (markdown/exported): docs/examples/fio-ebs_gp3-b2.md

In [1]:
# install dependencies
! pip install pandas



In [2]:
import os
import json
import pandas as pd
from IPython.display import display

In [3]:
# Root directory that holds the extracted FIO result folders.
results_path = "/results"

# Parsed FIO results, filled in by the loader functions of this notebook.
results_fio = dict()

# Result archives are saved in the format:
#   {battery_id}_{cluster_id}-fio-{hostname}.tar.gz
# Only entries whose name starts with this prefix are considered.
filter_results_by_battery = "b2_"

In [4]:
results_path

'/results'

In [5]:
# Collect the extracted result directories of the selected battery.
# os.listdir() returns the entries in arbitrary order, so they are sorted to
# keep the cluster order of the report stable between runs.
results_dirs = []
for res in sorted(os.listdir(results_path)):
    if not res.startswith(filter_results_by_battery):
        continue
    # Expects that the archives were already extracted. TODO: extract them here.
    if res.endswith(".tar.gz"):
        continue
    # Extracted results live in directories; skip any other stray file.
    if not os.path.isdir(os.path.join(results_path, res)):
        continue
    results_dirs.append(res)

In [6]:
results_dirs

['b2_t1-fio-ip-10-0-137-218.ec2.internal',
 'b2_t2-fio-ip-10-0-142-88.ec2.internal',
 'b2_t3-fio-ip-10-0-137-24.ec2.internal',
 'b2_t4-fio-ip-10-0-133-152.ec2.internal']

In [7]:
def build_node_result(battery_id, test_name, node_id):
    """
    Make sure the global results_fio holds a list for {battery_id}.
    A list that already exists is kept as it is.
    test_name and node_id are accepted but not used.
    """
    global results_fio
    results_fio.setdefault(f"{battery_id}", [])

In [8]:
def discovery_and_load_fio_results(fpath, battery_id, node_name=None):
    """
    Walk through fio result dir and load JSON files with FIO results,
    keeping only the desired sync latency metrics (converted from ns to ms)
    for each test.

    The selected metrics are appended to the global results_fio[{battery_id}];
    nothing is returned.

    fpath:      result directory. Expected name: {batteryId}-fio-{node_name}
    battery_id: key of results_fio that receives the results
                (battery_id is: batteryName+clusterName)
    node_name:  node name stored on each result. When omitted it is taken
                from the directory name: the text after "-fio-", or the whole
                directory name when that marker is not present.
    """
    global results_fio

    # Derive the node from the path instead of relying on a notebook global.
    if node_name is None:
        dir_name = os.path.basename(os.path.normpath(fpath))
        _, marker, suffix = dir_name.partition("-fio-")
        node_name = suffix if marker else dir_name

    # setdefault: works even when the battery list was not created before.
    battery_results = results_fio.setdefault(f"{battery_id}", [])

    ns_per_ms = 1e+6
    for root, dirs, files in os.walk(fpath):
        # os.walk order is arbitrary; sort to load the jobs in a stable order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".json"):
                continue
            with open(os.path.join(root, file), 'r', encoding="utf-8") as f:
                jdata = json.load(f)

            # Considering only one job by result.
            job = jdata['jobs'][0]
            job_name = job['jobname']

            # Extract jobId from different standards (latest is fio_io_).
            # An IndexError is raised when the name matches none of them.
            id_marker = 'etcd' if 'etcd' in job_name else 'fio_io_'
            job_id = job_name.split(id_marker)[1]

            # Collect only desired data from entire FIO payload.
            sync_lat = job['sync']['lat_ns']
            percentiles = sync_lat['percentile']
            battery_results.append({
                "node_name": f"{node_name}",
                "jobname": job_name,
                "jobID": job_id,
                "sync_lat_max_ms": float(sync_lat['max']) / ns_per_ms,
                "sync_lat_mean_ms": float(sync_lat['mean']) / ns_per_ms,
                "sync_lat_stddev_ms": float(sync_lat['stddev']) / ns_per_ms,
                "sync_lat_p99_ms": float(percentiles['99.000000']) / ns_per_ms,
                "sync_lat_p99.9_ms": float(percentiles['99.900000']) / ns_per_ms,
            })

In [9]:
def aggregate_metric(metric_name):
    """
    Filter desired {metric_name}, extract the jobs (rows) for each cluster (columns),
    and return the data frame.
    JobId | {cluster1}  | [...clusterN |]
    #id   | metricValue | [...metricValue |]

    Reads the global results_fio. A job that is missing on one cluster gets
    an empty (NaN) cell for that cluster. A KeyError is raised when a result
    does not have {metric_name}.
    """
    data_metric = {}
    for bat, bat_results in results_fio.items():
        for res_bat in bat_results:
            # One row per job ID, created on the first time the job is seen.
            job_row = data_metric.setdefault(
                res_bat['jobID'], {"job_Id": res_bat['jobID']}
            )
            job_row[bat] = res_bat[metric_name]

    # Build the frame straight from the rows: passing a literal JSON string
    # to pd.read_json is deprecated and the round trip may re-infer dtypes.
    return pd.DataFrame(list(data_metric.values()))

In [10]:
def aggregate_by_node():
    """
    Group every result of the global results_fio by node name.
    Each entry carries the battery ID, the job ID and all metrics of the
    result (node_name, jobname and jobID are not copied as metrics).
    {
        "{node_name}": [{...metrics}]
    }
    """
    global results_fio
    skipped = ('node_name', 'jobname', 'jobID')
    by_node = {}
    for bat, bat_results in results_fio.items():
        for res_bat in bat_results:
            node_rows = by_node.setdefault(res_bat['node_name'], [])
            entry = {'battery_id': bat, "job_Id": res_bat['jobID']}
            entry.update(
                (key, value)
                for key, value in res_bat.items()
                if key not in skipped
            )
            node_rows.append(entry)
    return by_node

In [11]:
def _df_style_high(val, value_yellow=None, value_red=None):
    "Data frame styling / cell formating"
    color = None
    if (value_yellow != None) and (val >=  value_yellow):
        color = 'yellow'
    if (value_red != None) and (val >=  value_red):
        color = 'red'
    if color == None:
        return color
    #return f"color: {color}"
    return f"background-color: {color}"

## Discovery and Load results

In [12]:
# Discover the fio payload [json] files created for each battery/cluster.
# Expected directory name: {batteryId}-fio-{node_name}
# All results are saved on the global var results_fio.
for res in results_dirs:
    # First two dash-separated fields: battery ID and test name.
    battery_id = res.split('-', 1)[0]
    test_name = res.split('-', 2)[1]
    # The node name is the piece right after the first "{test_name}-".
    node_id = res.split(f"{test_name}-", 2)[1]

    build_node_result(
        battery_id=battery_id,
        test_name=test_name,
        node_id=node_id,
    )

    discovery_and_load_fio_results(f"{results_path}/{res}", battery_id)

## Results

As described, the tests were run on 4 clusters covering two disk layouts (single disk, etcd isolated) using gp2 and gp3. All volumes have the same capacity, and gp3 uses the standard (default) values for IOPS and throughput.

- Total of FIO consecutive tests: 50
- Max IOPS across all jobs: ~1.5k–2k IOPS
- Max IOPS for gp2 device: 386 (capacity=128GiB, throughput*=128 MiB/s)
- Max IOPS for gp3 device: 3000 (capacity=128GiB, throughput=120MiB/s) 

\*[Important note from AWS doc](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html): 
*"The throughput limit is between 128 MiB/s and 250 MiB/s, depending on the volume size. Volumes smaller than or equal to 170 GiB deliver a maximum throughput of 128 MiB/s. Volumes larger than 170 GiB but smaller than 334 GiB deliver a maximum throughput of 250 MiB/s if burst credits are available. Volumes larger than or equal to 334 GiB deliver 250 MiB/s regardless of burst credits. gp2 volumes that were created before December 3, 2018 and that have not been modified since creation might not reach full performance unless you modify the volume."*


____
____
**FIO sync latency p99 in ms (sync_lat_p99_ms)**

Summary of results:
- after the 32nd job the gp2 disks had consumed all burst credits (load higher than the max [380 IOPS]) and became slow (5/6x) due to throttling
- the cluster with etcd on a second gp2 disk stayed reliable for a longer period than the single-disk node
- gp3 stayed below its max and was stable until the end of all tests
- gp3 in normal conditions had higher latency than gp2
- Trade-off in reliability (when long intensive IOPS) and performance (in normal operation)

TODO:
- Collect time and Load1 between each job (it's available on runtime log)
- Compare gp2 and gp3 in percentage terms

In [13]:
# p99 sync latency per job, one column per cluster (renamed to disk layout).
df = aggregate_metric('sync_lat_p99_ms').rename(
    columns={
        "b2_t1": "gp2x1",
        "b2_t2": "gp2x2",
        "b2_t3": "gp3x1",
        "b2_t4": "gp3x2",
    }
)
# Highlight cells: yellow from 5 ms, red from 10 ms.
# NOTE(review): Styler.applymap is deprecated since pandas 2.1 (renamed to
# Styler.map) - confirm the pandas version in use before switching.
df.style.applymap(
    _df_style_high,
    subset=["gp2x1", "gp2x2", "gp3x1", "gp3x2"],
    value_yellow=5.0,
    value_red=10.0,
)

Unnamed: 0,job_Id,gp2x1,gp2x2,gp3x1,gp3x2
0,a1,1.92512,2.70336,3.653632,4.685824
1,a2,2.277376,2.932736,4.227072,5.144576
2,a3,2.244608,2.899968,3.883008,3.915776
3,a4,2.37568,2.736128,4.145152,3.915776
4,a5,2.342912,2.998272,4.079616,4.42368
5,b1,2.37568,2.801664,4.046848,5.07904
6,b2,2.37568,2.965504,5.07904,3.784704
7,b3,2.21184,2.899968,4.75136,3.817472
8,b4,2.244608,2.834432,4.489216,3.948544
9,b5,2.342912,2.768896,4.17792,3.620864


____
**FIO sync latency p99.9 in ms (sync_lat_p99.9_ms)**

- Total of FIO consecutive tests: 50
- Max IOPS on this job: 2k IOPS
- Max IOPS on gp2: 386 (128GiB)
- Max IOPS on gp3: 3000 [default] (128GiB) 

Summary of results:
- Same points of sync_lat_p99_ms; plus:
- isolated etcd disk reported slower than single disk
- gp3 remained stable over a long period of writes

In [14]:
# p99.9 sync latency per job, one column per cluster (renamed to disk layout).
df = aggregate_metric('sync_lat_p99.9_ms').rename(
    columns={
        "b2_t1": "gp2x1",
        "b2_t2": "gp2x2",
        "b2_t3": "gp3x1",
        "b2_t4": "gp3x2",
    }
)
# Highlight cells: yellow from 5 ms, red from 10 ms.
# NOTE(review): Styler.applymap is deprecated since pandas 2.1 (renamed to
# Styler.map) - confirm the pandas version in use before switching.
df.style.applymap(
    _df_style_high,
    subset=["gp2x1", "gp2x2", "gp3x1", "gp3x2"],
    value_yellow=5.0,
    value_red=10.0,
)

Unnamed: 0,job_Id,gp2x1,gp2x2,gp3x1,gp3x2
0,a1,4.75136,9.50272,8.454144,11.075584
1,a2,6.782976,9.764864,9.50272,13.172736
2,a3,6.586368,9.240576,8.978432,10.15808
3,a4,6.324224,9.764864,9.240576,10.420224
4,a5,6.520832,9.895936,9.764864,10.551296
5,b1,6.782976,9.371648,9.371648,11.730944
6,b2,6.455296,9.764864,10.551296,10.682368
7,b3,6.586368,8.978432,10.027008,10.420224
8,b4,6.651904,9.109504,10.420224,10.682368
9,b5,6.651904,10.027008,9.633792,10.81344


____
**FIO sync latency Mean in ms (sync_lat_mean_ms)**

- Total of FIO consecutive tests: 50
- Max IOPS on this job: 2k IOPS
- Max IOPS on gp2: 386 (128GiB)
- Max IOPS on gp3: 3000 [default] (128GiB) 

Summary of results:
- gp2: isolated etcd disk reported slower than single disk
- gp3: had similar results in both scenarios

In [15]:
# Mean sync latency per job, one column per cluster (renamed to disk layout).
# Highlight cells: yellow from 2 ms, red from 5 ms.
# NOTE(review): Styler.applymap is deprecated since pandas 2.1 (renamed to
# Styler.map) - confirm the pandas version in use before switching.
(
    aggregate_metric('sync_lat_mean_ms')
    .rename(
        columns={
            "b2_t1": "gp2x1",
            "b2_t2": "gp2x2",
            "b2_t3": "gp3x1",
            "b2_t4": "gp3x2",
        }
    )
    .style.applymap(
        _df_style_high,
        subset=["gp2x1", "gp2x2", "gp3x1", "gp3x2"],
        value_yellow=2.0,
        value_red=5.0,
    )
)

Unnamed: 0,job_Id,gp2x1,gp2x2,gp3x1,gp3x2
0,a1,0.789064,1.319287,1.333167,1.712503
1,a2,1.056788,1.34898,1.68763,1.756673
2,a3,1.070381,1.334769,1.650445,1.626919
3,a4,1.06862,1.322543,1.641819,1.63342
4,a5,1.069686,1.363166,1.666814,1.701093
5,b1,1.07573,1.342571,1.681684,1.759722
6,b2,1.065582,1.380487,1.774595,1.649717
7,b3,1.041871,1.349172,1.744623,1.65404
8,b4,1.060716,1.323254,1.722502,1.703117
9,b5,1.053578,1.314879,1.681496,1.630997


____
**FIO sync latency Max in ms (sync_lat_max_ms)**

- Total of FIO consecutive tests: 50
- Max IOPS on this job: 2k IOPS
- Max IOPS on gp2: 386 (128GiB)
- Max IOPS on gp3: 3000 [default] (128GiB) 

Summary of results:
- isolated gp2 became more reliable than single disk when the burst balance ended

In [16]:
# Max sync latency per job, one column per cluster (renamed to disk layout).
# Highlight cells: yellow from 50 ms, red from 100 ms.
# NOTE(review): Styler.applymap is deprecated since pandas 2.1 (renamed to
# Styler.map) - confirm the pandas version in use before switching.
(
    aggregate_metric('sync_lat_max_ms')
    .rename(
        columns={
            "b2_t1": "gp2x1",
            "b2_t2": "gp2x2",
            "b2_t3": "gp3x1",
            "b2_t4": "gp3x2",
        }
    )
    .style.applymap(
        _df_style_high,
        subset=["gp2x1", "gp2x2", "gp3x1", "gp3x2"],
        value_yellow=50.0,
        value_red=100.0,
    )
)

Unnamed: 0,job_Id,gp2x1,gp2x2,gp3x1,gp3x2
0,a1,21.362522,42.921417,68.017842,82.157969
1,a2,70.671594,28.750373,18.26689,84.258343
2,a3,14.560474,19.301707,78.664201,82.106066
3,a4,23.222146,35.648584,33.433859,34.026425
4,a5,76.327184,80.075922,76.929612,64.642143
5,b1,16.081376,16.037055,66.840544,27.682156
6,b2,78.391998,67.246422,46.9569,86.332541
7,b3,87.659569,68.943773,22.667406,19.102149
8,b4,75.570967,17.130309,33.258572,18.097099
9,b5,90.578402,83.009037,32.785594,82.894287


____
**FIO sync latency Standard Deviation in ms (sync_lat_stddev_ms)**

- Total of FIO consecutive tests: 50
- Max IOPS on this job: 2k IOPS
- Max IOPS on gp2: 386 (128GiB)
- Max IOPS on gp3: 3000 [default] (128GiB) 

Summary of results:
- gp2: similar/expected deviation when throttling for both disk layouts

In [17]:
# Standard deviation of the sync latency per job, one column per cluster
# (renamed to disk layout). Highlight cells in red from 1 ms; no yellow level.
# NOTE(review): Styler.applymap is deprecated since pandas 2.1 (renamed to
# Styler.map) - confirm the pandas version in use before switching.
(
    aggregate_metric('sync_lat_stddev_ms')
    .rename(
        columns={
            "b2_t1": "gp2x1",
            "b2_t2": "gp2x2",
            "b2_t3": "gp3x1",
            "b2_t4": "gp3x2",
        }
    )
    .style.applymap(
        _df_style_high,
        subset=["gp2x1", "gp2x2", "gp3x1", "gp3x2"],
        value_red=1.0,
    )
)

Unnamed: 0,job_Id,gp2x1,gp2x2,gp3x1,gp3x2
0,a1,0.430956,0.70388,0.772607,0.959877
1,a2,0.582749,0.723067,0.841022,1.073758
2,a3,0.529805,0.688169,0.836908,0.860998
3,a4,0.537326,0.707594,0.819819,0.842392
4,a5,0.586911,0.754138,0.871086,0.917508
5,b1,0.545497,0.687214,0.853895,0.985698
6,b2,0.615213,0.751622,0.952194,0.877783
7,b3,0.59928,0.730728,0.914686,0.821403
8,b4,0.62172,0.679319,0.917649,0.850724
9,b5,0.612393,0.751204,0.844867,0.88019


____
**FIO sync latency - all metrics by node - in ms (all sync_lat_* metrics)**

- Total of FIO consecutive tests: 50
- Max IOPS on this job: 2k IOPS
- Max IOPS on gp2: 386 (128GiB)
- Max IOPS on gp3: 3000 [default] (128GiB) 

Summary of results: []

In [18]:
# Show one table per node with all metrics collected for it.
node_metrics = aggregate_by_node()
for node in node_metrics:
    # Every node list has at least one entry; the first one names the battery.
    print(f"#> {node} [{node_metrics[node][0]['battery_id']}]")
    # Build the frame straight from the list of dicts: passing a literal JSON
    # string to pd.read_json is deprecated and may re-infer dtypes.
    display(pd.DataFrame(node_metrics[node]))

#> ip-10-0-137-218.ec2.internal [b2_t1]


Unnamed: 0,battery_id,job_Id,sync_lat_max_ms,sync_lat_mean_ms,sync_lat_stddev_ms,sync_lat_p99_ms,sync_lat_p99.9_ms
0,b2_t1,a1,21.362522,0.789064,0.430956,1.92512,4.75136
1,b2_t1,a2,70.671594,1.056788,0.582749,2.277376,6.782976
2,b2_t1,a3,14.560474,1.070381,0.529805,2.244608,6.586368
3,b2_t1,a4,23.222146,1.06862,0.537326,2.37568,6.324224
4,b2_t1,a5,76.327184,1.069686,0.586911,2.342912,6.520832
5,b2_t1,b1,16.081376,1.07573,0.545497,2.37568,6.782976
6,b2_t1,b2,78.391998,1.065582,0.615213,2.37568,6.455296
7,b2_t1,b3,87.659569,1.041871,0.59928,2.21184,6.586368
8,b2_t1,b4,75.570967,1.060716,0.62172,2.244608,6.651904
9,b2_t1,b5,90.578402,1.053578,0.612393,2.342912,6.651904


#> ip-10-0-142-88.ec2.internal [b2_t2]


Unnamed: 0,battery_id,job_Id,sync_lat_max_ms,sync_lat_mean_ms,sync_lat_stddev_ms,sync_lat_p99_ms,sync_lat_p99.9_ms
0,b2_t2,a1,42.921417,1.319287,0.70388,2.70336,9.50272
1,b2_t2,a2,28.750373,1.34898,0.723067,2.932736,9.764864
2,b2_t2,a3,19.301707,1.334769,0.688169,2.899968,9.240576
3,b2_t2,a4,35.648584,1.322543,0.707594,2.736128,9.764864
4,b2_t2,a5,80.075922,1.363166,0.754138,2.998272,9.895936
5,b2_t2,b1,16.037055,1.342571,0.687214,2.801664,9.371648
6,b2_t2,b2,67.246422,1.380487,0.751622,2.965504,9.764864
7,b2_t2,b3,68.943773,1.349172,0.730728,2.899968,8.978432
8,b2_t2,b4,17.130309,1.323254,0.679319,2.834432,9.109504
9,b2_t2,b5,83.009037,1.314879,0.751204,2.768896,10.027008


#> ip-10-0-137-24.ec2.internal [b2_t3]


Unnamed: 0,battery_id,job_Id,sync_lat_max_ms,sync_lat_mean_ms,sync_lat_stddev_ms,sync_lat_p99_ms,sync_lat_p99.9_ms
0,b2_t3,a1,68.017842,1.333167,0.772607,3.653632,8.454144
1,b2_t3,a2,18.26689,1.68763,0.841022,4.227072,9.50272
2,b2_t3,a3,78.664201,1.650445,0.836908,3.883008,8.978432
3,b2_t3,a4,33.433859,1.641819,0.819819,4.145152,9.240576
4,b2_t3,a5,76.929612,1.666814,0.871086,4.079616,9.764864
5,b2_t3,b1,66.840544,1.681684,0.853895,4.046848,9.371648
6,b2_t3,b2,46.9569,1.774595,0.952194,5.07904,10.551296
7,b2_t3,b3,22.667406,1.744623,0.914686,4.75136,10.027008
8,b2_t3,b4,33.258572,1.722502,0.917649,4.489216,10.420224
9,b2_t3,b5,32.785594,1.681496,0.844867,4.17792,9.633792


#> ip-10-0-133-152.ec2.internal [b2_t4]


Unnamed: 0,battery_id,job_Id,sync_lat_max_ms,sync_lat_mean_ms,sync_lat_stddev_ms,sync_lat_p99_ms,sync_lat_p99.9_ms
0,b2_t4,a1,82.157969,1.712503,0.959877,4.685824,11.075584
1,b2_t4,a2,84.258343,1.756673,1.073758,5.144576,13.172736
2,b2_t4,a3,82.106066,1.626919,0.860998,3.915776,10.15808
3,b2_t4,a4,34.026425,1.63342,0.842392,3.915776,10.420224
4,b2_t4,a5,64.642143,1.701093,0.917508,4.42368,10.551296
5,b2_t4,b1,27.682156,1.759722,0.985698,5.07904,11.730944
6,b2_t4,b2,86.332541,1.649717,0.877783,3.784704,10.682368
7,b2_t4,b3,19.102149,1.65404,0.821403,3.817472,10.420224
8,b2_t4,b4,18.097099,1.703117,0.850724,3.948544,10.682368
9,b2_t4,b5,82.894287,1.630997,0.88019,3.620864,10.81344
