# Ceph librados benchmarking
These tests were based on a cluster of 4 nodes:
* 1x head node with the monitor and metadata service. This is also where the tests were run.
* 3x OSD nodes

In [1]:
import sys, glob, os, re, warnings, matplotlib, datetime, pandas, numpy
%matplotlib inline

This notebook graphs the baseline measurements from the results in:

In [2]:
# Directory holding the raw benchmark output files. Alternative result sets
# that can be swapped in instead:
#RESULTS = "./p3results/"
#RESULTS = "./ramdisk_results/"
RESULTS = "./results/"

# Show which result files are available, in name order.
result_files = os.listdir(RESULTS)
result_files.sort()
for f in result_files:
    print("\t- " + f)

	- radosbench-rand-run1-Wed_Apr_25_04:42:38_NZST_2018
	- radosbench-rand-run2-Wed_Apr_25_04:45:49_NZST_2018
	- radosbench-rand-run3-Wed_Apr_25_04:49:00_NZST_2018
	- radosbench-seq-run1-Wed_Apr_25_05:08:05_NZST_2018
	- radosbench-seq-run2-Wed_Apr_25_05:10:13_NZST_2018
	- radosbench-seq-run3-Wed_Apr_25_05:12:21_NZST_2018
	- radosbench-write-run1-Wed_Apr_25_04:24:25_NZST_2018
	- radosbench-write-run1-Wed_Apr_25_04:26:25_NZST_2018
	- radosbench-write-run2-Wed_Apr_25_04:29:35_NZST_2018
	- radosbench-write-run3-Wed_Apr_25_04:32:46_NZST_2018


## RADOSBENCH
Object I/O performance of the cluster is tested with the RADOS CLI: the benchmarking tool is configured to exercise the cluster with writes, sequential reads, and random reads for 3 minutes each, repeated for 3 cycles. More specifically, the benchmark achieves this by:

```
Maintaining 16 concurrent writes/reads of 4194304 bytes to objects of size 4194304 for up to 1 seconds or 0 objects
```

For further reading, check out this [tutorial](http://tracker.ceph.com/projects/ceph/wiki/Benchmark_Ceph_Cluster_Performance).

**Write Command**: `rados bench --show-time -p scbench 180 write --no-cleanup`

**Read Command**: `rados bench --show-time -p scbench 180 [seq | rand]`

**Expectation**: sustained throughput equal to either the throughput of the slowest disk or the network speed.

In [3]:
# parse: slurp up the RADOSBENCH output into a Pandas data frame
RADOSBENCH_COLHEADERS = ("date", "time", "sec", "Cur ops", "started", "finished", "avg MB/s", "cur MB/s", "last lat(s)", "avg lat(s)")

df_thruput = pandas.DataFrame()
for fname in glob.glob(RESULTS + "radosbench-*"):
    df = pandas.DataFrame(columns=RADOSBENCH_COLHEADERS)
    with open(fname, "r") as input:
        for line in input:
            if not re.search('[a-zA-Z]', line): # skip lines that don't have raw numbers
                df.loc[len(df)] = line.split()  # add this line to end of the data frame
    df_thruput[fname] = df['avg MB/s']

In [4]:
# parse: slurp up the RADOSBENCH output into a Pandas data frame
RADOSBENCH_COLHEADERS = ("date", "time", "sec", "Cur ops", "started", "finished", "avg MB/s", "cur MB/s", "last lat(s)", "avg lat(s)")

df_to_csv = pandas.DataFrame()
for fname in glob.glob(RESULTS + "radosbench-*"):
    df = pandas.DataFrame(columns=RADOSBENCH_COLHEADERS)
    with open(fname, "r") as input:
        for line in input:
            if not re.search('[a-zA-Z]', line): # skip lines that don't have raw numbers
                df.loc[len(df)] = line.split()  # add this line to end of the data frame
    label = fname.split('-')[1] + '-' + fname.split('-')[2]
    df_to_csv[label] = df['avg MB/s']
df_to_csv.to_csv('radosbench.csv')

In [5]:
# plot: graph a time series of instantaneous throughput
pandas.set_option('display.max_rows', 1000)
#df_thruput = df_thruput.fillna(0.0)
df_to_csv

Unnamed: 0,seq-run2,rand-run2,write-run2,write-run1,rand-run1,seq-run3,seq-run1,write-run3,rand-run3
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2854.53,2903.45,1831.89,1915.76,2895.29,2821.47,2918.11,1871.85,2859.42
2,2888.98,2905.49,1869.77,1957.72,2917.41,2864.52,2922.6,1835.76,2873.5
3,2886.65,2911.52,1899.74,1941.07,2920.83,2877.31,2932.29,1873.07,2884.91
4,2893.87,2900.55,1912.73,1926.75,2929.48,2876.91,2936.63,1885.76,2900.61
5,2898.22,2906.76,1914.12,1930.95,2932.33,2870.2,2934.84,1898.17,2904.43
6,2902.42,2911.57,1909.06,1936.42,2935.52,2871.12,2936.75,1907.1,2899.63
7,2907.73,2915.01,1902.02,1940.32,2934.38,2876.75,2938.3,1906.63,2901.32
8,2911.55,2915.08,1904.72,1946.75,2936.55,2879.62,2939.8,1896.77,2905.11
9,2912.74,2918.7,1900.16,1944.64,2937.33,2882.27,2941.11,1893.55,2908.93


In [6]:

# plot: one line per run, average throughput (MB/s) against the sample index
# NOTE(review): several of the names imported below are not used in this cell;
# they are kept in case other cells rely on them — confirm before removing.
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True)

from plotly.graph_objs import *
import numpy as np

import cufflinks as cf

df = df_to_csv

# One trace per column of the table; the row index is the x axis.
data = [dict(x=df.index, y=df[col], name=col) for col in df.columns]

plot_layout = {
    'title': 'Throughput: Write,Seq,Rand',
    'xaxis': {'title': 'Interval(secs)', 'titlefont': {'size': 18}},
    'yaxis': {'title': 'MB/s', 'titlefont': {'size': 18}},
}
plot_figure = {'data': data, 'layout': plot_layout}

iplot(plot_figure,
      filename='cufflinks/simple-line', image_width=1024, image_height=768)
