### Usage: plot data of fmke populator

Data is filtered using `n_fmke_pop_process` before plotting into charts.
There are 2 charts. The x-axis is `n_riakkv_per_site`, and each chart has one line per value of `n_fmke_app_per_site`:
1. Time to finish the population process with an increasing number of Riak KV nodes; the y-axis is the time (seconds).
2. The operations per second handled by Riak KV for the population process with an increasing number of Riak KV nodes; the y-axis is the operations per second (ops).

#### Some parameters for plotting charts

In [1]:
import os

# Value of `n_fmke_pop_process` referenced by the (optional) filter in the
# "Filtering data" cell; it is also appended to the exported PNG file names.
N_POP_PROCESSES_TO_FILTER = 100

# Chart dimensions handed to the Bokeh figures.
IMAGE_HEIGHT = 600
IMAGE_WIDTH = 900

# Earlier AntidoteDB dataset, kept for reference:
# DATA_PATH = '/home/linhnguyen/lig/projects/antidotedb-eval/run/fmke_population/fmke_population_64ring_txn-false/14812nodes_parasilo'
# FILE_NAME = 'pop_64ring_tnx_cert-false_14812nodes_parasilo.csv'

# Location of the input CSV (the PNG charts are written to the same directory).
# The defaults are the original author's local paths; set FMKE_DATA_PATH and/or
# FMKE_FILE_NAME in the environment to run the notebook on another machine
# without editing this cell.
DATA_PATH = os.environ.get('FMKE_DATA_PATH', '/home/linhnguyen/lig/projects/riakkv-eval/run/')
FILE_NAME = os.environ.get('FMKE_FILE_NAME', 'riakv_fmke_14812nodes.csv')

In [2]:
import os
import pandas as pd

# Read data

In [3]:
# Load the benchmark results; the CSV location comes from the parameters cell.
csv_path = os.path.join(DATA_PATH, FILE_NAME)
df = pd.read_csv(csv_path)
# Summarise columns, dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   iteration               95 non-null     int64  
 1   n_fmke_pop_process      95 non-null     int64  
 2   dataset                 95 non-null     object 
 3   concurrent_clients      95 non-null     int64  
 4   n_fmke_client_per_site  95 non-null     int64  
 5   n_fmke_app_per_site     95 non-null     int64  
 6   n_riakkv_per_site       95 non-null     int64  
 7   duration                95 non-null     int64  
 8   ops                     95 non-null     float64
dtypes: float64(1), int64(7), object(1)
memory usage: 6.8+ KB


In [4]:
# Preview the first rows to sanity-check how the CSV columns were parsed.
df.head()

Unnamed: 0,iteration,n_fmke_pop_process,dataset,concurrent_clients,n_fmke_client_per_site,n_fmke_app_per_site,n_riakkv_per_site,duration,ops
0,6,1,medium,0,0,8,12,45,252.0
1,1,1,medium,0,0,1,8,45,252.0
2,2,1,medium,0,0,1,1,34,333.0
3,7,1,medium,0,0,1,12,36,315.0
4,5,1,medium,0,0,1,8,47,241.0


### Filtering data

In [5]:
# Optional filters, currently disabled: every row of the CSV is plotted.
# Uncomment to keep a single population-process count and/or a single dataset.
# df = df[df['n_fmke_pop_process'] == N_POP_PROCESSES_TO_FILTER]
# df = df[df['dataset'] == 'medium']

# Summary of the frame that the plotting cells below consume.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   iteration               95 non-null     int64  
 1   n_fmke_pop_process      95 non-null     int64  
 2   dataset                 95 non-null     object 
 3   concurrent_clients      95 non-null     int64  
 4   n_fmke_client_per_site  95 non-null     int64  
 5   n_fmke_app_per_site     95 non-null     int64  
 6   n_riakkv_per_site       95 non-null     int64  
 7   duration                95 non-null     int64  
 8   ops                     95 non-null     float64
dtypes: float64(1), int64(7), object(1)
memory usage: 6.8+ KB


# Plot charts

In [6]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Band, Whisker, Range1d, SingleIntervalTicker
from bokeh.models.tools import HoverTool
from bokeh.palettes import Category10_8 as palette
from bokeh.io import output_notebook, export_png

import itertools

In [7]:
# Configure Bokeh so that show() renders charts inline in this notebook.
output_notebook()

In [8]:
# Chart dimensions come straight from the parameters cell.
width, height = IMAGE_WIDTH, IMAGE_HEIGHT
# Prefix shared by the exported PNG names: the part of the CSV name before the
# first dot, followed by the process-count parameter.
output_file_prefix = '_'.join([FILE_NAME.split('.')[0], str(N_POP_PROCESSES_TO_FILTER)])

## Plot Duration
### Define a figure for plotting

In [9]:
# Create the figure for the write-time chart.
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter
# were removed in Bokeh 3.0, while the former are accepted by older releases too.
p = figure(title="Write Time with increasing number of nodes",
           width=width, height=height,
           x_axis_label='number of Riak KV nodes', y_axis_label='time (seconds)',
           tools="crosshair, pan,box_select,wheel_zoom,zoom_in,zoom_out,xzoom_in,xzoom_out, yzoom_in, yzoom_out,save,reset"
           )
# Enlarge axis titles and tick labels so the exported PNG stays readable.
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font_size = "15pt"
    axis.major_label_text_font_size = "15pt"

# Not read anywhere in the visible notebook; kept so later cells that may
# reference it keep working.
hover_renderers = list()

# Counter incremented by the plotting cell below.
chart_num = 0
# Endless colour iterator: one colour per FMKe-app series.
colors = itertools.cycle(palette)

#### Plot write time

In [10]:
# Columns identifying one benchmark configuration (one point on the chart).
group_keys = ['n_fmke_app_per_site', 'n_riakkv_per_site']

# Collapse duplicate rows of the same (configuration, iteration) first, so
# every iteration carries equal weight in the statistics below.
per_iteration = (
    df[df['duration'].notna()]
    .groupby(group_keys + ['iteration'])['duration']
    .mean()
    .reset_index()
)

# Mean and sample standard deviation across iterations, computed in a single
# aggregation instead of two group-bys joined on a synthetic string key.
data = (
    per_iteration
    .groupby(group_keys)['duration']
    .agg(['mean', 'std'])
    .rename(columns={'mean': 'duration', 'std': 'duration_std'})
    .reset_index()
)
# A configuration measured in a single iteration has an undefined (NaN)
# standard deviation; use 0 so the band collapses onto the line instead of
# carrying NaN bounds into the area glyph.
data['duration_std'] = data['duration_std'].fillna(0)
data['lower'] = data['duration'] - data['duration_std']
data['upper'] = data['duration'] + data['duration_std']

for fmke_app in sorted(data['n_fmke_app_per_site'].unique().tolist()):

    # convert this series' rows to a bokeh data structure
    bokeh_data = ColumnDataSource(data[data['n_fmke_app_per_site'] == fmke_app])
    cur_color = next(colors)
    label = 'FMKe app = %s' % fmke_app
    # mean duration per number of Riak KV nodes: line + markers
    p.line(x='n_riakkv_per_site', y='duration', source=bokeh_data,
           legend_label=label,
           line_width=2, color=cur_color)
    p.circle(x='n_riakkv_per_site', y='duration', source=bokeh_data,
             legend_label=label,
             size=10, color=cur_color)
    # shaded band of +/- one standard deviation around the mean
    p.varea(x='n_riakkv_per_site', y1='lower', y2='upper', source=bokeh_data,
            legend_label=label,
            fill_alpha=0.1, fill_color=cur_color)

# add legend
p.legend.location = "top_left"

# One tick per measured node count, passed as a plain sorted Python list.
p.xaxis.ticker = sorted(data['n_riakkv_per_site'].unique().tolist())
# Pad the x-range by half a unit on each side so the end markers are not clipped.
p.x_range = Range1d(data['n_riakkv_per_site'].min() - 0.5, data['n_riakkv_per_site'].max() + 0.5)

chart_num += 1

In [11]:
# Clicking a legend entry toggles the visibility of that series.
p.legend.click_policy = "hide"
# Display the chart inline.
show(p)
# Write the chart as a PNG next to the input data; the path returned by
# export_png is the cell's output.
png_name = output_file_prefix + '_time.png'
output_file_name = os.path.join(DATA_PATH, png_name)
export_png(p, filename=output_file_name)

'/home/linhnguyen/lig/projects/riakkv-eval/run/riakv_fmke_14812nodes_100_time.png'

## Plot Ops

### Define a figure for plotting

In [12]:
# Create the figure for the operations-per-second chart.
# The title and x-axis label now describe the Riak KV data that is actually
# plotted (x values come from `n_riakkv_per_site`); the previous text was left
# over from an AntidoteDB experiment.
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter
# were removed in Bokeh 3.0, while the former are accepted by older releases too.
p = figure(title="Ops with increasing number of nodes",
           width=width, height=height,
           x_axis_label='number of Riak KV nodes', y_axis_label='operations per second',
           tools="crosshair, pan,box_select,wheel_zoom,zoom_in,zoom_out,xzoom_in,xzoom_out, yzoom_in, yzoom_out,save,reset"
           )
# Enlarge axis titles and tick labels so the exported PNG stays readable.
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font_size = "15pt"
    axis.major_label_text_font_size = "15pt"

# Not read anywhere in the visible notebook; kept so later cells that may
# reference it keep working.
hover_renderers = list()

# Counter incremented by the plotting cell below.
chart_num = 0
# Endless colour iterator: one colour per FMKe-app series.
colors = itertools.cycle(palette)

#### Plot ops

In [13]:
# Columns identifying one benchmark configuration (one point on the chart).
group_keys = ['n_fmke_app_per_site', 'n_riakkv_per_site']

# Collapse duplicate rows of the same (configuration, iteration) first, so
# every iteration carries equal weight in the statistics below.
per_iteration = (
    df[df['ops'].notna()]
    .groupby(group_keys + ['iteration'])['ops']
    .mean()
    .reset_index()
)

# Mean and sample standard deviation across iterations, computed in a single
# aggregation instead of two group-bys joined on a synthetic string key.
data = (
    per_iteration
    .groupby(group_keys)['ops']
    .agg(['mean', 'std'])
    .rename(columns={'mean': 'ops', 'std': 'ops_std'})
    .reset_index()
)
# A configuration measured in a single iteration has an undefined (NaN)
# standard deviation; use 0 so the band collapses onto the line instead of
# carrying NaN bounds into the area glyph.
data['ops_std'] = data['ops_std'].fillna(0)
data['lower'] = data['ops'] - data['ops_std']
data['upper'] = data['ops'] + data['ops_std']

for fmke_app in sorted(data['n_fmke_app_per_site'].unique().tolist()):

    # convert this series' rows to a bokeh data structure
    bokeh_data = ColumnDataSource(data[data['n_fmke_app_per_site'] == fmke_app])
    cur_color = next(colors)
    label = 'FMKe app = %s' % fmke_app
    # mean ops per number of Riak KV nodes: line + markers
    p.line(x='n_riakkv_per_site', y='ops', source=bokeh_data,
           legend_label=label,
           line_width=2, color=cur_color)
    p.circle(x='n_riakkv_per_site', y='ops', source=bokeh_data,
             legend_label=label,
             size=10, color=cur_color)
    # shaded band of +/- one standard deviation around the mean
    p.varea(x='n_riakkv_per_site', y1='lower', y2='upper', source=bokeh_data,
            legend_label=label,
            fill_alpha=0.1, fill_color=cur_color)

# add legend
p.legend.location = "top_left"

# One tick per measured node count, passed as a plain sorted Python list.
p.xaxis.ticker = sorted(data['n_riakkv_per_site'].unique().tolist())
# NOTE(review): the left padding is 2 here versus 0.5 on the duration chart;
# presumably extra room for the top-left legend - confirm it is intentional.
p.x_range = Range1d(data['n_riakkv_per_site'].min() - 2, data['n_riakkv_per_site'].max() + 0.5)
# p.y_range = Range1d(8000, 22000)

chart_num += 1

In [14]:
# Clicking a legend entry toggles the visibility of that series.
p.legend.click_policy = "hide"
# Display the chart inline.
show(p)

# Write the chart as a PNG next to the input data; the path returned by
# export_png is the cell's output.
png_name = output_file_prefix + '_ops.png'
output_file_name = os.path.join(DATA_PATH, png_name)
export_png(p, filename=output_file_name)

'/home/linhnguyen/lig/projects/riakkv-eval/run/riakv_fmke_14812nodes_100_ops.png'