In [None]:
import plotly.graph_objects as go
import pandas as pd
from statistics import mean
from plotly.subplots import make_subplots
import datetime
from functools import reduce
from IPython.display import HTML, display
import seaborn as sns
import os
from lib.yaml_config.yaml_config import YamlConfig
import fnmatch
import re

In [None]:
# Colors of the default series styles, in the order the series are plotted.
# Palette source: https://colorbrewer2.org/#type=sequential&scheme=YlOrBr&n=3
_DEFAULT_SERIES_COLORS = (
    '#d95f0e',  # Line 1
    '#52BCA3',  # Line 2
    '#756bb1',  # Line 3
    '#dd1c77',  # Line 4
    '#fec44f',  # Line 5
    '#636363',  # Line 6
    '#00CCFF',  # Line 7
    '#33FF33',  # Line 8
    '#CC0099',  # Line 9
)

# One style dict per series; plot_all_experiments picks an entry by series position.
# The marker settings live under 'marker' because that is the key the plotting
# code looks up - the former spelling 'maker' was never read, so the marker
# color/size configured here had no effect.
default_lines_attributes = [
    {
        'marker': dict(color=color, size=8),
        'marker_symbol': 'diamond',  # https://plotly.com/python/marker-style/
        'line': dict(color=color, width=1),
        'textfont': dict(color=color, size=10),
    }
    for color in _DEFAULT_SERIES_COLORS
]

In [None]:
def clean_data (string_data, sep=' '):
    '''
    Parse a block of delimited text into a numeric DataFrame.

    string_data - text whose first non-blank line is the header and whose
                  remaining non-blank lines are data rows
    sep         - field separator passed to str.split for every line

    Blank and whitespace-only lines are skipped, so the text may contain empty
    lines between rows (e.g. when pasted as a triple-quoted string).

    Returns a DataFrame with the header fields as columns; every column is
    converted with pd.to_numeric.

    Raises ValueError when the text contains no header line at all.
    '''
    rows = [line.split(sep) for line in string_data.splitlines() if line.strip()]
    if not rows:
        raise ValueError("clean_data: no header line found in the input text")

    header, *records = rows
    df = pd.DataFrame(records, columns=header)
    return df.apply(pd.to_numeric)

In [None]:
# CSS rules for the summary data table (passed to Styler.set_table_styles):
# header cells keep the normal font size and are right-aligned, and the
# caption is rendered below the table.
styles = [
    {
        "selector": "th",
        "props": [("font-size", "100%"), ("text-align", "right")],
    },
    {
        "selector": "caption",
        "props": [("caption-side", "bottom")],
    },
]

In [1]:
def _line_attributes_for_color(color):
    '''Build the style dict (marker, symbol, line, label font) of one series drawn in `color`.'''
    return {
        'marker': dict(color=color, size=8),
        'marker_symbol': 'diamond',  # https://plotly.com/python/marker-style/
        'line': dict(color=color, width=1),
        'textfont': dict(color=color, size=10),
    }


def plot_all_experiments(experiments_data,
                         y='95th_latency',
                         x='throughput',
                         text='concurrency',
                         text_id=0,  # Show only for the first series
                         x_title='Concurency/Threads',
                         y_title='Throughput',
                         title='Sysbench workload, VM configuration<br>3 node Xpand multi-zone vs single zone',
                         annotation='Both configuration includes:<br> - SSL<br> - Single MaxScale node<br> - Driver in the same zone as MS',
                         mode=None,
                         print_data_table=False,  # whether to print data table at the end or not
                         print_raw_data=False,  # whether to print raw data before the graph
                         color_palette=None,  # Set2 is recommended, see https://seaborn.pydata.org/generated/seaborn.color_palette.html
                         sep=' '  # data separator
                         ):
    '''
    Draw one Plotly scatter trace per experiment on a shared figure and show it.

    experiments_data - list of dicts with the keys
        'data'            - a DataFrame, or delimited text parsed with clean_data()
        'name'            - legend label of the series
        'line_attributes' - optional dict overriding entries of the series style
                            ('marker', 'marker_symbol', 'line', 'textfont')
    y, x             - column names plotted on the y / x axis
    text             - column used for the point labels and as the index of the data table
    text_id          - position of the only series that gets point labels;
                       a negative value labels every series
    x_title, y_title - axis titles. NOTE: the defaults do not describe the default
                       x / y columns, so pass explicit titles when relying on those
    title            - figure title
    annotation       - free text placed in the upper-left area of the plot
    mode             - Plotly scatter mode; 'lines+markers+text' when None
    print_data_table - after the figure, display a table with the x and y values of
                       all experiments merged on the `text` column
    print_raw_data   - print every experiment's name and full data before plotting
    color_palette    - seaborn palette name; when given, one color per experiment is
                       taken from it instead of using default_lines_attributes
    sep              - field separator used when 'data' is text

    Returns None; the figure (and optionally the table) is rendered as a side effect.
    '''
    if color_palette is not None:
        palette = sns.color_palette(color_palette, len(experiments_data))
        lines_attributes = [_line_attributes_for_color(color) for color in palette.as_hex()]
    else:
        lines_attributes = default_lines_attributes

    fig = go.Figure()
    all_dfs = []

    for i, exp in enumerate(experiments_data):
        data = exp.get('data')
        # Anything that is not already a DataFrame is treated as delimited text.
        df = data if isinstance(data, pd.DataFrame) else clean_data(data, sep=sep)
        exp_name = exp.get('name')

        if print_raw_data:
            print (exp_name)
            print (df.to_string(index=False))

        if print_data_table:
            # Suffix the value columns with the experiment name so they stay
            # distinguishable after all tables are merged.
            data_tb = (
                df[[y, x, text]]
                .set_index(text)
                .rename(columns={x: f"{x}_{exp_name}", y: f"{y}_{exp_name}"})
            )
            all_dfs.append(data_tb)

        # Reuse the styles cyclically so that having more series than styles
        # does not fail with an IndexError.
        base_attributes = lines_attributes[i % len(lines_attributes)] if lines_attributes else {}
        # `or {}` also covers an explicit 'line_attributes': None.
        trace_line_attributes = base_attributes | (exp.get('line_attributes') or {})
        # Accept the legacy misspelling 'maker' so style dicts written with it still apply.
        marker_style = trace_line_attributes.get('marker', trace_line_attributes.get('maker'))

        show_labels = text_id < 0 or i == text_id
        fig.add_trace(go.Scatter(
            y=df[y],
            x=df[x],
            text=df[text] if show_labels else None,
            textposition='top right',
            textfont=trace_line_attributes.get('textfont'),
            mode=mode or 'lines+markers+text',
            marker=marker_style,
            marker_symbol=trace_line_attributes.get('marker_symbol'),
            line=trace_line_attributes.get('line'),
            name=exp_name,
        ))

    fig.update_layout(title=title, width = 1000, height = 600, title_font_family='Verdana', title_font_size=18)
    fig.update_xaxes(title=x_title, title_font_family='Verdana', title_font_size=14)
    fig.update_yaxes(title=y_title, title_font_family='Verdana', title_font_size=14 )  # type="log"
    fig.update_yaxes(nticks=20)
    fig.update_xaxes(nticks=20)
    fig.layout.template = 'plotly_white'
    # Both annotations are positioned in paper coordinates, i.e. relative to
    # the plotting area rather than to the data.
    fig.add_annotation(text=annotation,
                       xref="paper",
                       yref="paper",
                       align="left",
                       x=0.05,
                       y=.9,
                       showarrow=False)
    now = datetime.datetime.now()
    fig.add_annotation(text=f"Prepared by Xpand Perf team<br>{now:%B, %Y}",
                       xref="paper",
                       yref="paper",
                       align="left",
                       x=0.01,
                       y=-0.2,
                       showarrow=False)
    fig.show()

    # Nothing to tabulate when no experiments were given (reduce would fail on an empty list).
    if print_data_table and all_dfs:
        final = reduce(
            lambda left, right: pd.merge(left, right, left_index=True, right_index=True),
            all_dfs,
        )
        format_dict = {column: '{:,.1f}' for column in final.columns}
        display(final.style.format(format_dict).set_table_styles(styles))

In [None]:
def get_config(yaml_file_name):
    '''
    Build a YamlConfig for `yaml_file_name` and return its configuration mapping.

    yaml_file_name - passed to YamlConfig as `yaml_config_file`

    Returns the object's `_yaml_config_dict` attribute (presumably the parsed
    YAML content as a dict - confirm against YamlConfig).
    '''
    return YamlConfig(yaml_config_file=yaml_file_name)._yaml_config_dict

In [None]:
# Load data after (multiple) experiments to compare the results
def load_experiment_data(config):
    '''
    Collect the CSV results of one or more experiment directories.

    config - mapping read with .get():
        'search_path' - directory whose sub-directories each hold one experiment;
                        when it has no sub-directories it is itself treated as
                        the single experiment directory
        'mode'        - "summary": load the first "*summary.csv" file of every
                        directory and name it after the directory's tag;
                        any other value: load every "*[0-9].csv" file as a
                        separate run named "<tag>_run_<number>"

    The tag is the stripped content of the file "tag" inside the experiment
    directory, or "default" when that file is missing or empty.

    Directories and files are processed in sorted order (runs by run number),
    so the order of the returned series is reproducible.

    Returns a list of dicts {'data': DataFrame, 'name': str}.
    '''
    search_path = config.get('search_path')
    with os.scandir(search_path) as entries:
        experiment_dirs = sorted(entry.path for entry in entries if entry.is_dir())
    # There are no subdirectories - we have probably been given just a single directory
    if not experiment_dirs:
        experiment_dirs = [search_path]

    # Run number = the digits between the last "_" and ".csv" (any number of digits).
    run_file_re = re.compile(r"_(\d+)\.csv$")

    def run_order(file_name):
        # Numeric order for recognised run files; unrecognised names go last, by name.
        match = run_file_re.search(file_name)
        return (int(match.group(1)) if match else float('inf'), file_name)

    experiments_data = []
    for exp_dir in experiment_dirs:
        print (exp_dir)
        # Let's read the workload tag
        try:
            with open(os.path.join(exp_dir, "tag"), "r") as f:
                # Strip the trailing newline so it does not end up in series names.
                tag = f.read().strip() or "default"
        except FileNotFoundError:
            tag = "default"

        file_names = sorted(os.listdir(exp_dir))
        if config.get('mode') == "summary":
            # Just get the summary file and use the tag as the name
            summary = fnmatch.filter(file_names, "*summary.csv")
            if summary:
                df = pd.read_csv(os.path.join(exp_dir, summary[0]))
                experiments_data.append({'data': df, 'name': tag})
        else:
            # We need all runs; the name is built from the tag and the run number
            for run in sorted(fnmatch.filter(file_names, "*[0-9].csv"), key=run_order):
                match = run_file_re.search(run)
                # Fall back to the bare file name when it carries no "_<number>" suffix.
                run_no = match.group(1) if match else os.path.splitext(run)[0]
                df = pd.read_csv(os.path.join(exp_dir, run))
                experiments_data.append({'data': df, 'name': f"{tag}_run_{run_no}"})

    return experiments_data