# DevOps Directive Site Load Testing

Corresponding Post: https://devopsdirective.com/posts/2020/03/load-testing-f1-micro/

---

- Tested using [k6](https://k6.io)
- Tests performed on an n1-standard-4 Ubuntu 16.04 instance
- Tests performed with and without Cloudflare caching


For each test run I want:
- num vus
- num iterations
- num http reqs
- check failures
- data_received
- **ALL** http_* stats [avg, min, med, 90, 95, max]

---

*NOTE:* k6 can save its output as a JSON file, but doing so records every individual request, whereas I only wanted the end-of-run summary that is normally printed to stdout. As a result, I had to do some text parsing on that summary in order to work with it.



In [None]:
# imports 

import re
from os import walk, path
import pandas as pd
import plotly.graph_objects as go
import plotly

In [None]:

# Collect a (filename, filepath) tuple for every non-hidden file found
# under DIR_PATH. Filenames are sorted within each directory so runs are
# processed in a stable order.
DIR_PATH = './results-n1-standard-8-60-second-tests'
result_files = []
for (dirpath, dirnames, filenames) in walk(DIR_PATH):
    for filename in sorted(filenames):
        # Skip dot-prefixed (hidden) files.
        if filename.startswith('.'):
            continue
        # Join with dirpath rather than DIR_PATH: walk() also descends into
        # subdirectories, and joining with the root would yield paths that
        # do not exist for files found there.
        result_files.append((filename, path.join(dirpath, filename)))

In [None]:
# Parsing the text files

def parse_timestring_to_ms(t_string):
    """Convert a duration string such as '13.99ms' or '1m0.5s' to milliseconds.

    Accepts one or more ``<number><unit>`` components written back to back,
    which is the shape produced by Go's ``time.Duration.String`` (the format
    the k6 summary appears to use). The previous implementation only looked
    at the first unit letter, so a value like '1m0s' was read as 1 ms.

    Args:
        t_string: Duration text, e.g. '250ns', '500µs', '13.99ms', '1.5s',
            '1m30s' or '1h0m0s'.

    Returns:
        The duration in milliseconds as a float.

    Raises:
        ValueError: If the string has no unit, or is not a sequence of
            ``<number><unit>`` components with a supported unit.
    """
    # Sub-millisecond units divide and the others multiply, so single-unit
    # inputs yield exactly the same floats as the earlier implementation.
    divisors = {'ns': 1000000, 'µs': 1000, 'μs': 1000, 'us': 1000}
    multipliers = {'ms': 1, 's': 1000, 'm': 60 * 1000, 'h': 60 * 60 * 1000}

    number = r'(?:\d+(?:\.\d*)?|\.\d+)'
    # Two-letter units are listed first so 'ms' is never split into 'm' + 's'.
    unit = r'(?:ns|µs|μs|us|ms|s|m|h)'

    text = t_string.strip()
    if re.fullmatch(number, text):
        raise ValueError('No unit string found')
    if not re.fullmatch(f'(?:{number}{unit})+', text):
        raise ValueError('Unit did not match ns, µs, ms, s, m, or h')

    total_ms = 0.0
    for amount, suffix in re.findall(f'({number})({unit})', text):
        if suffix in divisors:
            total_ms += float(amount) / divisors[suffix]
        else:
            total_ms += float(amount) * multipliers[suffix]
    return total_ms
        
def convert_datastrings_to_mb(num, unit):
    """Convert a data amount expressed in `unit` to megabytes.

    Uses decimal (1000-based) steps between units, consistent with the
    existing GB -> MB factor of 1000. 'B', 'kB' and 'TB' are accepted in
    addition to the original 'MB' and 'GB', so small or very large
    transfers no longer abort parsing.

    Args:
        num: The numeric amount, as a string or number accepted by float().
        unit: One of 'B', 'kB', 'MB', 'GB' or 'TB' (case-sensitive).

    Returns:
        The amount in megabytes as a float.

    Raises:
        ValueError: If `unit` is not supported or `num` is not numeric.
    """
    # Units smaller than MB divide, larger ones multiply; this keeps the
    # MB and GB results bit-for-bit identical to the previous behaviour.
    divisors = {'B': 1000000, 'kB': 1000}
    multipliers = {'GB': 1000, 'TB': 1000000}

    if unit == 'MB':
        return float(num)
    if unit in divisors:
        return float(num) / divisors[unit]
    if unit in multipliers:
        return float(num) * multipliers[unit]
    raise ValueError('Unit did not equal B, kB, MB, GB, or TB')

def extract_results_dict(file_tuple, header_lines=25):
    """Parse one saved k6 text summary into a flat dict of metrics.

    Args:
        file_tuple: ``(filename, filepath)``. The second '-'-separated token
            of ``filename`` marks the run as cached when it equals 'cache'
            (e.g. '0006-cache-summary.txt').
        header_lines: Number of leading lines to skip before looking for
            metrics. Defaults to 25, the value previously hard-coded.

    Returns:
        A dict containing, for whichever fields are present in the file:
        'cached' (bool); '<http_req_*>_<label>' timings in ms (float);
        'valid_checks' / 'invalid_checks' (str) and 'invalid_percentage'
        (float); 'vus', 'iterations', 'http_reqs' (str, as printed); and
        'data_received' in MB (float).

    Raises:
        ValueError: Propagated from the duration / data-size converters, or
            from int() when a check count is not numeric.
        IndexError: If the filename has no '-' separator or a recognised
            line has fewer tokens than expected.
    """
    result = {}

    # Labels are assigned positionally to the values on an http_req_* line.
    # NOTE(review): this assumes the columns are printed in the order
    # avg, min, med, max, p(90), p(95) - confirm against the k6 output.
    PERCENTILES = ['avg', '0', '50', '100', '90', '95']

    FIELD = re.compile(r'(\w+)\.*:(.*)', re.DOTALL)  # split the line to name:value
    VALUES = re.compile(r'(?<==).*?(?=\s|$)')  # match individual values from http_req_* fields
    result['cached'] = file_tuple[0].split('-')[1] == 'cache'
    filepath = file_tuple[1]

    # Read as UTF-8 explicitly: the summary contains non-ASCII characters
    # (e.g. the 'µ' in 'µs'), which a platform-default codec can mangle.
    with open(filepath, "r", encoding="utf-8") as f_in:
        for i, line in enumerate(f_in):  # read the input file line by line
            if i < header_lines:  # skip header content
                continue
            field = FIELD.match(line.strip())  # match <field_name>...:<values>
            if not field:
                continue
            name = field.group(1)   # the field name
            value = field.group(2)  # everything after the colon

            # The branches below are mutually exclusive on `name`.
            if name.startswith("http_req_"):
                http_req_ms = map(parse_timestring_to_ms, VALUES.findall(value))
                for percentile, req_ms in zip(PERCENTILES, http_req_ms):
                    result[f'{name}_{percentile}'] = req_ms
            elif name == 'check_failure_rate':
                line_split = line.split()
                # The two counts are taken by position from the end of the line.
                valid_checks = line_split[-1]
                invalid_checks = line_split[-3]
                result['valid_checks'] = valid_checks
                result['invalid_checks'] = invalid_checks
                total_checks = int(valid_checks) + int(invalid_checks)
                # Report 0% instead of dividing by zero when no checks ran.
                if total_checks:
                    result['invalid_percentage'] = 100*int(invalid_checks)/total_checks
                else:
                    result['invalid_percentage'] = 0.0
            elif name in ('vus', 'iterations', 'http_reqs'):
                # Stored as the printed string; callers convert if needed.
                result[name] = line.split()[1]
            elif name == 'data_received':
                line_split = line.split()
                num = line_split[1]
                unit = line_split[2]
                result[name] = convert_datastrings_to_mb(num, unit)
    return result

# Parse every collected summary file; the mapping is keyed by filename
# (element 0 of each tuple) and holds that file's parsed metrics dict.
results = {
    entry[0]: extract_results_dict(entry)
    for entry in result_files
}

# Each result value is a dict shaped:
# 
# {
#     'cached': True,
#     'valid_checks': '3794', 
#     'invalid_checks': '0', 
#     'invalid_percentage': 0.0, 
#     'data_received': 1700.0, 
#     'http_req_*_avg': 13.99, 
#     'http_req_*_0': 13.99, 
#     'http_req_*_50': 13.99, 
#     'http_req_*_100': 13.99, 
#     'http_req_*_90': 13.99, 
#     'http_req_*_95': 13.99, 
#     'http_reqs': '49709', 
#     'iterations': '3584', 
#     'vus': '1536'
# }


In [None]:
# transforming into format for plotting

# Gather every metric name seen in any run, in first-seen order, rather
# than relying on one specific filename being present in `results`.
metric_keys = list(dict.fromkeys(
    key for result in results.values() for key in result
))

# Column-oriented storage: metric name -> list of per-run values.
cached = {key: [] for key in metric_keys}
uncached = {key: [] for key in metric_keys}

for result in results.values():
    target = cached if result['cached'] else uncached
    for key in metric_keys:
        # .get() pads a metric missing from this run with None so every
        # column keeps the same length and the DataFrame can be built.
        target[key].append(result.get(key))

cached_df = pd.DataFrame.from_dict(cached)
uncached_df = pd.DataFrame.from_dict(uncached)

# Drop the last uncached row; the plots below annotate that run as
# excluded because most virtual users never received a response.
uncached_without_failed_test_df = uncached_df.drop(uncached_df.tail(1).index)
cached_df.head()

In [None]:
# plotting
def plot_timing_data(metric):
    """Plot the 50th/90th/95th percentile of a timing metric against VUs.

    Reads the module-level `cached_df` and `uncached_without_failed_test_df`
    frames and draws one dotted (cached) and one solid (uncached) line per
    percentile, then shows the figure.

    Args:
        metric: Metric name prefix such as 'http_req_duration'; the columns
            '<metric>_50', '<metric>_90' and '<metric>_95' must exist.

    Returns:
        The plotly Figure that was shown.
    """
    fig = go.Figure()

    # (column suffix, legend label, line colour) per plotted percentile
    percentile_series = [
        ('50', '50% (median)', 'limegreen'),
        ('90', '90%', 'orange'),
        ('95', '95%', 'red'),
    ]
    # (data frame, legend prefix, extra line styling) per data source.
    # x and y are both taken from the same frame: previously the uncached
    # x values came from `uncached_df`, which still contains the excluded
    # final run and so had one more point than the y series.
    sources = [
        (cached_df, 'Cached', {'dash': 'dot'}),
        (uncached_without_failed_test_df, 'Uncached', {}),
    ]

    for suffix, label, color in percentile_series:
        series_name = f'{metric}_{suffix}'
        for frame, prefix, line_style in sources:
            fig.add_trace(go.Scatter(
                x=frame['vus'],
                y=frame[series_name],
                name=f'{prefix} {label}',
                line=dict(color=color, width=2, **line_style)
            ))

    fig.update_layout(
        title= f'{metric} vs. Number of Virtual Users',
        xaxis_title='Virtual Users',
        yaxis_title='Time (ms)',
        width=1000, 
        height=450,
        annotations=[
                go.layout.Annotation(
                    text=' <br>NOTE: 74% of virtual users in <br> final uncached test (excluded from <br> plot) never received a response, <br>  causing invalid timing results<br>  ',
                    align='center',
                    showarrow=False,
                    x=1400,
                    yref='paper',
                    y=1,
                    bordercolor='black',
                    borderwidth=1
                )
            ]
    )

    fig.show()
#     fig.write_image(f'{metric}.png')
#     fig.write_html(f'{metric}.html')
    return fig
    
fig = plot_timing_data('http_req_duration')
# plotly.offline.plot(fig, filename = 'filename.html', auto_open=False)

In [None]:
def plot_nontiming_data(metric, y_label):
    """Plot a single non-timing metric against the number of virtual users.

    Reads the module-level `cached_df` and `uncached_without_failed_test_df`
    frames, draws one dotted (cached) and one solid (uncached) line, and
    shows the figure.

    Args:
        metric: Column name to plot, e.g. 'data_received'.
        y_label: Text for the y-axis title.

    Returns:
        The plotly Figure that was shown.
    """
    fig = go.Figure()

    series_name = metric
    fig.add_trace(go.Scatter(
        x=cached_df['vus'],
        y=cached_df[series_name],
        name='Cached',
        line=dict(color='limegreen', width=2, dash='dot')
    ))
    # The uncached line must use the uncached runs' own VU counts, taken
    # from the same filtered frame as its y values. It previously reused
    # the cached x values (copy-paste slip).
    fig.add_trace(go.Scatter(
        x=uncached_without_failed_test_df['vus'],
        y=uncached_without_failed_test_df[series_name],
        name='Uncached',
        line=dict(color='limegreen', width=2)
    ))

    fig.update_layout(
        title= f'{series_name} vs. Number of Virtual Users',
        xaxis_title='Virtual Users',
        yaxis_title=y_label,
        width=1000, 
        height=450,
        annotations=[
                go.layout.Annotation(
                    text=' <br>NOTE: 74% of virtual users in <br> final uncached test (excluded from <br> plot) never received a response, <br> ',
                    align='center',
                    showarrow=False,
                    x=1600,
                    yref='paper',
                    y=0,
                    bordercolor='black',
                    borderwidth=1
                )
            ]
    )

#     fig.write_image(f'{metric}.png')
#     fig.write_html(f'{metric}.html')
    fig.show()
    return fig

fig = plot_nontiming_data('data_received', 'Data (MB)')