In [1]:
import json
import os
import glob
import pandas

## The path to the env dir of the experimental campaign

In [2]:
# Root directory of one experimental campaign. The cells below read
# "params.json" and each run's backup directory relative to this path.
RESULT_PATH = "../../results/threading_eventlet/test_case_1"

## Inserting some ombt code (this cell could be removed if ombt were imported as a library)

In [3]:
import math

class Stats(object):
    """Manage a single statistic.

    Accumulates the minimum, maximum, running total, sample count and sum of
    squares of a stream of samples, together with a histogram
    (``distribution``) grouped by powers of ten:
    ``distribution[order][i]`` counts the samples that fall in
    ``[i * 10**order, (i + 1) * 10**order)``.

    Samples are assumed to be non-negative numbers (e.g. latencies);
    negative values are not handled by the histogram.
    """
    def __init__(self, min=None, max=None, total=0, count=0,
                 sum_of_squares=0, distribution=None):
        self.min = min
        self.max = max
        self.total = total
        self.count = count
        self.sum_of_squares = sum_of_squares
        # distribution of values grouped by powers of 10
        self.distribution = distribution or dict()

    @classmethod
    def from_dict(cls, values):
        """Build an instance from the mapping produced by ``to_dict``.

        The caller's mapping is left untouched: the conversion works on a
        shallow copy and the histogram rows are copied as well.
        """
        params = dict(values)
        if 'distribution' in params:
            # hack alert!
            # when a Stats is passed via an RPC call it appears as if the
            # distribution map's keys are converted from int to str.
            # Fix that by re-indexing the distribution map:
            params['distribution'] = dict(
                (int(k), list(row))
                for k, row in params['distribution'].items())
        return cls(**params)

    def to_dict(self):
        """Return the statistic as a plain dict accepted by ``from_dict``."""
        new_dict = dict()
        for a in ["min", "max", "total", "count", "sum_of_squares"]:
            new_dict[a] = getattr(self, a)
        # copy each row so later updates do not leak into the exported dict
        new_dict["distribution"] = dict(
            (k, list(row)) for k, row in self.distribution.items())
        return new_dict

    def update(self, value):
        """Record one sample."""
        self.total += value
        self.count += 1
        self.sum_of_squares += value**2
        # compare against None explicitly: 0 is a legitimate min/max
        self.min = value if self.min is None else min(self.min, value)
        self.max = value if self.max is None else max(self.max, value)
        log = int(math.log10(value)) if value >= 1.0 else 0
        base = 10**log
        index = int(value/base)  # 0..9
        if log not in self.distribution:
            self.distribution[log] = [0 for i in range(10)]
        self.distribution[log][index] += 1

    def reset(self):
        """Forget every recorded sample."""
        self.__init__()

    def average(self):
        """Return the mean of the samples, or 0 when there are none."""
        return (self.total / float(self.count)) if self.count else 0

    def std_deviation(self):
        """Return the population standard deviation, or -1 without samples."""
        if not self.count:
            return -1
        variance = ((self.sum_of_squares / float(self.count))
                    - (self.average() ** 2))
        # rounding can push the variance of (near-)constant samples slightly
        # below zero, which would make math.sqrt raise
        return math.sqrt(max(variance, 0.0))

    def merge(self, stats):
        """Fold the samples recorded by another ``Stats`` into this one."""
        if stats.min is not None:
            self.min = (stats.min if self.min is None
                        else min(self.min, stats.min))
        if stats.max is not None:
            self.max = (stats.max if self.max is None
                        else max(self.max, stats.max))

        self.total += stats.total
        self.count += stats.count
        self.sum_of_squares += stats.sum_of_squares
        for k, row in stats.distribution.items():
            if k in self.distribution:
                self.distribution[k] = [a + b for a, b
                                        in zip(row, self.distribution[k])]
            else:
                # copy the row so both objects do not share one list
                self.distribution[k] = list(row)

    def __str__(self):
        # an empty statistic has no min/max; "%i" would raise on None
        low = "n/a" if self.min is None else "%i" % self.min
        high = "n/a" if self.max is None else "%i" % self.max
        return "min=%s, max=%s, avg=%f, std-dev=%f" % (low, high,
                                                       self.average(),
                                                       self.std_deviation())

    def print_distribution(self):
        """Print one line per histogram bucket, in increasing order."""
        # key=int keeps the order numeric even if the keys are still the
        # strings produced by a JSON/RPC round trip
        for order in sorted(self.distribution, key=int):
            row = self.distribution[order]
            scale = 10 ** int(order)
            # order=0, index=0 is special case as it is < 1.0, for all orders >
            # 0, index 0 is ignored since everything < 10^order is accounted
            # for in index 9 of the (order - 1) row
            first = 0 if int(order) == 0 else 1
            for index in range(first, len(row)):
                print("[%d..<%d):  %d" %
                      (scale * index,
                       scale * (index + 1),
                       row[index]))

class TestResults(object):
    """Client results of a test run.

    Holds the start/stop timestamps of the run, a latency statistic, the
    number of successful and failed message transfers, and a map of error
    messages to the number of times each occurred.
    """
    def __init__(self, start_time=None, stop_time=None, latency=None,
                 msgs_ok=0, msgs_fail=0, errors=None):
        super(TestResults, self).__init__()
        self.start_time = start_time
        self.stop_time = stop_time
        self.latency = latency or Stats()
        self.msgs_ok = msgs_ok  # count of successful msg transfers
        self.msgs_fail = msgs_fail  # count of failed msg transfers
        self.errors = errors or dict()  # error msgs and counts

    @classmethod
    def from_dict(cls, values):
        """Build an instance from the mapping produced by ``to_dict``.

        Works on copies so the caller's mapping (and its nested latency
        mapping) is not modified.
        """
        params = dict(values)
        if 'latency' in params:
            params['latency'] = Stats.from_dict(dict(params['latency']))
        if 'errors' in params:
            params['errors'] = params['errors'].copy()
        return cls(**params)

    def to_dict(self):
        """Return the results as a plain dict accepted by ``from_dict``."""
        new_dict = dict()
        for a in ['start_time', 'stop_time', 'msgs_ok', 'msgs_fail']:
            new_dict[a] = getattr(self, a)
        new_dict['latency'] = self.latency.to_dict()
        new_dict['errors'] = self.errors.copy()
        return new_dict

    def error(self, reason):
        """Count one occurrence of the error described by ``reason``."""
        key = str(reason)
        self.errors[key] = self.errors.get(key, 0) + 1

    def reset(self):
        """Drop everything recorded so far."""
        self.__init__()

    def merge(self, results):
        """Fold another ``TestResults`` into this one.

        The merged interval spans the earliest start and the latest stop;
        counters, latency and error counts are added up.
        """
        # test against None rather than truthiness: a timestamp of 0 is a
        # valid value and must not be treated as "unset"
        if results.start_time is not None:
            self.start_time = (results.start_time
                               if self.start_time is None
                               else min(self.start_time, results.start_time))
        if results.stop_time is not None:
            self.stop_time = (results.stop_time
                              if self.stop_time is None
                              else max(self.stop_time, results.stop_time))
        self.msgs_ok += results.msgs_ok
        self.msgs_fail += results.msgs_fail
        self.latency.merge(results.latency)
        for err in results.errors:
            self.errors[err] = self.errors.get(err, 0) + results.errors[err]

    def print_results(self):
        """Print a human-readable summary of the results."""
        if self.msgs_fail:
            print("Error: %d message transfers failed"
                  % self.msgs_fail)
        if self.errors:
            print("Error: errors detected:")
            for err in self.errors:
                print("  '%s' (occurred %d times)" % (err, self.errors[err]))

        total = self.msgs_ok + self.msgs_fail
        print("Total Messages: %d" % total)

        if self.start_time is None or self.stop_time is None:
            # nothing was timed (e.g. right after reset()); subtracting the
            # None timestamps would raise a TypeError
            print("Test Interval: n/a")
        else:
            delta_time = self.stop_time - self.start_time
            print("Test Interval: %f - %f (%f secs)" % (self.start_time,
                                                        self.stop_time,
                                                        delta_time))

            if delta_time > 0.0:
                print("Aggregate throughput: %f msgs/sec"
                      % (float(total)/delta_time))

        latency = self.latency
        if latency.count:
            print("Latency %d samples (msecs): Average %f StdDev %f"
                  " Min %f Max %f"
                  % (latency.count,
                     latency.average(), latency.std_deviation(),
                     latency.min, latency.max))
            print("Latency Distribution: ")
            latency.print_distribution()


In [4]:
def load_stats(param):
    """Loads the stats for the controller output file.

    Looks for a ``*controller*.log`` file in the run's backup directory
    (``param["backup_dir"]`` under ``RESULT_PATH``) and parses its first two
    lines as JSON.

    Returns:
        A 2-tuple with the decoded first and second line of the log.

    Raises:
        FileNotFoundError: no controller log was found for this run.
        ValueError: the log holds fewer than two lines, or a line is not
            valid JSON.
    """
    controller_docker = os.path.join(RESULT_PATH, param["backup_dir"], "*controller*.log")
    # glob returns files in arbitrary order, so sort for a deterministic pick
    candidates = sorted(glob.glob(controller_docker))
    if not candidates:
        raise FileNotFoundError(
            "no controller log matching %s" % controller_docker)
    # beware of the files _docker.log that would also match: prefer the
    # other ones and only fall back to them when nothing else is there
    preferred = [path for path in candidates
                 if not path.endswith("_docker.log")]
    controller_log = (preferred or candidates)[0]
    with open(controller_log) as f:
        # only the first two lines are needed, no need to read the rest
        first = f.readline()
        second = f.readline()
    if not first or not second:
        raise ValueError(
            "%s does not contain the two expected lines" % controller_log)
    return json.loads(first), json.loads(second)


In [5]:
# Read the campaign parameters stored at the root of the results directory.
params = []
with open(os.path.join(RESULT_PATH, "./params.json")) as params_file:
    params = json.load(params_file)

In [6]:
def build_agg_results(results):
    """Merge several serialized test results and compute the message rate.

    Args:
        results: iterable of dicts in the ``TestResults.to_dict`` layout
            (typically freshly decoded from JSON). They are not modified.

    Returns:
        The merged results as a dict (``TestResults.to_dict`` layout) with an
        extra ``"rate"`` entry: total messages (ok + failed) divided by the
        merged interval length. The rate is 0.0 when there is no measurable
        interval (no results, missing timestamps or a non-positive duration).
    """
    agg = TestResults()
    for result in results:
        # Work on copies so the caller's dicts are left untouched, and go
        # through from_dict so the latency histogram keys that JSON turned
        # into strings are converted back to ints.
        data = dict(result)
        if "latency" in data:
            data["latency"] = dict(data["latency"])
        agg.merge(TestResults.from_dict(data))

    total = agg.msgs_ok + agg.msgs_fail
    rate = 0.0
    if agg.start_time is not None and agg.stop_time is not None:
        duration = agg.stop_time - agg.start_time
        if duration > 0:
            rate = float(total)/duration
    result = agg.to_dict()
    result["rate"] = rate
    return result

    
for param in params:
    # the controller log yields two mappings, unpacked as clients then servers
    clients, servers = load_stats(param)

    # what has been seen by ombt
    param["_ombt_clients"] = len(clients)
    param["_ombt_servers"] = len(servers)

    # The raw per-agent results could also be kept, e.g.:
    #   param["_raw_servers_test_result"] = servers
    #   param["_raw_clients_test_result"] = clients
    server_results = servers.values()
    param["_agg_servers"] = build_agg_results(server_results)
    client_results = clients.values()
    param["_agg_clients"] = build_agg_results(client_results)


In [7]:
# Save the parameters, now carrying the aggregated results, for later reuse.
with open("params_calculated.json", "w") as output_file:
    json.dump(params, output_file)

In [8]:
def augment(mydict, myparams, in_key, out_key=None):
    """Add to ``mydict`` a column made of ``in_key`` taken from each param.

    The column is stored under ``out_key``, or under ``in_key`` when
    ``out_key`` is empty or not given.
    """
    column_name = out_key if out_key else in_key
    column = [entry[in_key] for entry in myparams]
    mydict[column_name] = column

In [9]:
# Collect, column by column, the values that go into the summary table.
extraction = {}
for source_key, column in (("_ombt_clients", "clients"),
                           ("_ombt_servers", "servers"),
                           ("executor", None),
                           ("call_type", None)):
    augment(extraction, params, source_key, column)
# the rates live one level deeper, inside the aggregated results
extraction["server_rate"] = [run["_agg_servers"]["rate"] for run in params]
extraction["client_rate"] = [run["_agg_clients"]["rate"] for run in params]

In [10]:
# Build the summary table: one column per key collected in `extraction`.
df = pandas.DataFrame(extraction)
df  # bare expression so the notebook displays the table

Unnamed: 0,call_type,client_rate,clients,executor,server_rate,servers
0,rpc-call,371.334883,1,threading,371.778178,1
1,rpc-call,348.927736,1,eventlet,349.326537,1
2,rpc-cast,1344.59928,1,eventlet,1347.33293,1
3,rpc-cast,1156.207023,1,threading,1158.226457,1
4,rpc-call,713.77562,2,threading,714.620277,1
5,rpc-cast,2489.178289,2,threading,2493.470423,2
6,rpc-call,754.70188,2,eventlet,755.469575,2
7,rpc-call,675.494213,2,eventlet,676.310842,1
8,rpc-cast,2337.756858,2,threading,1744.275477,1
9,rpc-cast,2550.988087,2,eventlet,2554.089459,2


## Recovering metrics from InfluxDB

### Initializing docker

In [11]:
import docker
from influxdb import InfluxDBClient

# Docker client built from the environment; it is used below to start (and
# remove) one influxdb container per experiment run.
client = docker.from_env()

### Getting the interesting metrics from influx

In [18]:
import tarfile
import time

# For every run: unpack its InfluxDB data, serve it from a throw-away influxdb
# container, query the metric of interest and always remove the container.
for param in params:
    # get experimentation boundaries
    # NOTE(review): start_time takes the max() of the two start times, just
    # like stop_time; if the whole experiment window is wanted, min() looks
    # more likely -- confirm. These values are not used further in this cell.
    start_time = max(param['_agg_clients']['start_time'], param['_agg_servers']['start_time'])
    stop_time = max(param['_agg_clients']['stop_time'], param['_agg_servers']['stop_time'])
    duration = stop_time - start_time

    tar = os.path.join(RESULT_PATH, param['backup_dir'], 'influxdb-data.tar.gz')
    # Context manager so the archive handle is closed after extraction.
    # NOTE: extractall() trusts the member paths; only use it on archives
    # produced by our own experiments.
    with tarfile.open(tar) as archive:
        archive.extractall()
    #docker run --name influxdb -v $(pwd)/influxdb-data:/var/lib/influxdb -p 8083:8083 -p 8086:8086 -ti influxdb
    # Reset on every iteration: if run() fails, the finally clause must not
    # hit an unbound name or the already removed container of the previous run.
    container = None
    try:
        container = client.containers.run(
            'influxdb',
            detach=True,
            ports={'8086/tcp': 8086, '8083/tcp': 8083},
            volumes={os.path.join(os.getcwd(), 'influxdb-data'): {'bind': '/var/lib/influxdb', 'mode': 'rw'}}
        )
        influx = InfluxDBClient(database='telegraf')
        # TODO(msimonin): make a tcp socket retry test on port 8083
        time.sleep(5)
        result = influx.query("select max(messages_published) from rabbitmq_overview")
        print(param["nbr_clients"], param["nbr_servers"], result)
    except Exception as e:
        # Best effort: report the failure and carry on with the next run.
        # (The previous `except Error` referenced an undefined name, turning
        # any failure into a NameError.)
        print(e)
    finally:
        if container is not None:
            container.remove(force=True)

1 1 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T09:48:10Z', 'max': 0}]})
1 1 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T09:50:15Z', 'max': 2002}]})
1 1 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T09:52:00Z', 'max': 1002}]})
1 1 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T09:53:45Z', 'max': 1002}]})
2 1 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T09:57:05Z', 'max': 4004}]})
2 2 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T09:59:00Z', 'max': 2004}]})
2 2 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T10:01:00Z', 'max': 4004}]})
2 1 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T10:03:00Z', 'max': 4004}]})
2 1 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T10:04:55Z', 'max': 2004}]})
2 2 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T10:08:20Z', 'max': 2004}]})
2 1 ResultSet({'('rabbitmq_overview', None)': [{'time': '2017-12-18T10:10:20Z', 'max': 2004}]})