In [None]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

import client as cp
import middleware as mp

# Matplotlib settings
%matplotlib notebook
# Global figure typography: base font plus slightly smaller tick labels.
matplotlib.rcParams['font.size'] = 11
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['xtick.labelsize'] = 9
matplotlib.rcParams['ytick.labelsize'] = 9

# Numpy settings: three printed decimals, no scientific notation for small values.
np.set_printoptions(precision=3, suppress=True)

In [None]:
# Root directory of the experiment logs. Set the ASL_LOG_DIR environment
# variable to run the notebook against a different checkout; the default is the
# location the analysis was originally run from.
base_dir = os.environ.get(
    'ASL_LOG_DIR',
    '/Users/ruifengxu/Development/asl-fall16-project/logs/write/'
)
# Log paths relative to base_dir. Placeholders, in order:
#   client log:     servers, replication factor, write percent, repetition, client log id
#   middleware log: servers, replication factor, write percent, repetition
client_log_template = 'server-{}-replication-{}-write-{}/repetition-{}-client-{}.log'
middleware_log_template = 'server-{}-replication-{}-write-{}/repetition-{}-trace.log'
# Plot styling palettes (not referenced by the cells visible in this part of the notebook).
colors = ['mediumturquoise', 'red', 'green', 'saddlebrown', 'royalblue', 'darkmagenta', 'orange']
markers = ['^', 'v', '<', 'o', 's', '*', 'd', '>']

# Write percentages that were measured.
write_percents = [
    1, 5, 10
]
# Replication factors measured per server count; row i belongs to the i-th
# entry of the [3, 5, 7] server counts iterated by the analysis loops.
replication_factors = [
    [1, 3],
    [1, 5],
    [1, 7]
]
# Multiplied by the number of client machines when the summaries are printed,
# so presumably the number of clients per client machine -- TODO confirm.
num_clients = 110

In [None]:
# Pooled variance formulation
def parse_client_entries_2(num_servers,
                           replication_factor,
                           num_repetitions, 
                           num_client_machines, 
                           num_clients,
                           write_percent,
                           request_type,
                           offset, 
                           duration):
    """Aggregate client throughput and response time with a pooled variance.

    Every client log of every repetition is parsed with ``cp.parse_log``.
    Throughput of one repetition is the sum over client machines of the
    operations counted inside the measurement window, integer-divided by
    ``duration``. Response time is the ``num_ops``-weighted mean of the
    per-entry ``rt_mean`` values; its standard deviation pools the per-entry
    ``rt_std`` values. A one-line summary is printed as a side effect.

    Parameters
    ----------
    num_servers, replication_factor, write_percent
        Experiment configuration; only used to build the log file name and
        for the printed summary.
    num_repetitions : int
        Number of repetitions; logs ``1..num_repetitions`` are read.
    num_client_machines : int
        Number of client logs per repetition; logs ``1..num_client_machines``
        are read.
    num_clients : int
        Only used in the printed summary (multiplied by
        ``num_client_machines``).
    request_type : str
        ``'g'`` selects the ``get_*`` series, ``'s'`` the ``set_*`` series,
        anything else the ``all_*`` series.
    offset : int
        1-based position of the first log entry inside the window.
    duration : int
        Number of consecutive log entries in the window.

    Returns
    -------
    dict
        ``{'tp': {'mean', 'std', 'ci'}, 'rt': {'mean', 'std', 'ci'}}``.

    Raises
    ------
    ValueError
        If ``offset`` is smaller than 1 (negative indices would silently wrap
        around to the end of the log).
    """
    if offset < 1:
        raise ValueError('offset must be >= 1, got {}'.format(offset))

    if request_type == 'g':
        local_key, global_key = 'get_local', 'get_global'
    elif request_type == 's':
        local_key, global_key = 'set_local', 'set_global'
    else:
        local_key, global_key = 'all_local', 'all_global'

    start = offset
    end = start + duration

    tps = np.zeros(num_repetitions)
    rt_means = []
    rt_weights = []
    rts_var = 0.0

    for repetition in range(1, num_repetitions + 1):
        repetition_tp = 0

        for log_id in range(1, num_client_machines + 1):
            filename = os.path.join(
                base_dir,
                client_log_template.format(num_servers, replication_factor, write_percent, repetition, log_id)
            )
            data = cp.parse_log(filename)

            # The global series is used as a running operation counter: the
            # window total is the counter at the last window entry minus the
            # counter just before the first one. For offset == 1 there is no
            # earlier entry, so the baseline is 0 (index -1 would wrap to the
            # end of the log and produce a negative count).
            global_entries = data[global_key]
            ops_before = global_entries[start - 2]['num_ops'] if start >= 2 else 0
            num_ops = global_entries[end - 2]['num_ops'] - ops_before

            repetition_tp = repetition_tp + (num_ops // duration)

            for entry in data[local_key][start - 1: end - 1]:
                rt_means.append(entry['rt_mean'])
                rt_weights.append(entry['num_ops'])
                # Numerator of the pooled variance: sum of (n_i - 1) * s_i^2.
                rts_var = rts_var + float(
                    entry['rt_std'] * entry['rt_std'] * (entry['num_ops'] - 1))
        tps[repetition - 1] = repetition_tp

    rts = np.array(rt_means, dtype=float)
    rts_weight = np.array(rt_weights, dtype=float)
    # Number of samples actually read; equals
    # duration * num_repetitions * num_client_machines when every log covers
    # the whole window.
    num_samples = len(rt_means)

    tp_mean = np.floor(np.mean(tps))
    tp_std = np.floor(np.std(tps, ddof=1))
    # 2.776 is the two-sided 95% Student-t critical value for 4 degrees of
    # freedom, i.e. it is only exact for num_repetitions == 5.
    tp_ci = np.floor(2.776 * tp_std / np.sqrt(num_repetitions))

    rt_mean = np.average(rts, weights=rts_weight)
    # Pooled standard deviation: degrees of freedom are total operations minus
    # the number of pooled samples.
    rt_std = np.sqrt(rts_var / (np.sum(rts_weight) - num_samples))
    # 1.960 is the two-sided 95% quantile of the standard normal distribution.
    rt_ci = 1.960 * rt_std / np.sqrt(num_samples)

    print('{:3} {:3} ({:3}): {:10.0f} {:10.0f} {:10.0f} {:10.3f} {:10.3f} {:10.3f} {:10.3f}'.format(
            num_servers, replication_factor, num_clients * num_client_machines,
            tp_mean, tp_std, tp_ci,
            rt_mean, rt_std, rt_ci,
            1 / rt_mean * 1000 * num_clients * num_client_machines / tp_mean))    
    return {
        'tp': {
            'mean': tp_mean,
            'std': tp_std,
            'ci': tp_ci
        },
        'rt': {
            'mean': rt_mean,
            'std': rt_std,
            'ci': rt_ci
        }
    }

In [None]:
# Percentile variation
def parse_client_entries_3(num_servers,
                           replication_factor,
                           num_repetitions, 
                           num_client_machines, 
                           num_clients,
                           write_percent,
                           request_type,
                           offset, 
                           duration):
    """Aggregate client throughput and response time with percentile bounds.

    Every client log of every repetition is parsed with ``cp.parse_log``.
    Throughput of one repetition is the sum over client machines of the
    operations counted inside the measurement window, integer-divided by
    ``duration``. Response time is the ``num_ops``-weighted mean of the
    per-entry ``rt_mean`` values. The per-log bucket series are summed
    element-wise over all logs and handed to ``cp.get_percentile`` for the
    5th and 95th percentile. A one-line summary is printed as a side effect.

    Parameters
    ----------
    num_servers, replication_factor, write_percent
        Experiment configuration; only used to build the log file name and
        for the printed summary.
    num_repetitions : int
        Number of repetitions; logs ``1..num_repetitions`` are read.
    num_client_machines : int
        Number of client logs per repetition; logs ``1..num_client_machines``
        are read.
    num_clients : int
        Only used in the printed summary (multiplied by
        ``num_client_machines``).
    request_type : str
        ``'g'`` selects the ``get_*`` series, ``'s'`` the ``set_*`` series,
        anything else the ``all_*`` series.
    offset : int
        1-based position of the first log entry inside the window.
    duration : int
        Number of consecutive log entries in the window.

    Returns
    -------
    dict
        ``{'tp': {'mean', 'std', 'ci'}, 'rt': {'mean', 'plow', 'phigh'}}``.

    Raises
    ------
    ValueError
        If ``offset`` is smaller than 1 (negative indices would silently wrap
        around to the end of the log).
    """
    if offset < 1:
        raise ValueError('offset must be >= 1, got {}'.format(offset))

    if request_type == 'g':
        local_key, global_key, bucket_key = 'get_local', 'get_global', 'get_bucket'
    elif request_type == 's':
        local_key, global_key, bucket_key = 'set_local', 'set_global', 'set_bucket'
    else:
        local_key, global_key, bucket_key = 'all_local', 'all_global', 'all_bucket'

    start = offset
    end = start + duration

    tps = np.zeros(num_repetitions)
    rt_means = []
    rt_weights = []
    rts_bucket = None

    for repetition in range(1, num_repetitions + 1):
        repetition_tp = 0

        for log_id in range(1, num_client_machines + 1):
            filename = os.path.join(
                base_dir,
                client_log_template.format(num_servers, replication_factor, write_percent, repetition, log_id)
            )
            data = cp.parse_log(filename)

            # The global series is used as a running operation counter: the
            # window total is the counter at the last window entry minus the
            # counter just before the first one. For offset == 1 there is no
            # earlier entry, so the baseline is 0 (index -1 would wrap to the
            # end of the log and produce a negative count).
            global_entries = data[global_key]
            ops_before = global_entries[start - 2]['num_ops'] if start >= 2 else 0
            num_ops = global_entries[end - 2]['num_ops'] - ops_before

            repetition_tp = repetition_tp + (num_ops // duration)

            for entry in data[local_key][start - 1: end - 1]:
                rt_means.append(entry['rt_mean'])
                rt_weights.append(entry['num_ops'])

            # Element-wise sum of the bucket series of all logs. Note that
            # zip() stops at the shorter series if their lengths differ.
            if rts_bucket is None:
                rts_bucket = list(data[bucket_key])
            else:
                rts_bucket = [x + y for x, y in zip(rts_bucket, data[bucket_key])]

        tps[repetition - 1] = repetition_tp

    rts = np.array(rt_means, dtype=float)
    rts_weight = np.array(rt_weights, dtype=float)

    tp_mean = np.floor(np.mean(tps))
    tp_std = np.floor(np.std(tps, ddof=1))
    # 2.776 is the two-sided 95% Student-t critical value for 4 degrees of
    # freedom, i.e. it is only exact for num_repetitions == 5.
    tp_ci = np.floor(2.776 * tp_std / np.sqrt(num_repetitions))

    rt_mean = np.average(rts, weights=rts_weight)
    rt_plow = cp.get_percentile(rts_bucket, 5)
    rt_phigh = cp.get_percentile(rts_bucket, 95)

    print('{:3} {:3} ({:3}) {:10.0f} {:10.0f} {:10.0f} {:10.3f} {:10.3f} {:10.3f} {:10.3f}'.format(
            num_servers, replication_factor, num_clients * num_client_machines,
            tp_mean, tp_std, tp_ci,
            rt_mean, rt_plow, rt_phigh,
            1 / rt_mean * 1000 * num_clients * num_client_machines / tp_mean))    
    
    return {
        'tp': {
            'mean': tp_mean,
            'std': tp_std,
            'ci': tp_ci
        },
        'rt': {
            'mean': rt_mean,
            'plow': rt_plow,
            'phigh': rt_phigh
        }
    }

In [None]:
# For every write percentage and server count, print one row per replication
# factor holding the value mp.count_client_connection_time reports for each of
# the repetition traces 1..5.
for write_percent in write_percents:
    print(write_percent)
    for i, num_servers in enumerate((3, 5, 7)):
        print(num_servers)
        for replication_factor in replication_factors[i]:
            for repetition in range(1, 6):
                filename = os.path.join(
                    base_dir,
                    middleware_log_template.format(
                        num_servers, replication_factor, write_percent, repetition))
                print(mp.count_client_connection_time(filename), end=' ')
            # Terminate the row of this configuration.
            print()
        print()
    print()

In [None]:
# Print the throughput / response-time summary line of every configuration
# (parse_client_entries_3 prints it as a side effect), grouped by write
# percentage with a blank line after each server count.
for write_percent in write_percents:
    print(write_percent)
    for i, num_servers in enumerate((3, 5, 7)):
        for replication_factor in replication_factors[i]:
            data = parse_client_entries_3(
                num_servers=num_servers,
                replication_factor=replication_factor,
                num_repetitions=5,
                num_client_machines=3,
                num_clients=num_clients,
                write_percent=write_percent,
                request_type='a',
                offset=11,
                duration=40,
            )

        print()