In [None]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

import client as cp
import middleware as mp

# Matplot lib settings
%matplotlib notebook
matplotlib.rcParams.update({
        'font.size': 11,
        'font.family': 'sans-serif',
        'xtick.labelsize': 9,
        'ytick.labelsize': 9
    })

# Numpy settings
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)

In [None]:
# Root directory of the replication experiment logs.  The original author's
# location stays the default; set the ASL_REPLICATION_LOG_DIR environment
# variable to point the notebook elsewhere without editing this cell.
base_dir = os.environ.get(
    'ASL_REPLICATION_LOG_DIR',
    '/Users/ruifengxu/Development/asl-fall16-project/logs/replication/'
)
# Log paths relative to base_dir.  Placeholders are filled positionally with
# (num_servers, replication_factor, repetition, client log id) and
# (num_servers, replication_factor, repetition) respectively.
client_log_template = 'server-{}-replication-{}/repetition-{}-client-{}.log'
middleware_log_template = 'server-{}-replication-{}/repetition-{}-trace.log'
# Bar colours used by the plotting cell; `markers` is not referenced by the
# cells visible in this part of the notebook.
colors = ['mediumturquoise', 'red', 'green', 'saddlebrown', 'royalblue', 'darkmagenta', 'orange']
markers = ['^', 'v', '<', 'o', 's', '*', 'd', '>']

# replication_factors[i] lists the replication factors measured for the i-th
# server count of [3, 5, 7] (the list the cells below iterate over).
replication_factors = [
    [1, 2, 3],
    [1, 3, 5],
    [1, 4, 7]
]
# Passed as `num_clients` to the parse functions, which multiply it by the
# number of client machines -- presumably clients per machine (TODO confirm).
num_clients = 110

In [None]:
# Pooled variance formulation
def parse_client_entries_2(num_servers,
                           replication_factor,
                           num_repetitions,
                           num_client_machines,
                           num_clients,
                           request_type,
                           offset,
                           duration):
    """Aggregate client throughput and response time (pooled variance).

    For every repetition and every client machine the log
    ``base_dir/client_log_template`` (filled with num_servers,
    replication_factor, repetition and the 1-based log id) is parsed with
    ``cp.parse_log``.  Only a window of ``duration`` entries beginning at
    the 1-based entry number ``offset`` is evaluated.

    * Throughput: per repetition, the sum over client machines of
      ``ops_in_window // duration``, where the ops in the window are the
      difference of the ``num_ops`` field of two '<type>_global' entries.
    * Response time: the ``num_ops``-weighted mean of the per-entry
      ``rt_mean`` of the '<type>_local' entries; the standard deviation is
      pooled from the per-entry ``rt_std`` values.

    A one-line summary is printed as a side effect.

    Args:
        num_servers: Server count; used for the log path and the summary.
        replication_factor: Replication factor; log path and summary.
        num_repetitions: Number of repetitions (1..num_repetitions are read).
        num_client_machines: Number of client logs per repetition.
        num_clients: Only used in the printed summary, multiplied by
            ``num_client_machines``.
        request_type: 'g' selects the 'get_*' keys, 's' the 'set_*' keys,
            anything else the 'all_*' keys.
        offset: 1-based number of the first entry of the window.
        duration: Number of entries in the window.

    Returns:
        ``{'tp': {'mean', 'std', 'ci'}, 'rt': {'mean', 'std', 'ci'}}``.

    Raises:
        ValueError: If ``offset`` or ``duration`` is smaller than 1.
    """
    # offset < 1 would turn the slice/index arithmetic below into negative
    # indices that silently wrap around; duration < 1 would divide by zero.
    if offset < 1 or duration < 1:
        raise ValueError('offset and duration must both be >= 1')

    key_prefix = {'g': 'get', 's': 'set'}.get(request_type, 'all')
    local_key = key_prefix + '_local'
    global_key = key_prefix + '_global'

    tps = np.zeros(num_repetitions)

    # Upper bound on the number of window entries; logs shorter than the
    # window contribute fewer, which rts_index keeps track of.
    max_entries = duration * num_repetitions * num_client_machines
    rts = np.zeros(max_entries)
    rts_weight = np.zeros(max_entries)
    rts_index = 0
    rts_var = 0.0

    start = offset
    end = start + duration

    for repetition in range(1, num_repetitions + 1):
        repetition_tp = 0

        for log_id in range(1, num_client_machines + 1):
            filename = os.path.join(
                base_dir,
                client_log_template.format(num_servers, replication_factor, repetition, log_id)
            )
            data = cp.parse_log(filename)

            global_entries = data[global_key]
            # Entry start - 2 is the last one before the window.  For
            # offset == 1 there is none (index -1 would wrap to the final
            # entry), so the baseline is taken as zero -- this assumes the
            # global num_ops counter is cumulative from the start of the log.
            ops_before = global_entries[start - 2]['num_ops'] if start >= 2 else 0
            num_ops = global_entries[end - 2]['num_ops'] - ops_before

            repetition_tp = repetition_tp + (num_ops // duration)

            for entry in data[local_key][start - 1: end - 1]:
                rts[rts_index] = entry['rt_mean']
                rts_weight[rts_index] = entry['num_ops']
                rts_index = rts_index + 1
                # Pooled variance numerator: sum of (n_i - 1) * s_i^2.
                rts_var = rts_var + float(
                    entry['rt_std'] * entry['rt_std'] * (entry['num_ops'] - 1))
        tps[repetition - 1] = repetition_tp

    tp_mean = np.floor(np.mean(tps))
    tp_std = np.floor(np.std(tps, ddof=1))
    # 2.776 is the two-sided 95% Student-t critical value for 4 degrees of
    # freedom, so this interval is exact only for num_repetitions == 5.
    tp_ci = np.floor(2.776 * tp_std / np.sqrt(num_repetitions))

    rt_mean = np.average(rts, weights=rts_weight)
    # Pooled variance denominator: sum(n_i) - k, with k the number of entries
    # actually collected (equal to max_entries when every log is complete).
    rt_std = np.sqrt(rts_var / (np.sum(rts_weight) - rts_index))
    # 1.960 is the two-sided 95% critical value of the normal distribution.
    rt_ci = 1.960 * rt_std / np.sqrt(rts_index)

    # Last column: 1000 * total clients / rt_mean, divided by the measured
    # throughput -- presumably a consistency check that expects rt_mean in
    # milliseconds (TODO confirm units against cp.parse_log).
    print('{:3} {:3} ({:3}): {:10.0f} {:10.0f} {:10.0f} {:10.3f} {:10.3f} {:10.3f} {:10.3f}'.format(
            num_servers, replication_factor, num_clients * num_client_machines,
            tp_mean, tp_std, tp_ci,
            rt_mean, rt_std, rt_ci,
            1 / rt_mean * 1000 * num_clients * num_client_machines / tp_mean))
    return {
        'tp': {
            'mean': tp_mean,
            'std': tp_std,
            'ci': tp_ci
        },
        'rt': {
            'mean': rt_mean,
            'std': rt_std,
            'ci': rt_ci
        }
    }

In [None]:
# Percentile variation
def parse_client_entries_3(num_servers,
                           replication_factor,
                           num_repetitions,
                           num_client_machines,
                           num_clients,
                           request_type,
                           offset,
                           duration):
    """Aggregate client throughput and response time (percentile variant).

    For every repetition and every client machine the log
    ``base_dir/client_log_template`` (filled with num_servers,
    replication_factor, repetition and the 1-based log id) is parsed with
    ``cp.parse_log``.  Only a window of ``duration`` entries beginning at
    the 1-based entry number ``offset`` is evaluated for throughput and
    mean response time.

    * Throughput: per repetition, the sum over client machines of
      ``ops_in_window // duration``, where the ops in the window are the
      difference of the ``num_ops`` field of two '<type>_global' entries.
    * Response time: the ``num_ops``-weighted mean of the per-entry
      ``rt_mean`` of the '<type>_local' entries.
    * Percentiles: the '<type>_bucket' sequences of all logs are summed
      element-wise and handed to ``cp.get_percentile`` for the 5th and 95th
      percentile.  The bucket data is used as returned by the parser; it is
      not restricted to the window in this function.

    A one-line summary is printed as a side effect.

    Args:
        num_servers: Server count; used for the log path and the summary.
        replication_factor: Replication factor; log path and summary.
        num_repetitions: Number of repetitions (1..num_repetitions are read).
        num_client_machines: Number of client logs per repetition.
        num_clients: Only used in the printed summary, multiplied by
            ``num_client_machines``.
        request_type: 'g' selects the 'get_*' keys, 's' the 'set_*' keys,
            anything else the 'all_*' keys.
        offset: 1-based number of the first entry of the window.
        duration: Number of entries in the window.

    Returns:
        ``{'tp': {'mean', 'std', 'ci'}, 'rt': {'mean', 'plow', 'phigh'}}``.

    Raises:
        ValueError: If ``offset`` or ``duration`` is smaller than 1.
    """
    # offset < 1 would turn the slice/index arithmetic below into negative
    # indices that silently wrap around; duration < 1 would divide by zero.
    if offset < 1 or duration < 1:
        raise ValueError('offset and duration must both be >= 1')

    key_prefix = {'g': 'get', 's': 'set'}.get(request_type, 'all')
    local_key = key_prefix + '_local'
    global_key = key_prefix + '_global'
    bucket_key = key_prefix + '_bucket'

    start = offset
    end = start + duration

    tps = np.zeros(num_repetitions)

    # One slot per expected window entry; unused slots keep weight 0 and so
    # do not influence the weighted mean.
    max_entries = num_client_machines * num_repetitions * duration
    rts = np.zeros(max_entries)
    rts_weight = np.zeros(max_entries)
    rts_index = 0
    rts_bucket = None

    for repetition in range(1, num_repetitions + 1):
        repetition_tp = 0

        for log_id in range(1, num_client_machines + 1):
            filename = os.path.join(
                base_dir,
                client_log_template.format(num_servers, replication_factor, repetition, log_id)
            )
            data = cp.parse_log(filename)

            global_entries = data[global_key]
            # Entry start - 2 is the last one before the window.  For
            # offset == 1 there is none (index -1 would wrap to the final
            # entry), so the baseline is taken as zero -- this assumes the
            # global num_ops counter is cumulative from the start of the log.
            ops_before = global_entries[start - 2]['num_ops'] if start >= 2 else 0
            num_ops = global_entries[end - 2]['num_ops'] - ops_before

            repetition_tp = repetition_tp + (num_ops // duration)

            for entry in data[local_key][start - 1: end - 1]:
                rts[rts_index] = entry['rt_mean']
                rts_weight[rts_index] = entry['num_ops']
                rts_index = rts_index + 1

            # Accumulate the bucket counts element-wise across all logs.
            if rts_bucket is None:
                rts_bucket = list(data[bucket_key])
            else:
                rts_bucket = [x + y for x, y in zip(rts_bucket, data[bucket_key])]

        tps[repetition - 1] = repetition_tp

    tp_mean = np.floor(np.mean(tps))
    tp_std = np.floor(np.std(tps, ddof=1))
    # 2.776 is the two-sided 95% Student-t critical value for 4 degrees of
    # freedom, so this interval is exact only for num_repetitions == 5.
    tp_ci = np.floor(2.776 * tp_std / np.sqrt(num_repetitions))

    rt_mean = np.average(rts, weights=rts_weight)
    rt_plow = cp.get_percentile(rts_bucket, 5)
    rt_phigh = cp.get_percentile(rts_bucket, 95)

    # Last column: 1000 * total clients / rt_mean, divided by the measured
    # throughput -- presumably a consistency check that expects rt_mean in
    # milliseconds (TODO confirm units against cp.parse_log).
    print('{:3} {:3} ({:3}) {:10.0f} {:10.0f} {:10.0f} {:10.3f} {:10.3f} {:10.3f} {:10.3f}'.format(
            num_servers, replication_factor, num_clients * num_client_machines,
            tp_mean, tp_std, tp_ci,
            rt_mean, rt_plow, rt_phigh,
            1 / rt_mean * 1000 * num_clients * num_client_machines / tp_mean))

    return {
        'tp': {
            'mean': tp_mean,
            'std': tp_std,
            'ci': tp_ci
        },
        'rt': {
            'mean': rt_mean,
            'plow': rt_plow,
            'phigh': rt_phigh
        }
    }

In [None]:
# Print, for every (server count, replication factor) configuration, the
# value of mp.count_client_connection_time for repetitions 1..5 on one line;
# a blank line separates the server counts.
server_counts = [3, 5, 7]
for group, servers in enumerate(server_counts):
    for factor in replication_factors[group]:
        for rep in range(1, 6):
            trace_name = middleware_log_template.format(servers, factor, rep)
            trace_path = os.path.join(base_dir, trace_name)
            print(mp.count_client_connection_time(trace_path), end = ' ')
        print()
    print()

In [None]:
# Summary over all request types ('a'): parse_client_entries_3 prints one row
# per (server count, replication factor); a blank line separates the groups.
server_counts = [3, 5, 7]
for group, servers in enumerate(server_counts):
    for factor in replication_factors[group]:
        data = parse_client_entries_3(
            num_servers=servers,
            replication_factor=factor,
            num_repetitions=5,
            num_client_machines=3,
            num_clients=num_clients,
            request_type='a',
            offset=11,
            duration=40,
        )

    print()

In [None]:
# Grouped bar charts for 'get' requests, saved as replication-get.png:
#   (a) aggregated throughput with its confidence interval as error bar,
#   (b) mean response time with whiskers reaching to the 5th / 95th
#       percentile returned by parse_client_entries_3.
# Each group is a server count; the three bars of a group are the three
# replication factors configured for it in `replication_factors`.
server_counts = [3, 5, 7]
replication_labels = ('No-replication', 'Half-replication', 'Full-replication')
hatches = ('/', '.', '-')
width = 0.20
index = np.arange(len(server_counts))

# Parse every configuration exactly once and reuse the result for both
# panels: results[level][i] belongs to server_counts[i] at replication level
# `level`.  (Each call reads 5 repetitions x 3 client logs and prints one
# summary line.)
results = [
    [
        parse_client_entries_3(num_servers, replication_factors[i][level], 5, 3, num_clients, 'g', 11, 40)
        for i, num_servers in enumerate(server_counts)
    ]
    for level in range(len(replication_labels))
]

# Create both panels in one call instead of creating a full-figure axes and
# then overlaying plt.subplot() axes on top of it.
fig, (ax_tp, ax_rt) = plt.subplots(1, 2, figsize=(10, 3))

tp_rects = []
rt_rects = []
for level, level_results in enumerate(results):
    left = index + width * (level + 1)
    # align='edge' makes `left` the left edge of each bar on every matplotlib
    # version, so the group centre is index + 2.5 * width -- exactly where
    # the x ticks are placed below.
    bar_style = dict(alpha=0.6, color=colors[level], hatch=hatches[level], align='edge')

    tp_rects.append(ax_tp.bar(
        left,
        [r['tp']['mean'] for r in level_results],
        width,
        yerr=[r['tp']['ci'] for r in level_results],
        **bar_style))

    # Asymmetric error bars: distance from the mean down to the low
    # percentile and up to the high percentile.
    rt_mean = [r['rt']['mean'] for r in level_results]
    rt_below = [r['rt']['mean'] - r['rt']['plow'] for r in level_results]
    rt_above = [r['rt']['phigh'] - r['rt']['mean'] for r in level_results]
    rt_rects.append(ax_rt.bar(
        left, rt_mean, width, yerr=[rt_below, rt_above], **bar_style))

# Axis decoration shared by both panels.
for ax, rects, y_max, y_step, y_label, title in (
        (ax_tp, tp_rects, 20000, 2000, 'Aggregated throughput (request/s)', '(a) Get throughput'),
        (ax_rt, rt_rects, 80, 10, 'Average response time (ms)', '(b) Get response time'),
):
    ax.set_xticks(index + width * 2.5)
    ax.set_xticklabels(('3 Servers', '5 Servers', '7 Servers'))
    ax.set_ylim([0, y_max])
    ax.set_yticks(np.arange(0, y_max, y_step))
    ax.set_ylabel(y_label)
    ax.legend([r[0] for r in rects], replication_labels,
              bbox_to_anchor=(0., 1., 1., 0.), loc=9, ncol=3,
              mode="expand", borderaxespad=0., fontsize='small')
    ax.set_title(title)

plt.tight_layout()
fig.savefig('replication-get.png', dpi=300, bbox_inches='tight', pad_inches=0)

In [None]:
def make_middleware_time_boxplot(num_servers,
                                 replication_factor,
                                 num_repetitions,
                                 offset,
                                 duration):
    """Draw and save a box plot of middleware timings for get requests.

    For each repetition the middleware trace
    ``base_dir/middleware_log_template`` (filled with num_servers,
    replication_factor and the repetition number) is parsed with
    ``mp.parse_log(..., request_type='g')``.  Entries whose timestamp
    (field 2) lies in ``[first_timestamp + offset,
    first_timestamp + offset + duration)`` are used, except those whose
    field 6 is ``False``.

    Three series are plotted, each divided by 1,000,000 and labelled in
    milliseconds (so the raw values are presumably nanoseconds -- TODO
    confirm against mp.parse_log): field 9 ('Queue'), field 11 ('Server')
    and the sum of fields 8..12 ('Total').  The figure is written to
    ``replication-boxplot-<num_servers>-<replication_factor>-g.png``.

    Args:
        num_servers: Server count; used for the log path, title and file name.
        replication_factor: Replication factor; log path, title and file name.
        num_repetitions: Number of repetitions (1..num_repetitions are read).
        offset: Added to the first entry's timestamp to get the window start.
        duration: Length of the window, in the timestamp's unit.

    Returns:
        None.
    """
    queue_times = []
    read_response_times = []
    total_times = []

    for repetition in range(1, num_repetitions + 1):
        filename = os.path.join(
            base_dir,
            middleware_log_template.format(num_servers, replication_factor, repetition)
        )
        data = mp.parse_log(filename, request_type='g')
        start_time = data[0][2] + offset
        end_time = start_time + duration
        for entry in data:
            if entry[2] < start_time:
                continue
            # '>=' rather than '==': if no entry carries exactly end_time the
            # window must still close instead of running to the end of the
            # log.  Like the original, this relies on entries being ordered
            # by timestamp.
            if entry[2] >= end_time:
                break
            if entry[6] is False:
                continue
            queue_times.append(entry[9])
            read_response_times.append(entry[11])
            total_times.append(np.sum(entry[8:13]))

    fig = plt.figure(figsize=(3, 3))
    plt.boxplot([
            np.array(queue_times) / 1000000,
            np.array(read_response_times) / 1000000,
            np.array(total_times) / 1000000,
        ],
        showmeans=True,
        sym='',        # do not draw outlier markers
        whis=[0, 95]   # whiskers span the 0th to the 95th percentile
    )
    plt.xticks([1, 2, 3], ['Queue', 'Server', 'Total'])
    plt.ylim([0, 40])
    plt.yticks(np.arange(0, 40, 5))
    plt.ylabel('Time (ms)')
    ax = plt.gca()
    ax.yaxis.grid(True)
    ax.set_title('{} servers {} replication get'.format(num_servers, replication_factor))
    fig.savefig('replication-boxplot-{}-{}-{}.png'.format(num_servers, replication_factor, 'g'),
                dpi=300, bbox_inches='tight', pad_inches=0.2)

In [None]:
# Box plot for the 7-server, replication-factor-7 configuration: 5
# repetitions, window starting 11 after the first trace timestamp and
# lasting 40.
make_middleware_time_boxplot(
    num_servers=7,
    replication_factor=7,
    num_repetitions=5,
    offset=11,
    duration=40,
)