In [44]:
import pandas as pd
import json, numpy as np, math
import scipy.stats
import os

In [30]:
def get_bw(x):
    """Return one bandwidth value per entry of ``x["server_measurements"]``.

    For every measurement the value is ``8 * BytesReceived / ElapsedTime``,
    both fields being read from the entry's ``"TCPInfo"`` mapping and
    converted with ``float``.

    Parameters
    ----------
    x : mapping
        Record with a ``"server_measurements"`` sequence.

    Returns
    -------
    list of float
        Values in the same order as the measurements.

    Raises
    ------
    ZeroDivisionError
        If a measurement has an ``ElapsedTime`` of zero.
    """
    bandwidths = []
    for measurement in x["server_measurements"]:
        tcp_info = measurement["TCPInfo"]
        received = float(tcp_info["BytesReceived"])
        elapsed = float(tcp_info["ElapsedTime"])
        bandwidths.append(8 * received / elapsed)
    return bandwidths

def get_rtt(x):
    """Return the halved ``TCPInfo["RTT"]`` of every server measurement.

    Each RTT is converted with ``int``, divided by two and truncated back to
    an integer (truncation is toward zero).
    NOTE(review): the halving presumably approximates a one-way delay -
    confirm against the consumer of the traces.

    Parameters
    ----------
    x : mapping
        Record with a ``"server_measurements"`` sequence.

    Returns
    -------
    list of int
        Halved RTTs, in measurement order.
    """
    halved = []
    for measurement in x["server_measurements"]:
        raw_rtt = int(measurement["TCPInfo"]["RTT"])
        halved.append(int(raw_rtt / 2))
    return halved

def get_rtt_var(x):
    """Return the halved ``TCPInfo["RTTVar"]`` of every server measurement.

    Each value is converted with ``int``, divided by two and truncated back
    to an integer (truncation is toward zero).

    Parameters
    ----------
    x : mapping
        Record with a ``"server_measurements"`` sequence.

    Returns
    -------
    list of int
        Halved RTT variations, in measurement order.
    """
    halved = []
    for measurement in x["server_measurements"]:
        raw_var = int(measurement["TCPInfo"]["RTTVar"])
        halved.append(int(raw_var / 2))
    return halved

def get_loss(x):
    """Return a per-interval loss ratio between consecutive measurements.

    For each pair of adjacent entries in ``x["server_measurements"]`` the
    ratio is ``delta(TotalRetrans) / delta(SegsIn)``, both counters read from
    ``"TCPInfo"``.  When ``SegsIn`` did not change over an interval the
    previously computed ratio is repeated (starting from ``0``).

    Parameters
    ----------
    x : mapping
        Record with a ``"server_measurements"`` sequence.

    Returns
    -------
    list
        One ratio per interval, i.e. one element fewer than the number of
        measurements (empty for zero or one measurement).
    """
    measurements = x["server_measurements"]
    ratios = []
    last_ratio = 0
    for earlier, later in zip(measurements, measurements[1:]):
        current = later["TCPInfo"]
        previous = earlier["TCPInfo"]
        retrans_delta = float(current["TotalRetrans"]) - float(previous["TotalRetrans"])
        segs_delta = float(current["SegsIn"]) - float(previous["SegsIn"])
        if segs_delta != 0:
            # Only refresh the ratio when segments arrived in this interval.
            last_ratio = retrans_delta / segs_delta
        ratios.append(last_ratio)
    return ratios

def write_stat(x):
    """Write a synthetic trace CSV under ``traces/`` for one record.

    The file is named
    ``traces/{mean_bw}_{std_bw}_{mean_rtt}_{var_rtt}_{num_rows}.csv`` where
    ``mean_bw``/``std_bw`` are the ``x["bw_stat"]`` pair scaled by 1000 and
    truncated to ``int``, ``mean_rtt`` is the truncated mean of
    ``x["rtt_list"]``, ``var_rtt`` is the truncated mean of
    ``x["rtt_var_list"]`` and ``num_rows`` is ``len(x["rtt_list"])``.

    Each row is ``bw,rtt,rtt_var``; ``bw`` is ``mean_bw`` perturbed by
    ``std_bw`` times a draw from a standard normal truncated to
    ``[-0.5, 0.5]``.

    Parameters
    ----------
    x : mapping
        Must provide ``"bw_stat"`` (a ``(mean, std)`` pair), ``"rtt_list"``
        and ``"rtt_var_list"`` (indexable, at least as long as ``rtt_list``).

    Returns
    -------
    None
        The result is the file written to disk.
    """
    mean_bw, std_bw = x["bw_stat"]
    mean_bw, std_bw = int(1000 * mean_bw), int(1000 * std_bw)
    rtt_list, rtt_var_list = x["rtt_list"], x["rtt_var_list"]
    mean_rtt = int(np.mean(rtt_list))
    var_rtt = int(np.mean(rtt_var_list))
    num_rows = len(rtt_list)

    # Make sure the output directory exists instead of failing in open().
    os.makedirs("traces", exist_ok=True)
    outfile = f"traces/{mean_bw}_{std_bw}_{mean_rtt}_{var_rtt}_{num_rows}.csv"

    # Draw exactly one noise sample per row; a fixed size of 100 raised
    # IndexError for any record with more than 100 measurements.
    noise = scipy.stats.truncnorm.rvs(-0.5, 0.5, size=num_rows)

    # The context manager closes the file even if a row fails to format.
    with open(outfile, 'w') as f:
        for i in range(num_rows):
            bw = int(noise[i] * std_bw + mean_bw)
            f.write(f"{bw},{rtt_list[i]},{rtt_var_list[i]}\n")

In [40]:
# Convert every record of the newline-delimited JSON export into one CSV trace
# under traces_new/.  File name fields are, in order: mean bandwidth x1000,
# bandwidth std x1000, mean halved RTT, std of halved RTT, number of rows.
fname = "../data/bq-results-20220615-161531-1655310096986.json"
with open(fname) as infile:
    content = infile.readlines()

# Make sure the output directory exists instead of failing in open().
os.makedirs("traces_new", exist_ok=True)

data_list = []  # not populated in this cell; kept in case other cells reference it
for line in content:
    # strip() instead of [:-1]: a final line without a trailing newline would
    # otherwise lose its closing brace and fail to parse.
    line = line.strip()
    if not line:
        continue
    data = json.loads(line)
    bw_list = get_bw(data)
    rtt_list = get_rtt(data)
    rtt_var_list = get_rtt_var(data)
    if not rtt_list:
        # No measurements: the mean would be NaN and int(NaN) raises, so there
        # is nothing meaningful to write for this record.
        continue
    (mean_bw, std_bw) = (int(1000*np.mean(bw_list)), int(1000*np.std(bw_list)))
    mean_rtt = int(np.mean(rtt_list))
    var_rtt = int(np.std(rtt_list))
    outfile = f"traces_new/{mean_bw}_{std_bw}_{mean_rtt}_{var_rtt}_{len(rtt_list)}.csv"
    # One noise sample per row; a fixed size of 100 raised IndexError for any
    # record with more than 100 measurements.
    noise = scipy.stats.truncnorm.rvs(-0.5, 0.5, size=len(rtt_list))
    with open(outfile, 'w') as f:
        for i in range(0, len(rtt_list)):
            bw = int(noise[i]*std_bw + mean_bw)
            row = ','.join(list(map(str, [bw, rtt_list[i], rtt_var_list[i]]))) + '\n'
            f.write(row)

In [14]:


# Derive the per-record series from the raw measurement column, one new
# DataFrame column per extractor, then summarise the bandwidth series.
for column, extractor in (
    ("bw_list", get_bw),
    ("rtt_list", get_rtt),
    ("rtt_var_list", get_rtt_var),
    ("loss", get_loss),
):
    df[column] = df["server_measurements_json"].apply(extractor)

# (mean, std) pair of each record's bandwidth list.
df["bw_stat"] = df["bw_list"].apply(lambda values: (np.mean(values), np.std(values)))

In [54]:
def find_nth(haystack, needle, n):
    """Return the index of the ``n``-th non-overlapping ``needle`` in ``haystack``.

    Occurrences are counted from 1 and each search resumes just past the
    previous match.  For ``n <= 1`` the first occurrence is returned.

    Parameters
    ----------
    haystack : str
        Text to search in.
    needle : str
        Substring to look for.
    n : int
        1-based occurrence number.

    Returns
    -------
    int
        Start index of the match, or ``-1`` when there are fewer than ``n``
        occurrences.
    """
    step = len(needle)
    position = haystack.find(needle)
    remaining = n
    while remaining > 1 and position >= 0:
        position = haystack.find(needle, position + step)
        remaining -= 1
    return position

from glob import glob

# Prune generated traces that are too short or too slow.  Trace files are
# named "<mean_bw>_<std_bw>_<mean_rtt>_<var_rtt>_<num_rows>.csv", so the first
# and last underscore-separated fields of the base name carry the values
# tested below.
MIN_NUM_SEC = 30   # minimum number of rows a trace must have to be kept
MIN_BW = 100       # minimum mean bandwidth (as encoded in the file name)

dname = "traces_new/"
filelist = glob(os.path.join(dname, "*.csv"))
for filename in filelist:
    # Parse the base name only: slicing the full path by underscore position
    # silently depended on the directory name containing exactly one "_".
    stem = os.path.splitext(os.path.basename(filename))[0]
    fields = stem.split("_")
    num_sec = int(fields[-1])
    bw = int(fields[0])
    if num_sec < MIN_NUM_SEC or bw < MIN_BW:
        # os.remove is portable and immune to shell quoting problems,
        # unlike shelling out to "rm".
        os.remove(filename)