This notebook analyzes QUIC experiments run with `packetsBeforeAck` set to 2, 10, and 15.

In [None]:
import os
import ast
import json
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [None]:
# Root directory of the collected QUIC measurement data.
database = "/home/wmnlab/G/quic_data/"
# Measurement dates to include; each one is a folder name directly under `database`.
dates = [
    # "2024-07-29",
    # "2024-07-30",
    "2024-08-04"
]
# Devices to include; each one is a folder name under <database>/<date>/<experiment>.
devices = sorted([
    "sm00",
    "sm01",
    "sm02",
    "sm03",
    # "sm08",
    # "MacBookProM1",
])
# Experiment name -> (round count, list of round folder names "#01", "#02", ...).
exps = {
    "QUIC-ack1": (2, ["#{:02d}".format(i + 1) for i in range(2)]),
    "QUIC-ack2": (2, ["#{:02d}".format(i + 1) for i in range(2)]),
    "QUIC-ack10": (2, ["#{:02d}".format(i + 1) for i in range(2)]),
    "QUIC-ack15": (2, ["#{:02d}".format(i + 1) for i in range(2)]),
}

# Length of one experiment run; presumably seconds, since it is the divisor
# of the per-second ACK rate below -- TODO confirm.
exp_duration = 200
# Multiplied by the data-packet count in the throughput cell; presumably the
# number of bytes per data packet -- TODO confirm.
data_len = 1223 
# Divisor of the throughput computation. 125000 bytes equal one megabit, so
# bytes / time_data is a rate in Mbps (the unit used on the throughput plot).
time_data = exp_duration * 125000

# Device name -> two ports. find_raw_files matches the first port to the
# UL raw logs and the second port to the DL raw logs.
device_to_port = {"sm00": [5200, 5201], 
                  "sm01": [5202, 5203],
                  "sm02": [5204, 5205],
                  "sm03": [5206, 5207],
                  "sm04": [5208, 5209],
                  "sm05": [5210, 5211],
                  "sm06": [5212, 5213],
                  "sm07": [5214, 5215],
                  "sm08": [5216, 5217],
                  "sm09": [5218, 5219],
                  "MacBookProM1": [4200, 4201],
                  }

In [None]:
def find_raw_files(database, date, exp, device):
    """Locate the raw client/server CSV logs of every round of one experiment.

    Each round folder ``<database>/<date>/<exp>/<device>/<round>/raw`` is
    walked, and one 4-slot list is produced per visited directory:

        [0] ack_ul_sent_raw_file -- "server" log whose port field holds port 1
        [1] ack_ul_rcv_raw_file  -- "client" log whose port field holds port 1
        [2] ack_dl_sent_raw_file -- "client" log whose port field holds port 2
        [3] ack_dl_rcv_raw_file  -- "server" log whose port field holds port 2

    File names are split on "_": field 3 must contain the port number and
    field 4 (up to the first ".") must be "client" or "server". Slots with
    no matching file stay as the empty string.

    Args:
        database: Root data directory.
        date: Date folder name.
        exp: Experiment name; must be a key of the module-level ``exps``.
        device: Device folder name, looked up in ``device_to_port``.

    Returns:
        A list of 4-element path lists. Empty when the raw folders do not
        exist or when the device has no two-port entry in ``device_to_port``.

    Raises:
        KeyError: If ``exp`` is not a key of ``exps``.
    """
    _, round_names = exps[exp]
    ports = device_to_port.get(device, [])
    if len(ports) < 2:
        # Without both ports no file can be classified; indexing ports[0]
        # here used to raise IndexError for unmapped devices.
        return []
    ul_port, dl_port = str(ports[0]), str(ports[1])

    exp_files_list = []
    for round_name in round_names:
        raw_dir = os.path.join(database, date, exp, device, round_name, 'raw')
        for root, _dirs, files in os.walk(raw_dir):
            slots = ["", "", "", ""]
            for file in files:
                if not file.endswith(".csv"):
                    continue
                fields = file.split("_")
                if len(fields) < 5:
                    # Not a raw log name of the expected shape; skip it
                    # instead of failing on a missing field.
                    continue
                port_field = fields[3]
                role = fields[4].split(".")[0]
                path = os.path.join(root, file)
                if ul_port in port_field:
                    if role == "client":
                        slots[1] = path
                    elif role == "server":
                        slots[0] = path
                if dl_port in port_field:
                    if role == "client":
                        slots[2] = path
                    elif role == "server":
                        slots[3] = path
            exp_files_list.append(slots)
    return exp_files_list

In [None]:
# Gather the raw-log file groups of every (experiment, date, device)
# combination, keyed by experiment name. An experiment only gets a key
# once at least one file group was found for it.
exp_file_path_raw_dict = {}
for exp in exps:
    for date in dates:
        for device in devices:
            found_groups = find_raw_files(database, date, exp, device)
            if not found_groups:
                continue
            collected = exp_file_path_raw_dict.setdefault(exp, [])
            for file_group in found_groups:
                print(file_group)
                collected.append(file_group)

In [None]:
# Display the collected raw-log paths per experiment as the cell output.
exp_file_path_raw_dict

In [None]:
def GetRawDf(sent_raw_df):
    """Split a raw event log into sent, sent-ACK and received packet rows.

    Args:
        sent_raw_df: DataFrame with a 'name' column (event name) and a
            'data' column (event payload as text).

    Returns:
        A tuple ``(sent_pkt_raw_df, sent_ack_raw_df, rcv_pkt_raw_df)``:
        rows named 'transport:p_sent', the subset of those whose 'data'
        text contains ``'fr_t': 'ack'``, and rows named 'transport:p_rcv'.
    """
    event_name = sent_raw_df['name']
    sent_pkt_raw_df = sent_raw_df[event_name == 'transport:p_sent']
    # Rows without payload text never count as ACK packets.
    carries_ack = sent_pkt_raw_df['data'].str.contains("'fr_t': 'ack'", na=False)
    sent_ack_raw_df = sent_pkt_raw_df[carries_ack]
    rcv_pkt_raw_df = sent_raw_df[event_name == 'transport:p_rcv']
    return sent_pkt_raw_df, sent_ack_raw_df, rcv_pkt_raw_df


In [None]:
def _ack_counts_for_log(raw_file):
    """Return the ACK statistics row of one raw log, or None if it is missing.

    The row is ``[ack packets sent, packets sent, packets received,
    ack packets sent / packets received * 100]``.
    """
    if not raw_file:
        # find_raw_files leaves "" in slots it could not fill; reading that
        # path would raise, so report and skip it instead.
        print("Skipping missing raw log")
        return None
    sent_pkt_df, sent_ack_df, rcv_pkt_df = GetRawDf(pd.read_csv(raw_file, sep=','))
    ack_cnt, sent_cnt, rcv_cnt = len(sent_ack_df), len(sent_pkt_df), len(rcv_pkt_df)
    # A log with no received packets has no meaningful ratio; report 0
    # instead of dividing by zero.
    overhead = (ack_cnt / rcv_cnt) * 100 if rcv_cnt else 0
    return [ack_cnt, sent_cnt, rcv_cnt, overhead]


# Experiment name -> one statistics row per raw log, for UL and DL.
all_ul_ack_raw_file_len = {}
all_dl_ack_raw_file_len = {}
for exp in exps:
    exp_ul_ack_raw_file_len = []
    exp_dl_ack_raw_file_len = []
    # Experiments without any raw files have no key in the path dictionary.
    for sent_raw_files in exp_file_path_raw_dict.get(exp, []):
        ul_row = _ack_counts_for_log(sent_raw_files[0])
        if ul_row is not None:
            exp_ul_ack_raw_file_len.append(ul_row)
        dl_row = _ack_counts_for_log(sent_raw_files[2])
        if dl_row is not None:
            exp_dl_ack_raw_file_len.append(dl_row)
    all_ul_ack_raw_file_len[exp] = exp_ul_ack_raw_file_len
    all_dl_ack_raw_file_len[exp] = exp_dl_ack_raw_file_len

In [None]:
# Average UL ACK sending rate per experiment across all of its raw logs.
# The previous loop iterated over the fields of the first row and added that
# row's ACK count once per field, so only one log was ever counted.
for exp in exps:
    ul_rows = all_ul_ack_raw_file_len[exp]
    sent_ack_cnt = sum(row[0] for row in ul_rows)
    # Each row is one log; assumes every log spans exp_duration -- TODO confirm.
    observed_time = exp_duration * len(ul_rows)
    sent_ack_per_sec = sent_ack_cnt / observed_time if observed_time else 0
    # No ACKs at all means no finite interval between ACKs.
    sent_ack_interval = 1000 / sent_ack_per_sec if sent_ack_per_sec else float("inf")
    print("UL:", exp, sent_ack_cnt, f"{sent_ack_interval}ms", sent_ack_per_sec)

In [None]:
# Average DL ACK sending rate per experiment across all of its raw logs.
# The previous loop iterated over the fields of the first row and added that
# row's ACK count once per field, so only one log was ever counted.
for exp in exps:
    dl_rows = all_dl_ack_raw_file_len[exp]
    sent_ack_cnt = sum(row[0] for row in dl_rows)
    # Each row is one log; assumes every log spans exp_duration -- TODO confirm.
    observed_time = exp_duration * len(dl_rows)
    sent_ack_per_sec = sent_ack_cnt / observed_time if observed_time else 0
    # No ACKs at all means no finite interval between ACKs.
    sent_ack_interval = 1000 / sent_ack_per_sec if sent_ack_per_sec else float("inf")
    print("DL:", exp, sent_ack_cnt, f"{sent_ack_interval}ms", sent_ack_per_sec)

In [None]:
# Print the mean ACK overhead (4th field of each statistics row) per
# experiment, first for UL and then for DL.
for exp in exps:
    for direction, rows_by_exp in (("ul", all_ul_ack_raw_file_len),
                                   ("dl", all_dl_ack_raw_file_len)):
        # Only rows that actually have the overhead field contribute, and the
        # mean is taken over exactly those rows (the filter and the divisor
        # previously disagreed with the index being read).
        overheads = [row[3] for row in rows_by_exp[exp] if len(row) > 3]
        avg_overhead = sum(overheads) / len(overheads) if overheads else 0
        print(exp, direction, avg_overhead)

In [None]:
def _mean_ack_overhead(rows):
    """Return the mean of the 4th field over the rows that have one (0 if none)."""
    overheads = [row[3] for row in rows if len(row) > 3]
    return sum(overheads) / len(overheads) if overheads else 0


def plot_avg_ack_overhead(exps, all_ul_ack_raw_file_len, all_dl_ack_raw_file_len):
    """Draw a grouped bar chart of the mean UL and DL ACK overhead per experiment.

    Args:
        exps: Iterable of experiment names (iterating a dict yields its keys).
        all_ul_ack_raw_file_len: Experiment name -> list of UL statistics
            rows; the overhead is read from index 3 of each row.
        all_dl_ack_raw_file_len: Same structure for DL.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    exp_names = list(exps)
    ul_averages = [_mean_ack_overhead(all_ul_ack_raw_file_len[exp]) for exp in exp_names]
    dl_averages = [_mean_ack_overhead(all_dl_ack_raw_file_len[exp]) for exp in exp_names]

    x = range(len(exp_names))
    width = 0.35  # width of the bars

    fig, ax = plt.subplots()
    ax.bar(x, ul_averages, width, label='UL Average ACK Overhead')
    # DL bars sit directly to the right of the UL bars.
    ax.bar([i + width for i in x], dl_averages, width, label='DL Average ACK Overhead')

    ax.set_xlabel('Experiments')
    ax.set_ylabel('Average ACK Overhead')
    ax.set_title('Average ACK Overhead for UL and DL')
    # Center each tick between the two bars of its group.
    ax.set_xticks([i + width/2 for i in x])
    ax.set_xticklabels(exp_names)
    ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Plot the mean UL/DL ACK overhead of every configured experiment.
plot_avg_ack_overhead(exps, all_ul_ack_raw_file_len, all_dl_ack_raw_file_len)

In [None]:
def _parse_packet_data(raw):
    """Decode one 'data' cell (a dict printed with single quotes) into a dict."""
    try:
        return json.loads(raw.replace("'", '"'))
    except ValueError:
        # Payloads holding Python-only literals (None, True, tuples) are not
        # valid JSON after the quote swap; parse them as a Python literal.
        return ast.literal_eval(raw)


def getAckInfo(df):
    """Extract the ACK information of every packet row into one DataFrame.

    Exactly one output row is produced per parsed input row, so the three
    columns stay aligned with each other and with the input order.

    Args:
        df: DataFrame with a 'data' column whose cells are the textual form
            of a dict with a 'frames' list.

    Returns:
        DataFrame with the columns
        - 'acked_ranges': all range entries of all frames of the packet
          (empty list when no frame has 'acked_ranges'),
        - 'ack_delay': 'ack_delay' of the first frame that has one, else
          missing,
        - 'ecn': True if any frame has a non-zero 'ce', False if 'ce' is
          present but always zero, missing if no frame has 'ce'.

        Parsing stops at the first row that cannot be decoded; that row and
        its position are printed and the rows before it are returned.
    """
    acked_ranges_list = []
    ack_delay_list = []
    ecn_list = []
    for i, s in enumerate(df['data']):
        try:
            frames = _parse_packet_data(s)['frames']
            acked_ranges = [
                range_entry
                for frame in frames if 'acked_ranges' in frame
                for range_entry in frame['acked_ranges']
            ]
            ack_delay = next(
                (frame['ack_delay'] for frame in frames if 'ack_delay' in frame),
                None,
            )
            ce_counts = [frame['ce'] for frame in frames if 'ce' in frame]
        except (ValueError, SyntaxError, KeyError, TypeError, AttributeError):
            print(s, i)
            break
        acked_ranges_list.append(acked_ranges)
        ack_delay_list.append(ack_delay)
        ecn_list.append(any(ce != 0 for ce in ce_counts) if ce_counts else None)

    return pd.DataFrame({
        "acked_ranges": acked_ranges_list,
        "ack_delay": ack_delay_list,
        "ecn": ecn_list,
    })

In [None]:
def plotAckedRanges(df):
    """Draw each acked range as a vertical segment at its row's index.

    Args:
        df: DataFrame with an 'acked_ranges' column. Cells that are lists
            are plotted entry by entry; any other cell is ignored. An entry
            with fewer than two values is drawn from its first value to
            itself, otherwise from its first to its second value.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(12, 6))
    for row_index, ranges in df['acked_ranges'].items():
        if not isinstance(ranges, list):
            continue
        for entry in ranges:
            # A single-value entry collapses to a zero-length segment.
            upper = entry[0] if len(entry) < 2 else entry[1]
            plt.plot([row_index, row_index], [entry[0], upper],
                     color='b', linestyle='-', alpha=0.7)

    # Axis labels and title
    plt.xlabel('Row Index')
    plt.ylabel('Acked Ranges')
    plt.title('Vertical Lines Representing Acked Ranges')

    plt.grid(True)
    plt.show()


Check the ACK packet frequency.

In [None]:
# mean_ack2_dl_delay = ack2_dl_sent_df['ack_delay'].mean()
# mean_ack2_ul_delay = ack2_ul_sent_df['ack_delay'].mean()
# print(mean_ack2_dl_delay)
# ack2_dl_sent_df.iloc[100000:100010]['acked_ranges']
# print(mean_ack2_ul_delay)
# ack2_ul_sent_df.iloc[100000:100010]['acked_ranges']

In [None]:
# mean_ack10_dl_delay = ack10_dl_sent_df['ack_delay'].mean()
# mean_ack10_ul_delay = ack10_ul_sent_df['ack_delay'].mean()
# print(mean_ack10_dl_delay)
# ack10_dl_sent_df.iloc[100000:100010]['acked_ranges']
# print(mean_ack10_ul_delay)
# ack10_ul_sent_df.iloc[100000:100010]['acked_ranges']

In [None]:
# mean_ack15_dl_delay = ack15_dl_sent_df['ack_delay'].mean()
# mean_ack15_ul_delay = ack15_ul_sent_df['ack_delay'].mean()
# print(mean_ack15_dl_delay)
# print(ack15_dl_sent_df.iloc[100000:100010]['acked_ranges'])
# print(mean_ack15_ul_delay)
# print(ack15_ul_sent_df.iloc[100000:100010]['acked_ranges'])

Goodput ratio

In [None]:
def find_ul_sent_file(database, date, exp, device):
    """Collect the processed UL sent files of every round of one experiment.

    Every directory below ``<database>/<date>/<exp>/<device>/<round>/data``
    contributes at most one path: the first listed file whose name starts
    with "ul_processed_sent".

    Args:
        database: Root data directory.
        date: Date folder name.
        exp: Experiment name; must be a key of the module-level ``exps``.
        device: Device folder name.

    Returns:
        List of matching file paths (empty when nothing is found).
    """
    _, round_names = exps[exp]
    matches = []
    for round_name in round_names:
        data_dir = os.path.join(database, date, exp, device, round_name, 'data')
        for dirpath, _subdirs, filenames in os.walk(data_dir):
            # Only the first match of a directory is kept; the walk still
            # continues into the remaining directories.
            first_hit = next(
                (name for name in filenames if name.startswith("ul_processed_sent")),
                None,
            )
            if first_hit is not None:
                matches.append(os.path.join(dirpath, first_hit))
    return matches

def find_dl_sent_file(database, date, exp, device):
    """Collect the processed DL sent files of every round of one experiment.

    Every directory below ``<database>/<date>/<exp>/<device>/<round>/data``
    contributes at most one path: the first listed file whose name contains
    "dl_processed_sent".

    Args:
        database: Root data directory.
        date: Date folder name.
        exp: Experiment name; must be a key of the module-level ``exps``.
        device: Device folder name.

    Returns:
        List of matching file paths (empty when nothing is found).
    """
    _, round_names = exps[exp]
    matches = []
    for round_name in round_names:
        data_dir = os.path.join(database, date, exp, device, round_name, 'data')
        for dirpath, _subdirs, filenames in os.walk(data_dir):
            # Only the first match of a directory is kept; the walk still
            # continues into the remaining directories.
            first_hit = next(
                (name for name in filenames if "dl_processed_sent" in name),
                None,
            )
            if first_hit is not None:
                matches.append(os.path.join(dirpath, first_hit))
    return matches

In [None]:
def calculate_goodput(sent_df):
    """Return the percentage of rows whose 'offset' value occurs exactly once.

    The result is ``(number of offsets seen exactly once) * 100 / (number
    of rows)``. Rows with a missing offset count towards the row total but
    never towards the numerator.

    Args:
        sent_df: DataFrame with an 'offset' column. It is not modified.

    Returns:
        The percentage as a float; 0.0 for an empty frame.
    """
    total_rows = len(sent_df)
    if total_rows == 0:
        # Nothing was sent, so there is no ratio to compute.
        return 0.0
    offset_counts = sent_df['offset'].value_counts()
    sent_once = int((offset_counts == 1).sum())
    return sent_once * 100 / total_rows

In [None]:
def plot_avg_goodput(avg_ul_goodput, avg_dl_goodput):
    """Plot the average uplink and downlink goodput per experiment as lines.

    Args:
        avg_ul_goodput: Dict mapping experiment name -> average UL goodput.
        avg_dl_goodput: Dict mapping experiment name -> average DL goodput.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # One (x values, y values, legend label) triple per direction.
    series = [
        (list(averages.keys()), list(averages.values()), label)
        for averages, label in (
            (avg_ul_goodput, 'Uplink Goodput'),
            (avg_dl_goodput, 'Downlink Goodput'),
        )
    ]

    plt.figure(figsize=(10, 5))
    for exp_names, avg_values, label in series:
        plt.plot(exp_names, avg_values, marker='o', label=label)

    plt.xlabel('Experiment')
    plt.ylabel('Goodput (%)')
    plt.title('Average Uplink and Downlink Goodput')
    plt.legend()

    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


In [None]:
def _goodput_of_files(sent_files):
    """Return the goodput of every processed sent file ('@'-separated CSV)."""
    return [calculate_goodput(pd.read_csv(path, sep='@')) for path in sent_files]


def _mean_or_zero(values):
    """Return the arithmetic mean, or 0 for an empty list."""
    return sum(values) / len(values) if values else 0


# Experiment name -> goodput of each processed sent file, per direction.
all_ul_goodput = {}
all_dl_goodput = {}
for exp in exps:
    exp_ul_goodput = []
    exp_dl_goodput = []
    for date in dates:
        for device in devices:
            exp_ul_goodput.extend(
                _goodput_of_files(find_ul_sent_file(database, date, exp, device)))
            exp_dl_goodput.extend(
                _goodput_of_files(find_dl_sent_file(database, date, exp, device)))

    all_ul_goodput[exp] = exp_ul_goodput
    all_dl_goodput[exp] = exp_dl_goodput

# Average per experiment. An experiment without any processed file gets 0
# instead of aborting the cell with a division by zero.
avg_ul_goodput = {exp: _mean_or_zero(values) for exp, values in all_ul_goodput.items()}
avg_dl_goodput = {exp: _mean_or_zero(values) for exp, values in all_dl_goodput.items()}

print("Average Uplink Goodput:")
print(avg_ul_goodput)
print("\nAverage Downlink Goodput:")
print(avg_dl_goodput)


In [None]:
# Plot the per-experiment average UL and DL goodput computed above.
plot_avg_goodput(avg_ul_goodput, avg_dl_goodput)

Throughput

In [None]:
def find_stats_files(database, date, exp, device):
    """Collect the UL and DL statistics files of every round of one experiment.

    All directories below
    ``<database>/<date>/<exp>/<device>/<round>/statistics`` are searched.

    Args:
        database: Root data directory.
        date: Date folder name.
        exp: Experiment name; must be a key of the module-level ``exps``.
        device: Device folder name.

    Returns:
        A tuple ``(ul_files, dl_files)``: paths of files whose names start
        with "ul_statistics" and with "dl_statistics" respectively.
    """
    _, round_names = exps[exp]
    ul_files, dl_files = [], []
    for round_name in round_names:
        stats_dir = os.path.join(database, date, exp, device, round_name, 'statistics')
        for dirpath, _subdirs, filenames in os.walk(stats_dir):
            for name in filenames:
                full_path = os.path.join(dirpath, name)
                if name.startswith("ul_statistics"):
                    ul_files.append(full_path)
                if name.startswith("dl_statistics"):
                    dl_files.append(full_path)
    return ul_files, dl_files

def calculate_avg_stats(df):
    """Average the per-run statistics columns and derive the loss rates.

    Every average is the column mean truncated to an int. 'real_pkl' is the
    sum of the averaged 'reordering_threshold' and 'time_threshold', and
    'exec_lat' is the sum of the averaged 'exec_reordering' and 'exec_time'.
    Every rate is ``part * 100 / whole`` and is 0 when ``whole`` is 0.

    Args:
        df: DataFrame with the columns 'total_packets',
            'total_data_packets', 'original_pkl', 'reordering_threshold',
            'time_threshold', 'exec_reordering' and 'exec_time'.

    Returns:
        Dict of the averaged values and the derived percentage rates.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a column mean is NaN (for example an empty frame).
    """
    def mean_int(column):
        return int(df[column].mean())

    def percent(part, whole):
        # All rates share one zero guard; the two per-total rates used to
        # divide without it and failed when no packets were recorded.
        return 0 if whole == 0 else part * 100 / whole

    avg_total_packets = mean_int('total_packets')
    avg_data_packets = mean_int('total_data_packets')
    avg_original_pkl = mean_int('original_pkl')
    avg_reordering_threshold = mean_int('reordering_threshold')
    avg_time_threshold = mean_int('time_threshold')
    avg_real_pkl = avg_reordering_threshold + avg_time_threshold
    avg_exec_reordering = mean_int('exec_reordering')
    avg_exec_time = mean_int('exec_time')

    return {"total_packets": avg_total_packets,
            "total_data_packets": avg_data_packets,
            "original_pkl": avg_original_pkl,
            "reordering_threshold": avg_reordering_threshold,
            "time_threshold": avg_time_threshold,
            "real_pkl": avg_real_pkl,
            "exec_reordering": avg_exec_reordering,
            "exec_time": avg_exec_time,
            "exec_lat": avg_exec_reordering + avg_exec_time,
            "reordering_pkl_rate(%)": percent(avg_reordering_threshold, avg_real_pkl),
            "time_pkl_rate(%)": percent(avg_time_threshold, avg_real_pkl),
            "real_pkl_rate(%)": percent(avg_real_pkl, avg_original_pkl),
            "original_packet_loss_rate(%)": percent(avg_original_pkl, avg_total_packets),
            "adjusted_packet_loss_rate(%)": percent(avg_real_pkl, avg_total_packets),
            }

In [None]:
def _average_stats_of_files(file_paths):
    """Concatenate the statistics CSVs and average them; None without files."""
    if not file_paths:
        return None
    frames = [pd.read_csv(file_path, encoding="utf-8") for file_path in file_paths]
    return calculate_avg_stats(pd.concat(frames, ignore_index=True))


# Experiment name -> averaged statistics, per direction.
all_avg_ul_stats = {}
all_avg_dl_stats = {}
for exp in exps:
    exp_ul_stats_files = []
    exp_dl_stats_files = []
    for date in dates:
        for device in devices:
            ul_stats_files, dl_stats_files = find_stats_files(database, date, exp, device)
            exp_ul_stats_files.extend(ul_stats_files)
            exp_dl_stats_files.extend(dl_stats_files)

    for direction, stats_files, target in (
            ("UL", exp_ul_stats_files, all_avg_ul_stats),
            ("DL", exp_dl_stats_files, all_avg_dl_stats)):
        averaged = _average_stats_of_files(stats_files)
        if averaged is None:
            # pd.concat of an empty list raises "No objects to concatenate";
            # report the gap and leave this experiment out instead.
            print(f"No {direction} statistics files found for {exp}; skipping.")
            continue
        target[exp] = averaged


In [None]:
# Display the averaged UL statistics per experiment as the cell output.
all_avg_ul_stats

In [None]:
# Display the averaged DL statistics per experiment as the cell output.
all_avg_dl_stats

In [None]:
# Calculations
for key, value in all_avg_ul_stats.items():
    total_data_packets = value['total_data_packets']
    original_packet_loss_rate = value['original_packet_loss_rate(%)']
    # Calculate total throughput
    total_throughput = (total_data_packets * data_len) / time_data
    total_goodput = (1 - original_packet_loss_rate) * total_throughput
    # Add results to dictionary
    all_avg_ul_stats[key]['total_throughput'] = total_throughput
    all_avg_ul_stats[key]['total_goodput'] = total_goodput
for key, value in all_avg_dl_stats.items():
    total_data_packets = value['total_data_packets']
    original_packet_loss_rate = value['original_packet_loss_rate(%)']
    # Calculate total throughput
    total_throughput = (total_data_packets * data_len) / time_data
    total_goodput = (1 - original_packet_loss_rate) * total_throughput
    # Add results to dictionary
    all_avg_dl_stats[key]['total_throughput'] = total_throughput
    all_avg_dl_stats[key]['total_goodput'] = total_goodput

# Print results
for key, value in all_avg_ul_stats.items():
    print(f"{key}: UL Total Throughput = {value['total_throughput']}, UL Total Goodput = {value['total_goodput']}")
for key, value in all_avg_dl_stats.items():
    print(f"{key}: DL Total Throughput = {value['total_throughput']}, DL Total Goodput = {value['total_goodput']}")

In [None]:
# Relative change of every experiment's throughput and goodput against the
# baseline experiment. Requires all_avg_ul_stats and all_avg_dl_stats to
# already hold 'total_throughput' and 'total_goodput'.
def _percent_increase(current, baseline):
    """Return the change from ``baseline`` to ``current`` in percent."""
    return (current - baseline) / baseline * 100


# Experiment every other experiment is compared with
baseline_key = 'QUIC-ack2'

# Baseline values for UL
_ul_baseline = all_avg_ul_stats[baseline_key]
baseline_ul_throughput = _ul_baseline['total_throughput']
baseline_ul_goodput = _ul_baseline['total_goodput']

# Baseline values for DL
_dl_baseline = all_avg_dl_stats[baseline_key]
baseline_dl_throughput = _dl_baseline['total_throughput']
baseline_dl_goodput = _dl_baseline['total_goodput']

# UL report
print("UL Increases:")
for key, value in all_avg_ul_stats.items():
    if key == baseline_key:
        continue
    ul_throughput_increase = _percent_increase(value['total_throughput'], baseline_ul_throughput)
    ul_goodput_increase = _percent_increase(value['total_goodput'], baseline_ul_goodput)
    print(f"{key}: UL Throughput Increase = {ul_throughput_increase:.2f}%, UL Goodput Increase = {ul_goodput_increase:.2f}%")

# DL report
print("\nDL Increases:")
for key, value in all_avg_dl_stats.items():
    if key == baseline_key:
        continue
    dl_throughput_increase = _percent_increase(value['total_throughput'], baseline_dl_throughput)
    dl_goodput_increase = _percent_increase(value['total_goodput'], baseline_dl_goodput)
    print(f"{key}: DL Throughput Increase = {dl_throughput_increase:.2f}%, DL Goodput Increase = {dl_goodput_increase:.2f}%")

In [None]:
def plotThroughputGoodput(stats, ul_dl):
    """Draw grouped bars of total throughput and total goodput per experiment.

    Args:
        stats: Dict mapping experiment name -> dict that holds
            'total_throughput' and 'total_goodput'.
        ul_dl: Direction label (e.g. "UL" or "DL") used in the legend
            entries and in the title.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    keys = list(stats.keys())
    throughput = [stats[name]['total_throughput'] for name in keys]
    goodput = [stats[name]['total_goodput'] for name in keys]

    bar_width = 0.35
    left_positions = list(range(len(keys)))
    # Goodput bars sit directly to the right of the throughput bars.
    right_positions = [pos + bar_width for pos in left_positions]
    tick_positions = [pos + bar_width / 2 for pos in left_positions]

    plt.figure(figsize=(14, 6))
    plt.bar(left_positions, throughput, bar_width,
            label=f'{ul_dl} Throughput', alpha=0.7, color='b')
    plt.bar(right_positions, goodput, bar_width,
            label=f'{ul_dl} Goodput', alpha=0.7, color='g')

    plt.xlabel('Experiments')
    plt.ylabel('Throughput (Mbps)')
    plt.title(f'Throughput and Goodput for {ul_dl}')
    plt.xticks(tick_positions, keys)
    plt.legend()

    plt.tight_layout()
    plt.show()


In [None]:
# Draw one throughput/goodput bar chart per direction.
plotThroughputGoodput(all_avg_ul_stats, "UL")  # For UL stats
plotThroughputGoodput(all_avg_dl_stats, "DL")  # For DL stats