In [578]:
import csv
import ast
import json
import statistics

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime, timedelta

In [579]:
date = "20240126"
date_with_dash = "2024-01-26"
port = "4201"
phone_time = "0755"
time = "1555"
sent_file_name = f"log_{date}_{time}_{port}_server"
received_file_name = f"log_{date}_{phone_time}_{port}_client"
path = "/Users/molly/Desktop/2024-01-26/QUIC-450sec/sm00/#01/"

In [580]:
sync_file_name = f"/Users/molly/Desktop/{date_with_dash}/time_sync_sm00.json"
sync_file = path + "raw/" + sync_file_name
with open(sync_file, 'r') as file:
    data = json.load(file)

# Extract values from the dictionary
values = list(data.values())
mean_diff = values[0] * 1000

### Transform to JSON & CSV file
Process the qlog file to json file & csv file.

In [581]:
def QlogToJsonEntry(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Add commas between lines
    json_str = ",".join(lines)
    # Surround the entire string with square brackets to create a JSON array
    json_str = "[" + json_str + "]"
    # Load the JSON array
    json_entry = json.loads(json_str)
    
    return json_entry

def QlogToJson(json_entry, json_file_path):
    with open(json_file_path, 'w') as json_file:
        json.dump(json_entry, json_file, indent=2)

def JsonToCsv(json_entry, csv_file_path):
     # Open CSV file for writing
    with open(csv_file_path, 'w', newline='') as csv_file:
        # Create a CSV writer
        csv_writer = csv.writer(csv_file)

        # Write header row based on the keys of the second JSON object (assuming at least two objects are present)
        if len(json_entry) >= 2:
            header = list(json_entry[1].keys())
            csv_writer.writerow(header)

            # Write data rows starting from the second object
            for entry in json_entry[1:]:
                csv_writer.writerow(entry.values())

In [582]:
# sender_side_file
sent_raw_path = path + "raw/" + sent_file_name
sent_qlog_file_path = sent_raw_path + ".qlog"
sent_json_file_path = sent_raw_path + ".json"
sent_csv_file_path = sent_raw_path + ".csv"
sent_json_entry = QlogToJsonEntry(sent_qlog_file_path)
QlogToJson(sent_json_entry, sent_json_file_path)
JsonToCsv(sent_json_entry, sent_csv_file_path)

In [583]:
received_raw_path = path + "raw/" + received_file_name
received_qlog_file_path = received_raw_path + ".qlog"
received_json_file_path = received_raw_path + ".json"
received_csv_file_path = received_raw_path + ".csv"
received_json_entry = QlogToJsonEntry(received_qlog_file_path)
QlogToJson(received_json_entry, received_json_file_path)
JsonToCsv(received_json_entry, received_csv_file_path)

In [584]:
sent_df = pd.read_csv(sent_csv_file_path)
received_df = pd.read_csv(received_csv_file_path)

Set time to UMT+8.

In [585]:
def GetStartTime(json_data):
    # unit: ms
    refTime = json_data[0]["trace"]["common_fields"]["reference_time"]
    return refTime

def ProcessTime(df, reference_time):
    # Extract the "time" values from the DataFrame
    original_times = (df['time'].astype(float))

    # Calculate "epoch_time" and convert to timestamps
    epoch_times = (reference_time + original_times)
    timestamps = pd.to_datetime(epoch_times, unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S.%f')

    df['epoch_time'] = epoch_times
    df['timestamp'] = timestamps

    return df

In [586]:
# No matter downlink or uplink, the file time that need to change is client side.
if int(port)%2 == 0: # UL
    clientStartTime = GetStartTime(sent_json_entry)
    print(clientStartTime)
    serverStartTime = GetStartTime(received_json_entry)
    print(serverStartTime)

    senderRefTime = clientStartTime + mean_diff
    rcverRefTime = serverStartTime

else:   # DL
    clientStartTime = GetStartTime(received_json_entry)
    print(clientStartTime)
    serverStartTime = GetStartTime(sent_json_entry)
    print(serverStartTime)
    startTimeDiff = (clientStartTime - serverStartTime) + mean_diff

    senderRefTime = serverStartTime
    rcverRefTime = clientStartTime + mean_diff


1706255738207.4697
1706255728602.5151


In [587]:
sent_df = ProcessTime(sent_df, senderRefTime)
# Add 8 hours to both epoch times and timestamps to match UMT+8
# Also sync time with server
epoch_times_gmt8 = sent_df["epoch_time"] + 8 * 3600 * 1000
sent_df["epoch_time"] = epoch_times_gmt8
timestamps_gmt8 = pd.to_datetime(epoch_times_gmt8, unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S.%f')
sent_df["timestamp"] = timestamps_gmt8

sent_df[-5:]

Unnamed: 0,time,name,data,epoch_time,timestamp
838696,450082.697479,recovery:metrics_updated,"{'smoothed_rtt': 29.908, 'latest_rtt': 32.7018...",1706285000000.0,2024-01-26 16:02:58.685212
838697,450082.700486,recovery:loss_timer_updated,{'event_type': 'cancelled'},1706285000000.0,2024-01-26 16:02:58.685215
838698,450082.701927,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.685217
838699,450083.006244,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.685521
838700,450083.026519,transport:connection_closed,"{'owner': 'remote', 'application_code': 0, 're...",1706285000000.0,2024-01-26 16:02:58.685541


In [588]:
received_df = ProcessTime(received_df, rcverRefTime)
# if the sender is server, then it is no need to calculate time difference
epoch_times_gmt8 = received_df["epoch_time"] + 8 * 3600 * 1000
received_df["epoch_time"] = epoch_times_gmt8
timestamps_gmt8 = pd.to_datetime(epoch_times_gmt8, unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S.%f')
received_df["timestamp"] = timestamps_gmt8

received_df[-5:]

Unnamed: 0,time,name,data,epoch_time,timestamp
293158,450142.295297,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.666648
293159,450142.327692,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.666680
293160,450142.393942,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'df...",1706285000000.0,2024-01-26 16:02:58.666747
293161,450143.674828,transport:connection_closed,"{'owner': 'local', 'application_code': 0, 'rea...",1706285000000.0,2024-01-26 16:02:58.668028
293162,450143.863109,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'df...",1706285000000.0,2024-01-26 16:02:58.668216


Parse the data.

In [589]:
# sender side data
metrics_all_rows = sent_df[(sent_df['name'] == 'recovery:metrics_updated') & (sent_df['data'].str.contains("'bytes_in_flight':"))]
metrics_sent_rows = sent_df[(sent_df['name'] == 'recovery:metrics_updated') & (sent_df['data'].str.contains("{'bytes_in_flight':"))]
metrics_ack_rows = sent_df[(sent_df['name'] == 'recovery:metrics_updated') & (sent_df['data'].str.contains("'latest_rtt':"))]
total_sent_rows = sent_df[(sent_df['name'] == 'transport:packet_sent')]
pk_sent_rows = sent_df[(sent_df['name'] == 'transport:packet_sent') & (sent_df['data'].str.contains("'frame_type': 'stream'"))]
rcv_ack_rows = sent_df[(sent_df['name'] == 'transport:packet_received') & (sent_df['data'].str.contains("'frame_type': 'ack'")) & (sent_df['data'].str.contains("'packet_type': '1RTT'"))]
lost_rows = sent_df[sent_df['name'] == 'recovery:packet_lost']

# Get the count of rows
metrics_all_cnt = len(metrics_all_rows)
metrics_c_cnt = len(metrics_sent_rows)
metrics_ack_cnt = len(metrics_ack_rows)
total_sent_cnt = len(total_sent_rows)
pk_sent_cnt = len(pk_sent_rows)
rcv_ack_cnt = len(rcv_ack_rows)
lost_cnt = len(lost_rows)

print("packet_sent: ", pk_sent_cnt, metrics_c_cnt)
print("ack: ", rcv_ack_cnt, metrics_ack_cnt)
print(metrics_all_cnt, metrics_c_cnt, metrics_ack_cnt, pk_sent_cnt, rcv_ack_cnt, lost_cnt)

packet_sent:  221410 221401
ack:  58032 57942
279343 221401 57942 221410 58032 563


In [590]:
pk_rcv_rows = received_df[(received_df['name'] == "transport:packet_received") & (received_df['data'].str.contains("'frame_type': 'stream'"))]
pk_rcv_rows = pk_rcv_rows.reset_index(drop=True)
print(len(pk_rcv_rows))
pk_rcv_rows[:5]

220832


Unnamed: 0,time,name,data,epoch_time,timestamp
0,138.097239,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662450
1,138.115729,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662469
2,138.126145,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662479
3,143.889895,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.668243
4,143.914739,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.668268


## Deal with sender side data
Concat `transport:packet_sent` & `recovery:metrics_updated`.

In [591]:
metrics_sent_csv_file_path = path + "middle/" + f"sent_metrics_{time}_{port}.csv"
metrics_sent_rows.to_csv(metrics_sent_csv_file_path, index=False)
pk_sent_csv_file_path = path + "middle/" + f"pk_sent_{time}_{port}.csv"
pk_sent_rows.to_csv(pk_sent_csv_file_path, index=False)

In [592]:
def insert(df, idx, new_row):
    df1 = df.iloc[:idx, :]
    df2 = df.iloc[idx:, :]
    df_new = pd.concat([df1, new_row, df2], ignore_index=True)
    return df_new

In [593]:
metrics_sent_rows = metrics_sent_rows.reset_index(drop=True)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)
print(metrics_sent_rows[:5])
print(pk_sent_rows[:5])


        time                      name  \
0   3.621524  recovery:metrics_updated   
1   3.624216  recovery:metrics_updated   
2  48.168758  recovery:metrics_updated   
3  48.590994  recovery:metrics_updated   
4  49.628376  recovery:metrics_updated   

                                                data    epoch_time  \
0  {'bytes_in_flight': 1161, 'packets_in_flight': 2}  1.706285e+12   
1  {'bytes_in_flight': 1252, 'packets_in_flight': 3}  1.706285e+12   
2   {'bytes_in_flight': 307, 'packets_in_flight': 1}  1.706285e+12   
3   {'bytes_in_flight': 578, 'packets_in_flight': 2}  1.706285e+12   
4   {'bytes_in_flight': 851, 'packets_in_flight': 3}  1.706285e+12   

                    timestamp  
0  2024-01-26 15:55:28.606136  
1  2024-01-26 15:55:28.606139  
2  2024-01-26 15:55:28.650683  
3  2024-01-26 15:55:28.651106  
4  2024-01-26 15:55:28.652143  
        time                   name  \
0  48.581362  transport:packet_sent   
1  49.613347  transport:packet_sent   
2  51.595266  tra

In [594]:
ori_recover_c_len = len(metrics_sent_rows)
for i in range(pk_sent_cnt):
    time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']
    # print(i, time_diff_list)
    # time_diff >= 1: not the matching metrics_update
    while time_diff >= 1:
        data = metrics_sent_rows.iloc[i-1]['data']
        new_row_data = {'time': [pk_sent_rows.iloc[i]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
        new_row = pd.DataFrame(new_row_data)
        # print(new_row)
        metrics_sent_rows = insert(metrics_sent_rows, i, new_row)
        time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']
    # time_diff < 0: missing metrics_update
    while time_diff < 0:
        # print(i, time_diff_list)
        metrics_sent_rows.drop(index=metrics_sent_rows.index[i], inplace=True)
        time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']

print(ori_recover_c_len, len(metrics_sent_rows))


221401 221410


In [595]:
metrics_sent_rows = metrics_sent_rows.reset_index(drop=True)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)
print(len(metrics_sent_rows), len(pk_sent_rows))

# check whether there's still mismatch exist.
time_diff_list = metrics_sent_rows['time'] - pk_sent_rows['time']
mismatch_indices = time_diff_list[(time_diff_list >= 1) | (time_diff_list < 0)].index
if len(mismatch_indices) == 0:
    print("All Matched!")
else:
    print(mismatch_indices)


221410 221410
All Matched!


In [596]:
# extract bytes_in_flight & packets_in_flight
metrics_sent_rows['bytes_in_flight'] = None
metrics_sent_rows['packets_in_flight'] = None

# Use ast.literal_eval to safely evaluate the string and extract 'bytes_in_flight' and 'packets_in_flight'
metrics_sent_rows[['bytes_in_flight', 'packets_in_flight']] = metrics_sent_rows['data'].apply(
    lambda x: pd.Series(ast.literal_eval(x)) if isinstance(x, str) else pd.Series([None, None]))

metrics_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight
0,48.590994,recovery:metrics_updated,"{'bytes_in_flight': 578, 'packets_in_flight': 2}",1706285000000.0,2024-01-26 15:55:28.651106,578,2
1,49.628376,recovery:metrics_updated,"{'bytes_in_flight': 851, 'packets_in_flight': 3}",1706285000000.0,2024-01-26 15:55:28.652143,851,3
2,51.6013,recovery:metrics_updated,"{'bytes_in_flight': 1124, 'packets_in_flight': 4}",1706285000000.0,2024-01-26 15:55:28.654116,1124,4
3,53.57987,recovery:metrics_updated,"{'bytes_in_flight': 1397, 'packets_in_flight': 5}",1706285000000.0,2024-01-26 15:55:28.656094,1397,5
4,55.559284,recovery:metrics_updated,"{'bytes_in_flight': 1670, 'packets_in_flight': 6}",1706285000000.0,2024-01-26 15:55:28.658074,1670,6


In [597]:
# Add bytes_in_flight & packets_in_flight to pk_sent_rows
pk_sent_rows['bytes_in_flight'] = metrics_sent_rows['bytes_in_flight']
pk_sent_rows['packets_in_flight'] = metrics_sent_rows['packets_in_flight']

pk_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight
0,48.581362,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.651096,578,2
1,49.613347,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.652128,851,3
2,51.595266,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.654110,1124,4
3,53.574539,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.656089,1397,5
4,55.558341,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.658073,1670,6


Concat `transport:packet_received` & `recovery:metrics_updated`.

In [598]:
metrics_ack_csv_file_path = path + "middle/" + f"ack_metrics_{time}_{port}.csv" 
metrics_ack_rows.to_csv(metrics_ack_csv_file_path, index=False)
rcv_ack_csv_file_path = path + "middle/" + f"rcv_ack_{time}_{port}.csv"
rcv_ack_rows.to_csv(rcv_ack_csv_file_path, index=False)

In [599]:
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)
initial_ack_metrics = metrics_ack_rows.iloc[[0]]
metrics_ack_rows.drop(index=metrics_ack_rows.index[0], inplace=True)
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)

In [600]:
metrics_ack_rows[:3]

Unnamed: 0,time,name,data,epoch_time,timestamp
0,47.729589,recovery:metrics_updated,"{'min_rtt': 44.05906, 'smoothed_rtt': 44.05906...",1706285000000.0,2024-01-26 15:55:28.650244
1,48.045894,recovery:metrics_updated,"{'smoothed_rtt': 44.06, 'latest_rtt': 44.07151...",1706285000000.0,2024-01-26 15:55:28.650561
2,72.915097,recovery:metrics_updated,"{'min_rtt': 21.323575, 'smoothed_rtt': 41.217,...",1706285000000.0,2024-01-26 15:55:28.675430


In [601]:
rcv_ack_rows[:3]

Unnamed: 0,time,name,data,epoch_time,timestamp
0,48.04849,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.650563
1,72.915982,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.675431
2,77.753696,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.680268


In [602]:
print(len(metrics_ack_rows), len(rcv_ack_rows))
for i in range(rcv_ack_cnt):
    time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
    # time_diff >= 1: not the matching metrics_update
    while time_diff > 0:
        # print("> 0:", i, time_diff)
        if i == 0:
            data = initial_ack_metrics.iloc[0]['data']
        else:
            data = metrics_ack_rows.iloc[i-1]['data']
        new_row_data = {'time': [rcv_ack_rows.iloc[i]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
        new_row = pd.DataFrame(new_row_data)
        metrics_ack_rows = insert(metrics_ack_rows, i, new_row)
        time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
    # time_diff < 0: missing metrics_update
    while time_diff <= -1:
        # print("<= -1:", i, time_diff)
        metrics_ack_rows.drop(index=metrics_ack_rows.index[i], inplace=True)
        time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
print(len(metrics_ack_rows), len(rcv_ack_rows))

57941 58032
58032 58032


In [603]:
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)

# check whether there's still mismatch exist.
time_diff_list = metrics_ack_rows['time'] - rcv_ack_rows['time']
mismatch_indices = time_diff_list[(time_diff_list <= -1) | (time_diff_list > 0)].index
if len(mismatch_indices) == 0:
    print("All Matched!")
else:
    print(mismatch_indices)

Index([588, 6974, 10978, 36046, 40414, 46773, 52981], dtype='int64')


In [604]:
ack_json_list = []
## Add the initial_ack_metrics for temporary
print(initial_ack_metrics)
metrics_ack_rows = pd.concat([initial_ack_metrics, metrics_ack_rows], axis=0).reset_index(drop=True)
for i in range(len(metrics_ack_rows)):
    s = metrics_ack_rows.iloc[i]['data'].replace("\'", "\"")
    json_object = json.loads(s)
    ack_json_list.append(json_object)

metrics_ack_df = pd.DataFrame(ack_json_list)
# Fill missing values in each row with the previous row's values
metrics_ack_df = metrics_ack_df.ffill(axis=0)

## drop initial_ack_metrics
metrics_ack_rows.drop(index=metrics_ack_rows.index[0], inplace=True)
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
metrics_ack_df.drop(index=metrics_ack_df.index[0], inplace=True)
metrics_ack_df = metrics_ack_df.reset_index(drop=True)
metrics_ack_df[:5]

       time                      name  \
0  3.617613  recovery:metrics_updated   

                                                data    epoch_time  \
0  {'min_rtt': 0, 'smoothed_rtt': 0, 'latest_rtt'...  1.706285e+12   

                    timestamp  
0  2024-01-26 15:55:28.606132  


Unnamed: 0,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
0,44.05906,44.05906,44.05906,22.02953,40064.0,803,2.0
1,21.323575,41.217,21.323575,18.077,40064.0,2730,10.0
2,16.165704,38.085,16.165704,19.82,40064.0,2184,8.0
3,16.165704,36.095,22.170922,18.843,40064.0,3003,11.0
4,16.165704,33.908,18.603118,18.505,40064.0,2457,9.0


In [605]:
metrics_ack_rows = pd.concat([metrics_ack_rows, metrics_ack_df], axis=1).reset_index(drop=True)
# since we have parse out all the information in data, we can drop the data cl=olumn
metrics_ack_rows = metrics_ack_rows.drop(columns=['data'])
metrics_ack_rows[:5]

Unnamed: 0,time,name,epoch_time,timestamp,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
0,47.729589,recovery:metrics_updated,1706285000000.0,2024-01-26 15:55:28.650244,44.05906,44.05906,44.05906,22.02953,40064.0,803,2.0
1,72.915097,recovery:metrics_updated,1706285000000.0,2024-01-26 15:55:28.675430,21.323575,41.217,21.323575,18.077,40064.0,2730,10.0
2,77.751403,recovery:metrics_updated,1706285000000.0,2024-01-26 15:55:28.680266,16.165704,38.085,16.165704,19.82,40064.0,2184,8.0
3,87.853245,recovery:metrics_updated,1706285000000.0,2024-01-26 15:55:28.690368,16.165704,36.095,22.170922,18.843,40064.0,3003,11.0
4,92.447888,recovery:metrics_updated,1706285000000.0,2024-01-26 15:55:28.694963,16.165704,33.908,18.603118,18.505,40064.0,2457,9.0


In [606]:
# Check whehter the length is equal before concating metrics into rcv_ack_rows
print(len(rcv_ack_rows), len(metrics_ack_df))

58032 58032


In [607]:
rcv_ack_rows = pd.concat([rcv_ack_rows, metrics_ack_df], axis=1)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)

print(len(rcv_ack_rows), len(metrics_ack_df))
rcv_ack_rows[-5:]

58032 58032


Unnamed: 0,time,name,data,epoch_time,timestamp,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
58027,450040.496991,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.643012,14.988589,29.995,32.66761,3.658,31566.0,4400,16.0
58028,450051.062386,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.653577,14.988589,29.91,29.320823,2.912,31566.0,4100,14.0
58029,450051.065377,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.653580,14.988589,28.804,21.06582,4.395,31566.0,3000,10.0
58030,450072.268961,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.674784,14.988589,29.51,34.455816,4.709,31566.0,1900,6.0
58031,450082.701927,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 16:02:58.685217,14.988589,29.908,32.701801,4.329,31566.0,0,6.0


Mapping the ACK ranges

In [608]:
acked_ranges_series = rcv_ack_rows['data']
acked_ranges_list = []
for i in range(len(acked_ranges_series)):
    s = acked_ranges_series.iloc[i]
    data_dict = json.loads(s.replace("\'", "\""))
    # Extract 'acked_ranges' from all frames
    acked_ranges = [range_entry for frame in data_dict['frames'] if 'acked_ranges' in frame for range_entry in frame['acked_ranges']]
    acked_ranges_list.append(acked_ranges)

acked_ranges_df = pd.DataFrame({"acked_ranges": acked_ranges_list})
acked_ranges_df[:5]

Unnamed: 0,acked_ranges
0,[[0]]
1,"[[0, 4]]"
2,"[[0, 9]]"
3,"[[0, 11]]"
4,"[[0, 15]]"


In [609]:
rcv_ack_rows = pd.concat([rcv_ack_rows, acked_ranges_df], axis=1)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)

rcv_ack_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight,acked_ranges
0,48.04849,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.650563,44.05906,44.05906,44.05906,22.02953,40064.0,803,2.0,[[0]]
1,72.915982,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.675431,21.323575,41.217,21.323575,18.077,40064.0,2730,10.0,"[[0, 4]]"
2,77.753696,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.680268,16.165704,38.085,16.165704,19.82,40064.0,2184,8.0,"[[0, 9]]"
3,87.855267,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.690370,16.165704,36.095,22.170922,18.843,40064.0,3003,11.0,"[[0, 11]]"
4,92.44872,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.694963,16.165704,33.908,18.603118,18.505,40064.0,2457,9.0,"[[0, 15]]"


In [610]:
# parse out the packet_number & offset & length
pk_sent_series =  pk_sent_rows['data']
pk_num_list = []
offset_list = []
length_list = []
for i in range(len(pk_sent_series)):
    s = pk_sent_series.iloc[i]
    data_dict = json.loads(s.replace("\'", "\""))
    packet_number = data_dict['header']['packet_number']
    # Initialize offset to None in case 'frame_type': 'stream' is not found
    offset = None
    # Iterate through frames to find 'offset' for 'frame_type': 'stream'
    for frame in data_dict.get('frames', []):
        if frame.get('frame_type') == 'stream':
            offset = frame.get('offset')
            length = frame.get('length')
            break  # Stop iterating once 'offset' is found
    
    pk_num_list.append(packet_number)
    offset_list.append(offset)
    length_list.append(length)

pk_num_df = pd.DataFrame({"packet_number": pk_num_list, "offset": offset_list, "length": length_list})
pk_num_df[:5]

Unnamed: 0,packet_number,offset,length
0,2,0,250
1,3,250,250
2,4,500,250
3,5,750,250
4,6,1000,250


In [611]:
pk_sent_rows = pd.concat([pk_sent_rows, pk_num_df], axis=1)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)

pk_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight,packet_number,offset,length
0,48.581362,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.651096,578,2,2,0,250
1,49.613347,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.652128,851,3,3,250,250
2,51.595266,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.654110,1124,4,4,500,250
3,53.574539,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.656089,1397,5,5,750,250
4,55.558341,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.658073,1670,6,6,1000,250


In [612]:
pk_sent_rows['smoothed_rtt'] = np.nan
pk_sent_rows['latest_rtt'] = np.nan
pk_sent_rows['rtt_variance'] = np.nan
pk_sent_rows['congestion_window'] = np.nan

pk_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window
0,48.581362,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.651096,578,2,2,0,250,,,,
1,49.613347,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.652128,851,3,3,250,250,,,,
2,51.595266,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.654110,1124,4,4,500,250,,,,
3,53.574539,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.656089,1397,5,5,750,250,,,,
4,55.558341,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.658073,1670,6,6,1000,250,,,,


In [613]:
def update_pk_sent_rows(row):
    acked_ranges = row['acked_ranges']
    smoothed_rtt = row['smoothed_rtt']
    latest_rtt = row['latest_rtt']
    rtt_variance = row['rtt_variance']
    congestion_window = row['congestion_window']

    for ack_range in acked_ranges:
        start_packet, end_packet = ack_range[0], ack_range[-1]
        existing_packets = set(pk_sent_rows['packet_number'])
        packet_numbers_to_update = set(range(start_packet, end_packet + 1)).intersection(existing_packets)

        mask = pk_sent_rows['packet_number'].isin(packet_numbers_to_update)
        pk_sent_rows.loc[mask, 'smoothed_rtt'] = pk_sent_rows.loc[mask, 'smoothed_rtt'].fillna(smoothed_rtt)
        pk_sent_rows.loc[mask, 'latest_rtt'] = pk_sent_rows.loc[mask, 'latest_rtt'].fillna(latest_rtt)
        pk_sent_rows.loc[mask, 'congestion_window'] = pk_sent_rows.loc[mask, 'congestion_window'].fillna(congestion_window)
        pk_sent_rows.loc[mask, 'rtt_variance'] = pk_sent_rows.loc[mask, 'rtt_variance'].fillna(rtt_variance)

# Apply the custom update function to each row in rcv_ack_rows
rcv_ack_rows.apply(update_pk_sent_rows, axis=1)

# Display the updated pk_sent_rows
pk_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window
0,48.581362,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.651096,578,2,2,0,250,41.217,21.323575,18.077,40064.0
1,49.613347,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.652128,851,3,3,250,250,41.217,21.323575,18.077,40064.0
2,51.595266,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.654110,1124,4,4,500,250,41.217,21.323575,18.077,40064.0
3,53.574539,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.656089,1397,5,5,750,250,38.085,16.165704,19.82,40064.0
4,55.558341,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.658073,1670,6,6,1000,250,38.085,16.165704,19.82,40064.0


Identify lost packets

In [614]:
# Use ast.literal_eval to safely evaluate the string and extract 'packet_number'
lost_rows['packet_number'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['header']['packet_number'] if isinstance(x, str) else None)
lost_rows['trigger'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['trigger'] if isinstance(x, str) else None)
lost_rows[:5]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lost_rows['packet_number'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['header']['packet_number'] if isinstance(x, str) else None)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lost_rows['trigger'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['trigger'] if isinstance(x, str) else None)


Unnamed: 0,time,name,data,epoch_time,timestamp,packet_number,trigger
57818,30119.666856,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:58.722181,15030,time_threshold
257092,134199.582528,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:57:42.802097,67007,time_threshold
272272,142882.758682,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:57:51.485273,71044,time_threshold
272274,142882.766082,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:57:51.485281,71046,time_threshold
272275,142882.767537,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:57:51.485282,71047,time_threshold


In [615]:
lost_pk_csv_file_path = path + "middle/" + f"lost_pk_{time}_{port}.csv"
lost_rows.to_csv(lost_pk_csv_file_path, index=False)

In [616]:
## set to True if the packet is lost
pk_sent_rows['packet_lost'] = False

# Iterate through rows and set 'packet_lost' to True where 'packet_number' values match
for _, lost_row in lost_rows.iterrows():
    packet_number = lost_row['packet_number']
    
    # Check if 'packet_number' exists in pk_sent_rows
    if packet_number in pk_sent_rows['packet_number'].values:
        pk_sent_rows.loc[pk_sent_rows['packet_number'] == packet_number, 'packet_lost'] = True

pk_sent_rows[19340:19345]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost
19340,38749.665494,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:56:07.352180,5500,20,19357,4837750,250,29.848,25.803092,4.652,50137.0,False
19341,38751.63687,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:56:07.354152,5775,21,19358,4838000,250,29.848,25.803092,4.652,50137.0,False
19342,38753.581617,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:56:07.356096,6050,22,19359,4838250,250,28.587,19.765626,6.009,50137.0,False
19343,38755.580321,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:56:07.358095,4950,18,19360,4838500,250,28.587,19.765626,6.009,50137.0,False
19344,38757.676322,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:56:07.360191,5225,19,19361,4838750,250,28.587,19.765626,6.009,50137.0,False


In [617]:
cols = ['time', 'epoch_time', 'timestamp', 'name', 'packet_number', 'offset', 'length', 'bytes_in_flight', 'packets_in_flight', 'smoothed_rtt', 'latest_rtt', 'rtt_variance', 'congestion_window', 'packet_lost', 'data']
processed_df = pk_sent_rows[cols]
processed_df[:5]

Unnamed: 0,time,epoch_time,timestamp,name,packet_number,offset,length,bytes_in_flight,packets_in_flight,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost,data
0,48.581362,1706285000000.0,2024-01-26 15:55:28.651096,transport:packet_sent,2,0,250,578,2,41.217,21.323575,18.077,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
1,49.613347,1706285000000.0,2024-01-26 15:55:28.652128,transport:packet_sent,3,250,250,851,3,41.217,21.323575,18.077,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
2,51.595266,1706285000000.0,2024-01-26 15:55:28.654110,transport:packet_sent,4,500,250,1124,4,41.217,21.323575,18.077,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
3,53.574539,1706285000000.0,2024-01-26 15:55:28.656089,transport:packet_sent,5,750,250,1397,5,38.085,16.165704,19.82,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
4,55.558341,1706285000000.0,2024-01-26 15:55:28.658073,transport:packet_sent,6,1000,250,1670,6,38.085,16.165704,19.82,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."


In [618]:
csv_file_path = path + "data/" + f"processed_sent_{time}_{port}.csv"
processed_df.to_csv(csv_file_path, sep='@', index=False)

In [619]:
weird_length_list =[]
for i in range(len(processed_df)):
    if processed_df.iloc[i]['length'] != 250:
        weird_length_list.append(processed_df.iloc[i])

weird_length_df = pd.DataFrame(weird_length_list)

In [620]:
print(len(weird_length_df))
weird_length_df[:5]

1246


Unnamed: 0,time,epoch_time,timestamp,name,packet_number,offset,length,bytes_in_flight,packets_in_flight,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost,data
2097,4250.162927,1706285000000.0,2024-01-26 15:55:32.852677,transport:packet_sent,2106,524250,1250,4857,14,29.426,30.491008,3.731,50137.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
4593,9250.462719,1706285000000.0,2024-01-26 15:55:37.852977,transport:packet_sent,4605,1149250,1250,18061,63,61.972,27.904756,34.982,50137.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
15096,30263.147658,1706285000000.0,2024-01-26 15:55:58.865662,transport:packet_sent,15112,3775750,1250,5125,15,26.566,22.959716,3.208,50137.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
24065,48201.569259,1706285000000.0,2024-01-26 15:56:16.804084,transport:packet_sent,24086,6019000,500,4650,16,30.017,23.571697,4.524,50137.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
25091,50263.620794,1706285000000.0,2024-01-26 15:56:18.866136,transport:packet_sent,25112,6275750,1250,5132,15,31.936,38.114586,3.952,50137.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."


In [621]:
# Sum up the 'length' column
total_length = processed_df['length'].sum()

print(f'Total Length: {total_length}')

Total Length: 56415000


## Receiver side data

In [622]:
pk_rcv_df = pk_rcv_rows.reset_index(drop=True)
pk_rcv_df[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp
0,138.097239,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662450
1,138.115729,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662469
2,138.126145,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662479
3,143.889895,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.668243
4,143.914739,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.668268


In [623]:
pk_rcv_series =  pk_rcv_df['data']
pk_rcv_num_list = []
offset_rcv_list = []
length_rcv_list = []
for i in range(len(pk_rcv_series)):
    s = pk_rcv_series.iloc[i]
    data_dict = json.loads(s.replace("\'", "\""))
    packet_number = data_dict['header']['packet_number']
    # Initialize offset to None in case 'frame_type': 'stream' is not found
    offset = None
    # Iterate through frames to find 'offset' for 'frame_type': 'stream'
    for frame in data_dict.get('frames', []):
        if frame.get('frame_type') == 'stream':
            offset = frame.get('offset')
            length = frame.get('length')
            break  # Stop iterating once 'offset' is found
    
    pk_rcv_num_list.append(packet_number)
    offset_rcv_list.append(offset)
    length_rcv_list.append(length)

pk_rcv_df['packet_number'] = pk_rcv_num_list
pk_rcv_df['offset'] = offset_rcv_list
pk_rcv_df['length'] = length_rcv_list

pk_rcv_df[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,packet_number,offset,length
0,138.097239,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662450,2,0,250
1,138.115729,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662469,3,250,250
2,138.126145,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.662479,4,500,250
3,143.889895,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.668243,5,750,250
4,143.914739,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:28.668268,6,1000,250


In [624]:
cols = ['time', 'epoch_time', 'timestamp', 'name', 'packet_number', 'offset', 'length', 'data']
processed_rcv_df = pk_rcv_df[cols]
processed_rcv_df[:5]


Unnamed: 0,time,epoch_time,timestamp,name,packet_number,offset,length,data
0,138.097239,1706285000000.0,2024-01-26 15:55:28.662450,transport:packet_received,2,0,250,"{'header': {'packet_type': '1RTT', 'packet_num..."
1,138.115729,1706285000000.0,2024-01-26 15:55:28.662469,transport:packet_received,3,250,250,"{'header': {'packet_type': '1RTT', 'packet_num..."
2,138.126145,1706285000000.0,2024-01-26 15:55:28.662479,transport:packet_received,4,500,250,"{'header': {'packet_type': '1RTT', 'packet_num..."
3,143.889895,1706285000000.0,2024-01-26 15:55:28.668243,transport:packet_received,5,750,250,"{'header': {'packet_type': '1RTT', 'packet_num..."
4,143.914739,1706285000000.0,2024-01-26 15:55:28.668268,transport:packet_received,6,1000,250,"{'header': {'packet_type': '1RTT', 'packet_num..."


In [625]:
csv_file_path = path + "data/" + f"processed_rcv_{time}_{port}.csv"
processed_rcv_df.to_csv(csv_file_path, sep='@')

In [626]:
weird_length_list =[]
for i in range(len(pk_rcv_df)):
    if pk_rcv_df.iloc[i]['length'] != 250:
        weird_length_list.append(pk_rcv_df.iloc[i])

weird_length_df = pd.DataFrame(weird_length_list)

In [627]:
weird_length_df[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,packet_number,offset,length
2097,4339.119894,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:32.863473,2106,524250,1250
4593,9344.369111,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:37.868722,4605,1149250,1250
15095,30353.749155,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:55:58.878102,15112,3775750,1250
24064,48289.651544,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:56:16.814004,24086,6019000,500
25090,50357.800814,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1706285000000.0,2024-01-26 15:56:18.882153,25112,6275750,1250
