In [1]:
import os
import csv
import ast
import json
import statistics

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime, timedelta

In [1209]:
date = "20240217"
date_with_dash = "2024-02-17"
port = "5203"
phone_time = "0631"
time = "1430"
device = "sm01"
sent_file_name = f"log_{date}_{time}_{port}_server"
received_file_name = f"log_{date}_{phone_time}_{port}_client"
path = f"/Volumes/MOLLY256/MOXA/{date_with_dash}/QUIC-280sec/{device}/#01/"

In [1210]:
sync_file_name = f"/Volumes/MOLLY256/MOXA/{date_with_dash}/QUIC-280sec/{device}/time_sync_{device}.json"
# sync_file = path + "raw/" + sync_file_name
with open(sync_file_name, 'r') as file:
    data = json.load(file)

# Extract values from the dictionary
values = list(data.values())
mean_diff = values[6] * 1000

### Transform to JSON & CSV file
Process the qlog file to json file & csv file.

In [1211]:
def QlogToJsonEntry(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Add commas between lines
    json_str = ",".join(lines)
    # Surround the entire string with square brackets to create a JSON array
    json_str = "[" + json_str + "]"
    # Load the JSON array
    json_entry = json.loads(json_str)
    
    return json_entry

def QlogToJson(json_entry, json_file_path):
    with open(json_file_path, 'w') as json_file:
        json.dump(json_entry, json_file, indent=2)

def JsonToCsv(json_entry, csv_file_path):
     # Open CSV file for writing
    with open(csv_file_path, 'w', newline='') as csv_file:
        # Create a CSV writer
        csv_writer = csv.writer(csv_file)

        # Write header row based on the keys of the second JSON object (assuming at least two objects are present)
        if len(json_entry) >= 2:
            header = list(json_entry[1].keys())
            csv_writer.writerow(header)

            # Write data rows starting from the second object
            for entry in json_entry[1:]:
                csv_writer.writerow(entry.values())

In [1212]:
# sender_side_file
sent_raw_path = path + "raw/" + sent_file_name
sent_qlog_file_path = sent_raw_path + ".qlog"
sent_json_file_path = sent_raw_path + ".json"
sent_csv_file_path = sent_raw_path + ".csv"
sent_json_entry = QlogToJsonEntry(sent_qlog_file_path)
QlogToJson(sent_json_entry, sent_json_file_path)
JsonToCsv(sent_json_entry, sent_csv_file_path)

In [1213]:
received_raw_path = path + "raw/" + received_file_name
received_qlog_file_path = received_raw_path + ".qlog"
received_json_file_path = received_raw_path + ".json"
received_csv_file_path = received_raw_path + ".csv"
received_json_entry = QlogToJsonEntry(received_qlog_file_path)
QlogToJson(received_json_entry, received_json_file_path)
JsonToCsv(received_json_entry, received_csv_file_path)

In [1214]:
sent_df = pd.read_csv(sent_csv_file_path)
received_df = pd.read_csv(received_csv_file_path)

Set time to UMT+8.

In [1215]:
def GetStartTime(json_data):
    # unit: ms
    refTime = json_data[0]["trace"]["common_fields"]["reference_time"]
    return refTime

def ProcessTime(df, reference_time):
    # Extract the "time" values from the DataFrame
    original_times = (df['time'].astype(float))

    # Calculate "epoch_time" and convert to timestamps
    epoch_times = (reference_time + original_times)
    timestamps = pd.to_datetime(epoch_times, unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S.%f')

    df['epoch_time'] = epoch_times
    df['timestamp'] = timestamps

    return df

In [1216]:
# No matter downlink or uplink, the file time that need to change is client side.
if int(port)%2 == 0: # UL
    clientStartTime = GetStartTime(sent_json_entry)
    print(clientStartTime)
    serverStartTime = GetStartTime(received_json_entry)
    print(serverStartTime)

    senderRefTime = clientStartTime + mean_diff
    rcverRefTime = serverStartTime

else:   # DL
    clientStartTime = GetStartTime(received_json_entry)
    print(clientStartTime)
    serverStartTime = GetStartTime(sent_json_entry)
    print(serverStartTime)
    startTimeDiff = (clientStartTime - serverStartTime) + mean_diff

    senderRefTime = serverStartTime
    rcverRefTime = clientStartTime + mean_diff


1708151492350.6018
1708151447027.9858


In [1217]:
sent_df = ProcessTime(sent_df, senderRefTime)
# Add 8 hours to both epoch times and timestamps to match UMT+8
# Also sync time with server
epoch_times_gmt8 = sent_df["epoch_time"] + 8 * 3600 * 1000
sent_df["epoch_time"] = epoch_times_gmt8
timestamps_gmt8 = pd.to_datetime(epoch_times_gmt8, unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S.%f')
sent_df["timestamp"] = timestamps_gmt8

sent_df[-5:]

Unnamed: 0,time,name,data,epoch_time,timestamp
532476,280203.224061,recovery:metrics_updated,"{'smoothed_rtt': 20.775, 'latest_rtt': 21.7964...",1708181000000.0,2024-02-17 14:35:27.231209
532477,280203.227902,recovery:loss_timer_updated,{'event_type': 'cancelled'},1708181000000.0,2024-02-17 14:35:27.231213
532478,280203.231744,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.231217
532479,280213.463062,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.241448
532480,280213.499939,transport:connection_closed,"{'owner': 'remote', 'application_code': 0, 're...",1708181000000.0,2024-02-17 14:35:27.241485


In [1218]:
received_df = ProcessTime(received_df, rcverRefTime)
# if the sender is server, then it is no need to calculate time difference
epoch_times_gmt8 = received_df["epoch_time"] + 8 * 3600 * 1000
received_df["epoch_time"] = epoch_times_gmt8
timestamps_gmt8 = pd.to_datetime(epoch_times_gmt8, unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S.%f')
received_df["timestamp"] = timestamps_gmt8

received_df[-5:]

Unnamed: 0,time,name,data,epoch_time,timestamp
186504,280229.259476,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.215887
186505,280229.294268,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.215922
186506,280229.392758,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'a0...",1708181000000.0,2024-02-17 14:35:27.216020
186507,280235.930622,transport:connection_closed,"{'owner': 'local', 'application_code': 0, 'rea...",1708181000000.0,2024-02-17 14:35:27.222558
186508,280236.330153,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'a0...",1708181000000.0,2024-02-17 14:35:27.222958


Parse the data.

In [1219]:
# sender side data
metrics_all_rows = sent_df[(sent_df['name'] == 'recovery:metrics_updated') & (sent_df['data'].str.contains("'bytes_in_flight':"))]
metrics_sent_rows = sent_df[(sent_df['name'] == 'recovery:metrics_updated') & (sent_df['data'].str.contains("{'bytes_in_flight':"))]
metrics_ack_rows = sent_df[(sent_df['name'] == 'recovery:metrics_updated') & (sent_df['data'].str.contains("'latest_rtt':"))]
total_sent_rows = sent_df[(sent_df['name'] == 'transport:packet_sent')]
pk_sent_rows = sent_df[(sent_df['name'] == 'transport:packet_sent') & (sent_df['data'].str.contains("'frame_type': 'stream'"))]
rcv_ack_rows = sent_df[(sent_df['name'] == 'transport:packet_received') & (sent_df['data'].str.contains("'frame_type': 'ack'")) & (sent_df['data'].str.contains("'packet_type': '1RTT'"))]
lost_rows = sent_df[sent_df['name'] == 'recovery:packet_lost']

# Get the count of rows
metrics_all_cnt = len(metrics_all_rows)
metrics_c_cnt = len(metrics_sent_rows)
metrics_ack_cnt = len(metrics_ack_rows)
total_sent_cnt = len(total_sent_rows)
pk_sent_cnt = len(pk_sent_rows)
rcv_ack_cnt = len(rcv_ack_rows)
lost_cnt = len(lost_rows)

print("packet_sent: ", pk_sent_cnt, metrics_c_cnt)
print("ack: ", rcv_ack_cnt, metrics_ack_cnt)
print(metrics_all_cnt, metrics_c_cnt, metrics_ack_cnt, pk_sent_cnt, rcv_ack_cnt, lost_cnt)

packet_sent:  139624 139642
ack:  37693 37653
177296 139642 37653 139624 37693 28


In [1220]:
pk_rcv_rows = received_df[(received_df['name'] == "transport:packet_received") & (received_df['data'].str.contains("'frame_type': 'stream'"))]
pk_rcv_rows = pk_rcv_rows.reset_index(drop=True)
print(len(pk_rcv_rows))
pk_rcv_rows[:5]

139622


Unnamed: 0,time,name,data,epoch_time,timestamp
0,229.83375,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216461
1,229.846354,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216474
2,229.856927,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216485
3,229.86625,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216494
4,235.873906,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.222502


## Deal with sender side data
Concat `transport:packet_sent` & `recovery:metrics_updated`.

In [1221]:
metrics_sent_csv_file_path = path + "middle/" + f"sent_metrics_{time}_{port}.csv"
metrics_sent_rows.to_csv(metrics_sent_csv_file_path, index=False)
pk_sent_csv_file_path = path + "middle/" + f"pk_sent_{time}_{port}.csv"
pk_sent_rows.to_csv(pk_sent_csv_file_path, index=False)

In [1222]:
def insert(df, idx, new_row):
    df1 = df.iloc[:idx, :]
    df2 = df.iloc[idx:, :]
    df_new = pd.concat([df1, new_row, df2], ignore_index=True)
    return df_new

In [1223]:
metrics_sent_rows = metrics_sent_rows.reset_index(drop=True)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)
print(metrics_sent_rows[:5])
print(pk_sent_rows[:5])


         time                      name  \
0    1.949713  recovery:metrics_updated   
1    1.953973  recovery:metrics_updated   
2  179.296948  recovery:metrics_updated   
3  179.980144  recovery:metrics_updated   
4  181.166412  recovery:metrics_updated   

                                                data    epoch_time  \
0  {'bytes_in_flight': 1161, 'packets_in_flight': 2}  1.708180e+12   
1  {'bytes_in_flight': 1252, 'packets_in_flight': 3}  1.708180e+12   
2   {'bytes_in_flight': 310, 'packets_in_flight': 1}  1.708180e+12   
3   {'bytes_in_flight': 589, 'packets_in_flight': 2}  1.708180e+12   
4   {'bytes_in_flight': 862, 'packets_in_flight': 3}  1.708180e+12   

                    timestamp  
0  2024-02-17 14:30:47.029935  
1  2024-02-17 14:30:47.029939  
2  2024-02-17 14:30:47.207282  
3  2024-02-17 14:30:47.207965  
4  2024-02-17 14:30:47.209152  
         time                   name  \
0  179.974487  transport:packet_sent   
1  181.159219  transport:packet_sent   
2  183.1

In [1224]:
ori_recover_c_len = len(metrics_sent_rows)
for i in range(pk_sent_cnt):
    if(i >= len(metrics_sent_rows)):
        data = metrics_sent_rows.iloc[i-1]['data']
        new_row_data = {'time': [pk_sent_rows.iloc[i]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
        new_row = pd.DataFrame(new_row_data)
        metrics_sent_rows = pd.concat([metrics_sent_rows, new_row], ignore_index=True)
        continue
    time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']
    # print(i, time_diff)
    # time_diff >= 1: not the matching metrics_update
    while time_diff >= 1:
        data = metrics_sent_rows.iloc[i-1]['data']
        new_row_data = {'time': [pk_sent_rows.iloc[i]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
        new_row = pd.DataFrame(new_row_data)
        # print(new_row)
        metrics_sent_rows = insert(metrics_sent_rows, i, new_row)
        time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']
    # time_diff < 0: missing metrics_update
    while time_diff < 0:
        # print(i, time_diff_list)
        metrics_sent_rows.drop(index=metrics_sent_rows.index[i], inplace=True)
        time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']

    

# if len(metrics_sent_rows) < pk_sent_cnt:
#     d = pk_sent_cnt - len(metrics_sent_rows)
# data = metrics_sent_rows.iloc[len(metrics_sent_rows)-1]['data']

# for i in range(d):
#     last_row_data = {'time': [pk_sent_rows.iloc[len(metrics_sent_rows)-1]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
#     new_row_df = pd.DataFrame(last_row_data)
#     metrics_sent_rows = pd.concat([metrics_sent_rows, new_row], ignore_index=True)

print(ori_recover_c_len, len(metrics_sent_rows))


139642 139624


In [1225]:
metrics_sent_rows = metrics_sent_rows.reset_index(drop=True)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)
print(len(metrics_sent_rows), len(pk_sent_rows))

# check whether there's still mismatch exist.
time_diff_list = metrics_sent_rows['time'] - pk_sent_rows['time']
mismatch_indices = time_diff_list[(time_diff_list >= 1) | (time_diff_list < 0)].index
if len(mismatch_indices) == 0:
    print("All Matched!")
else:
    print(mismatch_indices)


139624 139624
All Matched!


In [1226]:
# extract bytes_in_flight & packets_in_flight
metrics_sent_rows['bytes_in_flight'] = None
metrics_sent_rows['packets_in_flight'] = None

# Use ast.literal_eval to safely evaluate the string and extract 'bytes_in_flight' and 'packets_in_flight'
metrics_sent_rows[['bytes_in_flight', 'packets_in_flight']] = metrics_sent_rows['data'].apply(
    lambda x: pd.Series(ast.literal_eval(x)) if isinstance(x, str) else pd.Series([None, None]))

metrics_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight
0,179.980144,recovery:metrics_updated,"{'bytes_in_flight': 589, 'packets_in_flight': 2}",1708180000000.0,2024-02-17 14:30:47.207965,589,2
1,181.166412,recovery:metrics_updated,"{'bytes_in_flight': 862, 'packets_in_flight': 3}",1708180000000.0,2024-02-17 14:30:47.209152,862,3
2,183.134005,recovery:metrics_updated,"{'bytes_in_flight': 1135, 'packets_in_flight': 4}",1708180000000.0,2024-02-17 14:30:47.211119,1135,4
3,185.169204,recovery:metrics_updated,"{'bytes_in_flight': 1408, 'packets_in_flight': 5}",1708180000000.0,2024-02-17 14:30:47.213155,1408,5
4,187.132815,recovery:metrics_updated,"{'bytes_in_flight': 1681, 'packets_in_flight': 6}",1708180000000.0,2024-02-17 14:30:47.215118,1681,6


In [1227]:
# Add bytes_in_flight & packets_in_flight to pk_sent_rows
pk_sent_rows['bytes_in_flight'] = metrics_sent_rows['bytes_in_flight']
pk_sent_rows['packets_in_flight'] = metrics_sent_rows['packets_in_flight']

pk_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight
0,179.974487,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.207960,589,2
1,181.159219,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.209145,862,3
2,183.129255,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.211115,1135,4
3,185.153769,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.213139,1408,5
4,187.128555,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.215114,1681,6


Concat `transport:packet_received` & `recovery:metrics_updated`.

In [1228]:
metrics_ack_csv_file_path = path + "middle/" + f"ack_metrics_{time}_{port}.csv" 
metrics_ack_rows.to_csv(metrics_ack_csv_file_path, index=False)
rcv_ack_csv_file_path = path + "middle/" + f"rcv_ack_{time}_{port}.csv"
rcv_ack_rows.to_csv(rcv_ack_csv_file_path, index=False)

In [1229]:
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)
initial_ack_metrics = metrics_ack_rows.iloc[[0]]
metrics_ack_rows.drop(index=metrics_ack_rows.index[0], inplace=True)
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)

In [1230]:
metrics_ack_rows[:3]

Unnamed: 0,time,name,data,epoch_time,timestamp
0,179.092171,recovery:metrics_updated,"{'min_rtt': 177.148046, 'smoothed_rtt': 177.14...",1708180000000.0,2024-02-17 14:30:47.207078
1,179.265519,recovery:metrics_updated,"{'smoothed_rtt': 177.149, 'latest_rtt': 177.16...",1708180000000.0,2024-02-17 14:30:47.207251
2,199.8093,recovery:metrics_updated,"{'min_rtt': 14.647779, 'smoothed_rtt': 156.836...",1708180000000.0,2024-02-17 14:30:47.227795


In [1231]:
rcv_ack_rows[:3]

Unnamed: 0,time,name,data,epoch_time,timestamp
0,179.271176,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.207257
1,199.815865,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.227801
2,209.556746,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.237542


In [1232]:
print(len(metrics_ack_rows), len(rcv_ack_rows))
for i in range(rcv_ack_cnt):
    if(i >= len(metrics_ack_rows)):
        data = metrics_ack_rows.iloc[i-1]['data']
        new_row_data = {'time': [rcv_ack_rows.iloc[i]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
        new_row = pd.DataFrame(new_row_data)
        metrics_ack_rows = pd.concat([metrics_ack_rows, new_row], ignore_index=True)
        continue
    time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
    # time_diff >= 1: not the matching metrics_update
    while time_diff > 0:
        # print("> 0:", i, time_diff)
        if i == 0:
            data = initial_ack_metrics.iloc[0]['data']
        else:
            data = metrics_ack_rows.iloc[i-1]['data']
        new_row_data = {'time': [rcv_ack_rows.iloc[i]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
        new_row = pd.DataFrame(new_row_data)
        metrics_ack_rows = insert(metrics_ack_rows, i, new_row)
        time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
    # time_diff < 0: missing metrics_update
    while time_diff <= -1:
        # print("<= -1:", i, time_diff)
        metrics_ack_rows.drop(index=metrics_ack_rows.index[i], inplace=True)
        time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
print(len(metrics_ack_rows), len(rcv_ack_rows))

37652 37693
37693 37693


In [1233]:
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)

# check whether there's still mismatch exist.
time_diff_list = metrics_ack_rows['time'] - rcv_ack_rows['time']
mismatch_indices = time_diff_list[(time_diff_list <= -1) | (time_diff_list > 0)].index
if len(mismatch_indices) == 0:
    print("All Matched!")
else:
    print(mismatch_indices)

All Matched!


In [1234]:
ack_json_list = []
## Add the initial_ack_metrics for temporary
print(initial_ack_metrics)
metrics_ack_rows = pd.concat([initial_ack_metrics, metrics_ack_rows], axis=0).reset_index(drop=True)
for i in range(len(metrics_ack_rows)):
    s = metrics_ack_rows.iloc[i]['data'].replace("\'", "\"")
    json_object = json.loads(s)
    ack_json_list.append(json_object)

metrics_ack_df = pd.DataFrame(ack_json_list)
# Fill missing values in each row with the previous row's values
metrics_ack_df = metrics_ack_df.ffill(axis=0)

## drop initial_ack_metrics
metrics_ack_rows.drop(index=metrics_ack_rows.index[0], inplace=True)
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
metrics_ack_df.drop(index=metrics_ack_df.index[0], inplace=True)
metrics_ack_df = metrics_ack_df.reset_index(drop=True)
metrics_ack_df[:5]

       time                      name  \
0  1.941262  recovery:metrics_updated   

                                                data    epoch_time  \
0  {'min_rtt': 0, 'smoothed_rtt': 0, 'latest_rtt'...  1.708180e+12   

                    timestamp  
0  2024-02-17 14:30:47.029926  


Unnamed: 0,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
0,177.148046,177.148046,177.148046,88.574023,40064.0,814,2.0
1,14.647779,156.836,14.647779,90.45,40064.0,1911,7.0
2,14.647779,139.517,18.286301,102.474,40064.0,2457,9.0
3,14.647779,124.133,16.445294,107.623,40064.0,2184,8.0
4,14.647779,114.966,50.802309,99.049,40064.0,6825,25.0


In [1235]:
metrics_ack_rows = pd.concat([metrics_ack_rows, metrics_ack_df], axis=1).reset_index(drop=True)
# since we have parse out all the information in data, we can drop the data cl=olumn
metrics_ack_rows = metrics_ack_rows.drop(columns=['data'])
metrics_ack_rows[:5]

Unnamed: 0,time,name,epoch_time,timestamp,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
0,179.092171,recovery:metrics_updated,1708180000000.0,2024-02-17 14:30:47.207078,177.148046,177.148046,177.148046,88.574023,40064.0,814,2.0
1,199.8093,recovery:metrics_updated,1708180000000.0,2024-02-17 14:30:47.227795,14.647779,156.836,14.647779,90.45,40064.0,1911,7.0
2,209.548504,recovery:metrics_updated,1708180000000.0,2024-02-17 14:30:47.237534,14.647779,139.517,18.286301,102.474,40064.0,2457,9.0
3,213.718498,recovery:metrics_updated,1708180000000.0,2024-02-17 14:30:47.241704,14.647779,124.133,16.445294,107.623,40064.0,2184,8.0
4,252.072648,recovery:metrics_updated,1708180000000.0,2024-02-17 14:30:47.280058,14.647779,114.966,50.802309,99.049,40064.0,6825,25.0


In [1236]:
# Check whehter the length is equal before concating metrics into rcv_ack_rows
print(len(rcv_ack_rows), len(metrics_ack_df))

37693 37693


In [1237]:
rcv_ack_rows = pd.concat([rcv_ack_rows, metrics_ack_df], axis=1)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)

print(len(rcv_ack_rows), len(metrics_ack_df))
rcv_ack_rows[-5:]

37693 37693


Unnamed: 0,time,name,data,epoch_time,timestamp,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
37688,280178.445388,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.206431,11.642333,21.082,22.828188,2.671,35205.0,3025,11.0
37689,280178.45775,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.206443,11.642333,20.83,19.070874,2.506,35205.0,2475,9.0
37690,280188.222872,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.216208,11.642333,20.747,20.166635,2.045,35205.0,2175,7.0
37691,280193.350684,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.221336,11.642333,20.63,19.818753,1.765,35205.0,1350,4.0
37692,280203.231744,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708181000000.0,2024-02-17 14:35:27.231217,11.642333,20.775,21.796411,1.615,35205.0,0,4.0


Mapping the ACK ranges

In [1238]:
acked_ranges_series = rcv_ack_rows['data']
acked_ranges_list = []
for i in range(len(acked_ranges_series)):
    s = acked_ranges_series.iloc[i]
    data_dict = json.loads(s.replace("\'", "\""))
    # Extract 'acked_ranges' from all frames
    acked_ranges = [range_entry for frame in data_dict['frames'] if 'acked_ranges' in frame for range_entry in frame['acked_ranges']]
    acked_ranges_list.append(acked_ranges)

acked_ranges_df = pd.DataFrame({"acked_ranges": acked_ranges_list})
acked_ranges_df[:5]

Unnamed: 0,acked_ranges
0,[[0]]
1,"[[0, 5]]"
2,"[[0, 8]]"
3,"[[0, 11]]"
4,"[[0, 13]]"


In [1239]:
rcv_ack_rows = pd.concat([rcv_ack_rows, acked_ranges_df], axis=1)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)

rcv_ack_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight,acked_ranges
0,179.271176,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.207257,177.148046,177.148046,177.148046,88.574023,40064.0,814,2.0,[[0]]
1,199.815865,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.227801,14.647779,156.836,14.647779,90.45,40064.0,1911,7.0,"[[0, 5]]"
2,209.556746,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.237542,14.647779,139.517,18.286301,102.474,40064.0,2457,9.0,"[[0, 8]]"
3,213.728276,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.241714,14.647779,124.133,16.445294,107.623,40064.0,2184,8.0,"[[0, 11]]"
4,252.079353,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.280065,14.647779,114.966,50.802309,99.049,40064.0,6825,25.0,"[[0, 13]]"


In [1240]:
# parse out the packet_number & offset & length
pk_sent_series =  pk_sent_rows['data']
pk_num_list = []
offset_list = []
length_list = []
for i in range(len(pk_sent_series)):
    s = pk_sent_series.iloc[i]
    data_dict = json.loads(s.replace("\'", "\""))
    packet_number = data_dict['header']['packet_number']
    # Initialize offset to None in case 'frame_type': 'stream' is not found
    offset = None
    # Iterate through frames to find 'offset' for 'frame_type': 'stream'
    for frame in data_dict.get('frames', []):
        if frame.get('frame_type') == 'stream':
            offset = frame.get('offset')
            length = frame.get('length')
            break  # Stop iterating once 'offset' is found
    
    pk_num_list.append(packet_number)
    offset_list.append(offset)
    length_list.append(length)

pk_num_df = pd.DataFrame({"packet_number": pk_num_list, "offset": offset_list, "length": length_list})
pk_num_df[:5]

Unnamed: 0,packet_number,offset,length
0,2,0,250
1,3,250,250
2,4,500,250
3,5,750,250
4,6,1000,250


In [1241]:
pk_sent_rows = pd.concat([pk_sent_rows, pk_num_df], axis=1)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)

pk_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight,packet_number,offset,length
0,179.974487,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.207960,589,2,2,0,250
1,181.159219,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.209145,862,3,3,250,250
2,183.129255,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.211115,1135,4,4,500,250
3,185.153769,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.213139,1408,5,5,750,250
4,187.128555,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.215114,1681,6,6,1000,250


In [1242]:
pk_sent_rows['smoothed_rtt'] = np.nan
pk_sent_rows['latest_rtt'] = np.nan
pk_sent_rows['rtt_variance'] = np.nan
pk_sent_rows['congestion_window'] = np.nan

pk_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window
0,179.974487,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.207960,589,2,2,0,250,,,,
1,181.159219,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.209145,862,3,3,250,250,,,,
2,183.129255,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.211115,1135,4,4,500,250,,,,
3,185.153769,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.213139,1408,5,5,750,250,,,,
4,187.128555,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.215114,1681,6,6,1000,250,,,,


In [1243]:
def update_pk_sent_rows(row):
    acked_ranges = row['acked_ranges']
    smoothed_rtt = row['smoothed_rtt']
    latest_rtt = row['latest_rtt']
    rtt_variance = row['rtt_variance']
    congestion_window = row['congestion_window']

    for ack_range in acked_ranges:
        start_packet, end_packet = ack_range[0], ack_range[-1]
        existing_packets = set(pk_sent_rows['packet_number'])
        packet_numbers_to_update = set(range(start_packet, end_packet + 1)).intersection(existing_packets)

        mask = pk_sent_rows['packet_number'].isin(packet_numbers_to_update)
        pk_sent_rows.loc[mask, 'smoothed_rtt'] = pk_sent_rows.loc[mask, 'smoothed_rtt'].fillna(smoothed_rtt)
        pk_sent_rows.loc[mask, 'latest_rtt'] = pk_sent_rows.loc[mask, 'latest_rtt'].fillna(latest_rtt)
        pk_sent_rows.loc[mask, 'congestion_window'] = pk_sent_rows.loc[mask, 'congestion_window'].fillna(congestion_window)
        pk_sent_rows.loc[mask, 'rtt_variance'] = pk_sent_rows.loc[mask, 'rtt_variance'].fillna(rtt_variance)

# Apply the custom update function to each row in rcv_ack_rows
rcv_ack_rows.apply(update_pk_sent_rows, axis=1)

# Display the updated pk_sent_rows
pk_sent_rows[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window
0,179.974487,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.207960,589,2,2,0,250,156.836,14.647779,90.45,40064.0
1,181.159219,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.209145,862,3,3,250,250,156.836,14.647779,90.45,40064.0
2,183.129255,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.211115,1135,4,4,500,250,156.836,14.647779,90.45,40064.0
3,185.153769,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.213139,1408,5,5,750,250,156.836,14.647779,90.45,40064.0
4,187.128555,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.215114,1681,6,6,1000,250,139.517,18.286301,102.474,40064.0


Identify lost packets

In [1244]:
# Use ast.literal_eval to safely evaluate the string and extract 'packet_number'
lost_rows['packet_number'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['header']['packet_number'] if isinstance(x, str) else None)
lost_rows['trigger'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['trigger'] if isinstance(x, str) else None)
lost_rows[:5]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lost_rows['packet_number'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['header']['packet_number'] if isinstance(x, str) else None)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lost_rows['trigger'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['trigger'] if isinstance(x, str) else None)


Unnamed: 0,time,name,data,epoch_time,timestamp,packet_number,trigger
38015,21180.813867,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:08.208799,10474,reordering_threshold
38209,21281.039202,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:08.309025,10540,time_threshold
81159,44228.942356,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:31.256928,21989,time_threshold
105020,56678.721542,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:43.706707,28176,time_threshold
105022,56678.729713,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:43.706715,28177,time_threshold


In [1245]:
lost_pk_csv_file_path = path + "middle/" + f"lost_pk_{time}_{port}.csv"
lost_rows.to_csv(lost_pk_csv_file_path, index=False)

In [1246]:
## set to True if the packet is lost
pk_sent_rows['packet_lost'] = False

# Iterate through rows and set 'packet_lost' to True where 'packet_number' values match
for _, lost_row in lost_rows.iterrows():
    packet_number = lost_row['packet_number']
    
    # Check if 'packet_number' exists in pk_sent_rows
    if packet_number in pk_sent_rows['packet_number'].values:
        pk_sent_rows.loc[pk_sent_rows['packet_number'] == packet_number, 'packet_lost'] = True

pk_sent_rows[19340:19345]

Unnamed: 0,time,name,data,epoch_time,timestamp,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost
19340,38931.198023,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:25.959183,4400,16,19358,4844000,250,27.859,22.627401,4.832,34020.0,False
19341,38933.149203,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:25.961135,4675,17,19359,4844250,250,27.712,26.683103,3.917,34020.0,False
19342,38935.118263,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:25.963104,4950,18,19360,4844500,250,27.712,26.683103,3.917,34020.0,False
19343,38937.227356,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:25.965213,5225,19,19361,4844750,250,27.712,26.683103,3.917,34020.0,False
19344,38939.213526,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:31:25.967199,5500,20,19362,4845000,250,28.326,32.627785,4.166,34020.0,False


In [1247]:
cols = ['time', 'epoch_time', 'timestamp', 'name', 'packet_number', 'offset', 'length', 'bytes_in_flight', 'packets_in_flight', 'smoothed_rtt', 'latest_rtt', 'rtt_variance', 'congestion_window', 'packet_lost', 'data']
processed_df = pk_sent_rows[cols]
processed_df[:5]

Unnamed: 0,time,epoch_time,timestamp,name,packet_number,offset,length,bytes_in_flight,packets_in_flight,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost,data
0,179.974487,1708180000000.0,2024-02-17 14:30:47.207960,transport:packet_sent,2,0,250,589,2,156.836,14.647779,90.45,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
1,181.159219,1708180000000.0,2024-02-17 14:30:47.209145,transport:packet_sent,3,250,250,862,3,156.836,14.647779,90.45,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
2,183.129255,1708180000000.0,2024-02-17 14:30:47.211115,transport:packet_sent,4,500,250,1135,4,156.836,14.647779,90.45,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
3,185.153769,1708180000000.0,2024-02-17 14:30:47.213139,transport:packet_sent,5,750,250,1408,5,156.836,14.647779,90.45,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."
4,187.128555,1708180000000.0,2024-02-17 14:30:47.215114,transport:packet_sent,6,1000,250,1681,6,139.517,18.286301,102.474,40064.0,False,"{'header': {'packet_type': '1RTT', 'packet_num..."


In [1248]:
csv_file_path = path + "data/" + f"processed_sent_{time}_{port}.csv"
processed_df.to_csv(csv_file_path, sep='@', index=False)

## Receiver side data

In [1249]:
pk_rcv_df = pk_rcv_rows.reset_index(drop=True)
pk_rcv_df[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp
0,229.83375,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216461
1,229.846354,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216474
2,229.856927,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216485
3,229.86625,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216494
4,235.873906,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.222502


In [1250]:
pk_rcv_series =  pk_rcv_df['data']
pk_rcv_num_list = []
offset_rcv_list = []
length_rcv_list = []
for i in range(len(pk_rcv_series)):
    s = pk_rcv_series.iloc[i]
    data_dict = json.loads(s.replace("\'", "\""))
    packet_number = data_dict['header']['packet_number']
    # Initialize offset to None in case 'frame_type': 'stream' is not found
    offset = None
    # Iterate through frames to find 'offset' for 'frame_type': 'stream'
    for frame in data_dict.get('frames', []):
        if frame.get('frame_type') == 'stream':
            offset = frame.get('offset')
            length = frame.get('length')
            break  # Stop iterating once 'offset' is found
    
    pk_rcv_num_list.append(packet_number)
    offset_rcv_list.append(offset)
    length_rcv_list.append(length)

pk_rcv_df['packet_number'] = pk_rcv_num_list
pk_rcv_df['offset'] = offset_rcv_list
pk_rcv_df['length'] = length_rcv_list

pk_rcv_df[:5]

Unnamed: 0,time,name,data,epoch_time,timestamp,packet_number,offset,length
0,229.83375,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216461,2,0,250
1,229.846354,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216474,3,250,250
2,229.856927,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216485,4,500,250
3,229.86625,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.216494,5,750,250
4,235.873906,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",1708180000000.0,2024-02-17 14:30:47.222502,6,1000,250


In [1251]:
cols = ['time', 'epoch_time', 'timestamp', 'name', 'packet_number', 'offset', 'length', 'data']
processed_rcv_df = pk_rcv_df[cols]
processed_rcv_df[:5]


Unnamed: 0,time,epoch_time,timestamp,name,packet_number,offset,length,data
0,229.83375,1708180000000.0,2024-02-17 14:30:47.216461,transport:packet_received,2,0,250,"{'header': {'packet_type': '1RTT', 'packet_num..."
1,229.846354,1708180000000.0,2024-02-17 14:30:47.216474,transport:packet_received,3,250,250,"{'header': {'packet_type': '1RTT', 'packet_num..."
2,229.856927,1708180000000.0,2024-02-17 14:30:47.216485,transport:packet_received,4,500,250,"{'header': {'packet_type': '1RTT', 'packet_num..."
3,229.86625,1708180000000.0,2024-02-17 14:30:47.216494,transport:packet_received,5,750,250,"{'header': {'packet_type': '1RTT', 'packet_num..."
4,235.873906,1708180000000.0,2024-02-17 14:30:47.222502,transport:packet_received,6,1000,250,"{'header': {'packet_type': '1RTT', 'packet_num..."


In [1252]:
csv_file_path = path + "data/" + f"processed_rcv_{time}_{port}.csv"
processed_rcv_df.to_csv(csv_file_path, sep='@')

## Real Packet Loss Data

In [27]:
### USER SETTINGS ###
database=""
# database = "/Volumes/MOLLY256/MOXA/"
# database = "/Users/molly/Desktop"
dates = [
    # "/Users/molly/Desktop/2024-01-26",
    # "/Volumes/MOLLY256/MOXA/2024-02-03",
    "/Volumes/MOLLY256/MOXA/2024-02-03",
]
devices = sorted([
    "sm00",
    "sm01",
])
exps = {
    "QUIC-450sec": (6, ["#{:02d}".format(i + 1) for i in range(6)]),
    "QUIC-300sec": (6, ["#{:02d}".format(i + 1) for i in range(6)]),
}

device_to_port = {"sm00": [4200, 4201], 
                  "sm01": [4202, 4203],
                  "sm02": [4204, 4205]}

In [28]:
def find_ul_file(database, date, exp, device):
    ul_files = []
    exp_rounds, exp_list = exps[exp]
    ports = device_to_port.get(device, [])
    for exp_round in exp_list:
        folder_path = os.path.join(database, date, exp, device, exp_round, 'data')
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                if "processed_rcv" in file:
                    # Extract the numbers from the file name
                    numbers = file.split("_")[3]
                    if str(ports[0]) in numbers:
                        ul_files.append(os.path.join(root, file))
                        break  # Exit the inner loop once the port is found
    return ul_files

def find_dl_file(database, date, exp, device):
    dl_files = []
    exp_rounds, exp_list = exps[exp]
    ports = device_to_port.get(device, [])
    for exp_round in exp_list:
        folder_path = os.path.join(database, date, exp, device, exp_round, 'data')
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                if "processed_rcv" in file:
                    # Extract the numbers from the file name
                    numbers = file.split("_")[3]
                    if str(ports[1]) in numbers:
                        dl_files.append(os.path.join(root, file))
                        break  # Exit the inner loop once the port is found
    return dl_files

def find_ul_loss_file(database, date, exp, device):
    ul_loss_files = []
    exp_rounds, exp_list = exps[exp]
    ports = device_to_port.get(device, [])
    for exp_round in exp_list:
        folder_path = os.path.join(database, date, exp, device, exp_round, 'middle')
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                if "lost_pk" in file:
                    # Extract the numbers from the file name
                    numbers = file.split("_")[3]
                    if str(ports[0]) in numbers:
                        ul_loss_files.append(os.path.join(root, file))
                        break  # Exit the inner loop once the port is found
    return ul_loss_files

def find_dl_loss_file(database, date, exp, device):
    dl_loss_files = []
    exp_rounds, exp_list = exps[exp]
    ports = device_to_port.get(device, [])
    for exp_round in exp_list:
        folder_path = os.path.join(database, date, exp, device, exp_round, 'middle')
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                if "lost_pk" in file:
                    # Extract the numbers from the file name
                    numbers = file.split("_")[3]
                    if str(ports[1]) in numbers:
                        dl_loss_files.append(os.path.join(root, file))
                        break  # Exit the inner loop once the port is found
    return dl_loss_files


In [29]:
def get_loss_data(lost_df, received_df):
    # Check if each row in ul_lost_df['packet_number'] is present in ul_received_df['packet_number']
    lost_in_received = lost_df['packet_number'].isin(received_df['packet_number'])

    # Get the rows in ul_lost_df where the packet number is present in ul_received_df
    exec_lat_df = lost_df[lost_in_received]

    exec_reorder_df = exec_lat_df[exec_lat_df['trigger'] == 'reordering_threshold']
    exec_time_df = exec_lat_df[exec_lat_df['trigger'] == 'time_threshold']

    # Get the rows in ul_lost_df where the packet number is not present in ul_received_df
    real_lost_df = lost_df[~lost_in_received]

    # Filter ul_lost_df for rows where 'trigger' is 'reordering threshold'
    lost_reorder_df = real_lost_df[real_lost_df['trigger'] == 'reordering_threshold']
    lost_time_df = real_lost_df[real_lost_df['trigger'] == 'time_threshold']

    return exec_lat_df, exec_reorder_df, exec_time_df, real_lost_df, lost_reorder_df, lost_time_df

def calculate_statistics(lost_reorder_df, lost_time_df, real_lost_df, exec_reorder_df, exec_time_df, exec_lat_df, lost_df, df):
    statistics_data = {
        'original_pkl': len(lost_df),
        'reordering_threshold': len(lost_reorder_df),
        'time_threshold': len(lost_time_df),
        'real_pkl': len(real_lost_df),
        'exec_reordering': len(exec_reorder_df),
        'exec_time': len(exec_time_df),
        'exec_lat': len(exec_lat_df),
        'reordering_pkl_rate(%)': 0 if len(real_lost_df) == 0 else len(lost_reorder_df)*100 / len(real_lost_df),
        'time_pkl_rate(%)': 0 if len(real_lost_df) == 0 else len(lost_time_df)*100 / len(real_lost_df),
        'real_pkl_rate(%)': 0 if len(lost_df) == 0 else len(real_lost_df)*100 / len(lost_df),
        'original_packet_loss_rate(%)': len(lost_df)*100 / (df.iloc[-1]['offset'] / 250),
        'adjusted_packet_loss_rate(%)': len(real_lost_df)*100 / (df.iloc[-1]['offset'] / 250)
    }

    # Convert the dictionary to a dataframe
    statistics_df = pd.DataFrame(statistics_data, index=[0])
    return statistics_df

In [30]:
# exec_lat_df, exec_reorder_df, exec_time_df, real_lost_df, lost_reorder_df, lost_time_df = get_loss_data(lost_rows, processed_rcv_df)

In [31]:
all_ul_files = []
all_ul_pkl_files = []
for date in dates:
    for exp in exps:
        for device in devices:
            ul_pk_files = find_ul_file(database, date, exp, device)
            ul_pk_loss_files = find_ul_loss_file(database, date, exp, device)
            all_ul_files.extend(ul_pk_files)
            all_ul_pkl_files.extend(ul_pk_loss_files)

for i in range(len(all_ul_files)):
    ul_rcv_df = pd.read_csv(all_ul_files[i], sep='@')
    ul_loss_df = pd.read_csv(all_ul_pkl_files[i])
    ul_exec_lat_df, ul_exec_reorder_df, ul_exec_time_df, ul_real_lost_df, ul_lost_reorder_df, ul_lost_time_df = get_loss_data(ul_loss_df, ul_rcv_df)
    ul_statistics = calculate_statistics(ul_lost_reorder_df, ul_lost_time_df, ul_real_lost_df, ul_exec_reorder_df, ul_exec_time_df, ul_exec_lat_df, ul_loss_df, ul_rcv_df)

    directory = os.path.dirname(all_ul_files[i])
    ul_loss_df['lost'] = False
    ul_loss_df['excl'] = False
    # Set 'lost' column to True for rows in ul_real_lost_df
    ul_loss_df.loc[ul_loss_df['packet_number'].isin(ul_real_lost_df['packet_number']), 'lost'] = True
    # Set 'excl' column to True for rows in ul_exec_lat_df
    ul_loss_df.loc[ul_loss_df['packet_number'].isin(ul_exec_lat_df['packet_number']), 'excl'] = True
    ul_loss_df.to_csv(f"{directory}/ul_real_lost_pk.csv", index=False)
    # modify to the statistics directory
    parts = directory.split("/")
    parts[-1] = "statistics"
    statistics_directory = "/".join(parts)
    ul_statistics.to_csv(f"{statistics_directory}/ul_statistics.csv", index=False)

In [None]:
all_dl_files = []
all_dl_pkl_files = []
for date in dates:
    for exp in exps:
        for device in devices:
            dl_pk_files = find_dl_file(database, date, exp, device)
            dl_pk_loss_files = find_dl_loss_file(database, date, exp, device)
            all_dl_files.extend(dl_pk_files)
            all_dl_pkl_files.extend(dl_pk_loss_files)

for i in range(len(all_dl_files)):
    dl_rcv_df = pd.read_csv(all_dl_files[i], sep='@')
    dl_loss_df = pd.read_csv(all_dl_pkl_files[i])
    dl_exec_lat_df, dl_exec_reorder_df, dl_exec_time_df, dl_real_lost_df, dl_lost_reorder_df, dl_lost_time_df = get_loss_data(dl_loss_df, dl_rcv_df)
    dl_statistics = calculate_statistics(dl_lost_reorder_df, dl_lost_time_df, dl_real_lost_df, dl_exec_reorder_df, dl_exec_time_df, dl_exec_lat_df, dl_loss_df, dl_rcv_df)
    
    directory = os.path.dirname(all_dl_files[i])
    dl_loss_df['lost'] = False
    dl_loss_df['excl'] = False
    # Set 'lost' column to True for rows in dl_real_lost_df
    dl_loss_df.loc[dl_loss_df['packet_number'].isin(dl_real_lost_df['packet_number']), 'lost'] = True
    # Set 'excl' column to True for rows in ul_exec_lat_df
    dl_loss_df.loc[dl_loss_df['packet_number'].isin(dl_exec_lat_df['packet_number']), 'excl'] = True
    dl_loss_df.to_csv(f"{directory}/dl_real_lost_pk.csv", index=False)
    # modify to the statistics directory
    parts = directory.split("/")
    parts[-1] = "statistics"
    statistics_directory = "/".join(parts)
    dl_statistics.to_csv(f"{statistics_directory}/dl_statistics.csv", index=False)