In [114]:
import csv
import ast
import json
import statistics

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime, timedelta

In [115]:
port = "4202"
time = "1702"
raw_file_name = "log_20240117_0903_4202_client"
path = "/Users/molly/Desktop/2024-01-17/pm/sm01/#02/"

### Transform to JSON & CSV file
Process the qlog file to json file & csv file.

In [116]:
def QlogToJsonEntry(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Add commas between lines
    json_str = ",".join(lines)
    # Surround the entire string with square brackets to create a JSON array
    json_str = "[" + json_str + "]"
    # Load the JSON array
    json_entry = json.loads(json_str)
    
    return json_entry

def QlogToJson(json_entry, json_file_path):
    with open(json_file_path, 'w') as json_file:
        json.dump(json_entry, json_file, indent=2)

def JsonToCsv(json_entry, csv_file_path):
     # Open CSV file for writing
    with open(csv_file_path, 'w', newline='') as csv_file:
        # Create a CSV writer
        csv_writer = csv.writer(csv_file)

        # Write header row based on the keys of the second JSON object (assuming at least two objects are present)
        if len(json_entry) >= 2:
            header = list(json_entry[1].keys())
            csv_writer.writerow(header)

            # Write data rows starting from the second object
            for entry in json_entry[1:]:
                csv_writer.writerow(entry.values())

In [117]:
raw_path = path + "raw/" + raw_file_name
qlog_file_path = raw_path + ".qlog"
json_file_path = raw_path + ".json"
csv_file_path = raw_path + ".csv"
json_entry = QlogToJsonEntry(qlog_file_path)
QlogToJson(json_entry, json_file_path)
JsonToCsv(json_entry, csv_file_path)

In [118]:
df = pd.read_csv(csv_file_path)

Parse the data.

In [119]:
metrics_all_rows = df[(df['name'] == 'recovery:metrics_updated') & (df['data'].str.contains("'bytes_in_flight':"))]
metrics_sent_rows = df[(df['name'] == 'recovery:metrics_updated') & (df['data'].str.contains("{'bytes_in_flight':"))]
metrics_ack_rows = df[(df['name'] == 'recovery:metrics_updated') & (df['data'].str.contains("'latest_rtt':"))]
total_sent_rows = df[(df['name'] == 'transport:packet_sent')]
pk_sent_rows = df[(df['name'] == 'transport:packet_sent') & (df['data'].str.contains("'frame_type': 'stream'"))]
rcv_ack_rows = df[(df['name'] == 'transport:packet_received') & (df['data'].str.contains("'frame_type': 'ack'")) & (df['data'].str.contains("'packet_type': '1RTT'"))]
lost_rows = df[df['name'] == 'recovery:packet_lost']

# Get the count of rows
metrics_all_cnt = len(metrics_all_rows)
metrics_c_cnt = len(metrics_sent_rows)
metrics_ack_cnt = len(metrics_ack_rows)
total_sent_cnt = len(total_sent_rows)
pk_sent_cnt = len(pk_sent_rows)
rcv_ack_cnt = len(rcv_ack_rows)
lost_cnt = len(lost_rows)

print("packet_sent: ", pk_sent_cnt, metrics_c_cnt)
print("ack: ", rcv_ack_cnt, metrics_ack_cnt)
print(metrics_all_cnt, metrics_c_cnt, metrics_ack_cnt, pk_sent_cnt, rcv_ack_cnt, lost_cnt)

packet_sent:  150359 150326
ack:  37669 37619
187947 150326 37619 150359 37669 708


Concat `transport:packet_sent` & `recovery:metrics_updated`.

In [120]:
csv_file_path = path + "middle/" + f"sent_metrics_{time}_{port}.csv"
metrics_sent_rows.to_csv(csv_file_path, index=False)
csv_file_path = path + "middle/" + f"pk_sent_{time}_{port}.csv"
pk_sent_rows.to_csv(csv_file_path, index=False)

In [121]:
def insert(df, idx, new_row):
    df1 = df.iloc[:idx, :]
    df2 = df.iloc[idx:, :]
    df_new = pd.concat([df1, new_row, df2], ignore_index=True)
    return df_new

In [122]:
metrics_sent_rows = metrics_sent_rows.reset_index(drop=True)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)
print(metrics_sent_rows[:5])
print(pk_sent_rows[:5])


        time                      name  \
0  75.168594  recovery:metrics_updated   
1  75.192239  recovery:metrics_updated   
2  75.386667  recovery:metrics_updated   
3  78.021823  recovery:metrics_updated   
4  80.897812  recovery:metrics_updated   

                                               data  
0   {'bytes_in_flight': 70, 'packets_in_flight': 1}  
1   {'bytes_in_flight': 95, 'packets_in_flight': 2}  
2  {'bytes_in_flight': 376, 'packets_in_flight': 4}  
3  {'bytes_in_flight': 653, 'packets_in_flight': 5}  
4  {'bytes_in_flight': 930, 'packets_in_flight': 6}  
        time                   name  \
0  75.381614  transport:packet_sent   
1  78.002344  transport:packet_sent   
2  80.873854  transport:packet_sent   
3  83.678854  transport:packet_sent   
4  86.614114  transport:packet_sent   

                                                data  
0  {'header': {'packet_type': '1RTT', 'dcid': '36...  
1  {'header': {'packet_type': '1RTT', 'dcid': '36...  
2  {'header': {'packet_

In [123]:
ori_recover_c_len = len(metrics_sent_rows)
for i in range(pk_sent_cnt):
    time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']
    # print(i, time_diff_list)
    # time_diff >= 1: not the matching metrics_update
    while time_diff >= 1:
        data = metrics_sent_rows.iloc[i-1]['data']
        new_row_data = {'time': [pk_sent_rows.iloc[i]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
        new_row = pd.DataFrame(new_row_data)
        # print(new_row)
        metrics_sent_rows = insert(metrics_sent_rows, i, new_row)
        time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']
    # time_diff < 0: missing metrics_update
    while time_diff < 0:
        # print(i, time_diff_list)
        metrics_sent_rows.drop(index=metrics_sent_rows.index[i], inplace=True)
        time_diff = metrics_sent_rows.iloc[i]['time'] - pk_sent_rows.iloc[i]['time']

print(ori_recover_c_len, len(metrics_sent_rows))


150326 150359


In [124]:
metrics_sent_rows = metrics_sent_rows.reset_index(drop=True)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)
print(len(metrics_sent_rows), len(pk_sent_rows))

# check whether there's still mismatch exist.
time_diff_list = metrics_sent_rows['time'] - pk_sent_rows['time']
mismatch_indices = time_diff_list[(time_diff_list >= 1) | (time_diff_list < 0)].index
if len(mismatch_indices) == 0:
    print("All Matched!")
else:
    print(mismatch_indices)


150359 150359
All Matched!


In [125]:
# def concat_metrics(packets_df, metrics_updated_df):
#     metrics_updated_df = metrics_updated_df.drop(columns=['time', 'name'])
#     df_with_metrics = pd.concat([packets_df, metrics_updated_df], axis=1)
#     return df_with_metrics

In [126]:
# metrics_sent_rows.rename(columns = {'data':'in_flight'}, inplace = True)
# df_with_metrics = concat_metrics(pk_sent_rows, metrics_sent_rows)
# df_with_metrics[:5]

In [127]:
metrics_sent_rows['bytes_in_flight'] = None
metrics_sent_rows['packets_in_flight'] = None

# Use ast.literal_eval to safely evaluate the string and extract 'bytes_in_flight' and 'packets_in_flight'
metrics_sent_rows[['bytes_in_flight', 'packets_in_flight']] = metrics_sent_rows['data'].apply(
    lambda x: pd.Series(ast.literal_eval(x)) if isinstance(x, str) else pd.Series([None, None]))

metrics_sent_rows[:5]

Unnamed: 0,time,name,data,bytes_in_flight,packets_in_flight
0,75.386667,recovery:metrics_updated,"{'bytes_in_flight': 376, 'packets_in_flight': 4}",376,4
1,78.021823,recovery:metrics_updated,"{'bytes_in_flight': 653, 'packets_in_flight': 5}",653,5
2,80.897812,recovery:metrics_updated,"{'bytes_in_flight': 930, 'packets_in_flight': 6}",930,6
3,83.695937,recovery:metrics_updated,"{'bytes_in_flight': 1207, 'packets_in_flight': 7}",1207,7
4,86.646875,recovery:metrics_updated,"{'bytes_in_flight': 1484, 'packets_in_flight': 8}",1484,8


In [128]:
pk_sent_rows['bytes_in_flight'] = metrics_sent_rows['bytes_in_flight']
pk_sent_rows['packets_in_flight'] = metrics_sent_rows['packets_in_flight']

pk_sent_rows[:5]

Unnamed: 0,time,name,data,bytes_in_flight,packets_in_flight
0,75.381614,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",376,4
1,78.002344,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",653,5
2,80.873854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",930,6
3,83.678854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1207,7
4,86.614114,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1484,8


Concat `transport:packet_received` & `recovery:metrics_updated`.

In [129]:
csv_file_path = path + "middle/" + f"ack_metrics_{time}_{port}.csv" 
metrics_ack_rows.to_csv(csv_file_path, index=False)
csv_file_path = path + "middle/" + f"rcv_ack_{time}_{port}.csv"
rcv_ack_rows.to_csv(csv_file_path, index=False)

In [130]:
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)
initial_ack_metrics = metrics_ack_rows.iloc[[0]]
metrics_ack_rows.drop(index=metrics_ack_rows.index[0], inplace=True)
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)

In [131]:
print(initial_ack_metrics.iloc[0]['data'])
metrics_ack_rows[:5]

{'min_rtt': 0, 'smoothed_rtt': 0, 'latest_rtt': 0, 'rtt_variance': 0, 'congestion_window': 40064, 'bytes_in_flight': 1252, 'packets_in_flight': 1}


Unnamed: 0,time,name,data
0,72.522917,recovery:metrics_updated,"{'min_rtt': 70.196041, 'smoothed_rtt': 70.1960..."
1,111.2775,recovery:metrics_updated,"{'min_rtt': 24.543594, 'smoothed_rtt': 64.489,..."
2,121.865052,recovery:metrics_updated,"{'smoothed_rtt': 60.063, 'latest_rtt': 29.0873..."
3,133.757187,recovery:metrics_updated,"{'min_rtt': 23.193385, 'smoothed_rtt': 55.454,..."
4,144.684583,recovery:metrics_updated,"{'min_rtt': 22.182916, 'smoothed_rtt': 51.295,..."


In [132]:
rcv_ack_rows[:5]

Unnamed: 0,time,name,data
0,111.401458,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."
1,121.881823,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."
2,133.776927,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."
3,144.724114,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."
4,158.377708,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."


In [133]:
print(len(metrics_ack_rows), len(rcv_ack_rows))
for i in range(rcv_ack_cnt):
    time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
    # time_diff >= 1: not the matching metrics_update
    while time_diff > 0:
        print("> 0:", i, time_diff)
        if i == 0:
            data = initial_ack_metrics.iloc[0]['data']
        else:
            data = metrics_ack_rows.iloc[i-1]['data']
        new_row_data = {'time': [rcv_ack_rows.iloc[i]['time']], 'name':['recovery:metrics_updated'], 'data': [data]}
        new_row = pd.DataFrame(new_row_data)
        # print(new_row)
        metrics_ack_rows = insert(metrics_ack_rows, i, new_row)
        time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
    # time_diff < 0: missing metrics_update
    while time_diff <= -1:
        print("<= -1:", i, time_diff)
        metrics_ack_rows.drop(index=metrics_ack_rows.index[i], inplace=True)
        time_diff = metrics_ack_rows.iloc[i]['time'] - rcv_ack_rows.iloc[i]['time']
print(len(metrics_ack_rows), len(rcv_ack_rows))

37618 37669
<= -1: 0 -38.878541
> 0: 304 12.429740000000038
> 0: 386 0.030624999999417923
> 0: 448 12.394218000000365
> 0: 556 0.04473899999993591
> 0: 681 12.090520000001561
> 0: 736 0.034478999999919324
> 0: 772 6.331353999999919
> 0: 831 0.026926999998977408
> 0: 892 12.418021000001318
> 0: 969 6.153177000000142
> 0: 1111 6.0408859999988636
<= -1: 1154 -1.7948440000000119
> 0: 1155 0.02395799999976589
> 0: 1232 0.04520799999954761
> 0: 1262 17.44974000000002
> 0: 1263 17.44239600000037
> 0: 1288 4.85656299999755
> 0: 1466 25.975157000000763
> 0: 1947 12.398698000000877
> 0: 1984 14.474895999999717
> 0: 2045 0.01354199999695993
> 0: 2104 12.644739999999729
> 0: 2162 4.566041999998561
> 0: 2358 11.706457999996928
> 0: 2359 11.694166999997833
> 0: 2488 11.79276100000061
> 0: 2496 0.04786500000045635
> 0: 2778 0.036823000002186745
> 0: 2980 5.973437000000558
> 0: 3000 0.034740000002784654
> 0: 3300 0.03156200000375975
> 0: 3748 0.03890700000192737
> 0: 6375 13.564217999999528
> 0: 6376 

In [134]:
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)
# print(metrics_ack_rows[:5])
# print(rcv_ack_rows[:5])

# check whether there's still mismatch exist.
time_diff_list = metrics_ack_rows['time'] - rcv_ack_rows['time']
mismatch_indices = time_diff_list[(time_diff_list <= -1) | (time_diff_list > 0)].index
if len(mismatch_indices) == 0:
    print("All Matched!")
else:
    print(mismatch_indices)

Index([ 1154,  6737,  6960,  9265,  9352, 11492, 14645, 14724, 16326, 18033,
       19916, 21054, 21204, 23074, 25000, 26340, 27349, 27689, 27734, 28324,
       30585, 31527, 33006, 35365],
      dtype='int64')


In [135]:
ack_json_list = []
print(initial_ack_metrics)
metrics_ack_rows = pd.concat([initial_ack_metrics, metrics_ack_rows], axis=0).reset_index(drop=True)
print(metrics_ack_rows[:5])
for i in range(len(metrics_ack_rows)):
    s = metrics_ack_rows.iloc[i]['data'].replace("\'", "\"")
    json_object = json.loads(s)
    ack_json_list.append(json_object)

metrics_ack_df = pd.DataFrame(ack_json_list)
# Fill missing values in each row with the previous row's values
metrics_ack_df = metrics_ack_df.ffill(axis=0)

metrics_ack_rows.drop(index=metrics_ack_rows.index[0], inplace=True)
metrics_ack_rows = metrics_ack_rows.reset_index(drop=True)
metrics_ack_df.drop(index=metrics_ack_df.index[0], inplace=True)
metrics_ack_df = metrics_ack_df.reset_index(drop=True)
metrics_ack_df[:5]

       time                      name  \
0  2.251875  recovery:metrics_updated   

                                                data  
0  {'min_rtt': 0, 'smoothed_rtt': 0, 'latest_rtt'...  
         time                      name  \
0    2.251875  recovery:metrics_updated   
1  111.277500  recovery:metrics_updated   
2  121.865052  recovery:metrics_updated   
3  133.757187  recovery:metrics_updated   
4  144.684583  recovery:metrics_updated   

                                                data  
0  {'min_rtt': 0, 'smoothed_rtt': 0, 'latest_rtt'...  
1  {'min_rtt': 24.543594, 'smoothed_rtt': 64.489,...  
2  {'smoothed_rtt': 60.063, 'latest_rtt': 29.0873...  
3  {'min_rtt': 23.193385, 'smoothed_rtt': 55.454,...  
4  {'min_rtt': 22.182916, 'smoothed_rtt': 51.295,...  


Unnamed: 0,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
0,24.543594,64.489,24.543594,37.736,40064.0,2286,10.0
1,24.543594,60.063,29.087364,37.152,40064.0,2502,9.0
2,23.193385,55.454,23.193385,37.081,40064.0,1948,7.0
3,22.182916,51.295,22.182916,36.128,40064.0,1939,7.0
4,22.182916,48.574,29.528354,32.537,40064.0,2493,9.0


In [136]:
metrics_ack_rows = pd.concat([metrics_ack_rows, metrics_ack_df], axis=1).reset_index(drop=True)
metrics_ack_rows = metrics_ack_rows.drop(columns=['data'])
metrics_ack_rows[:5]

Unnamed: 0,time,name,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
0,111.2775,recovery:metrics_updated,24.543594,64.489,24.543594,37.736,40064.0,2286,10.0
1,121.865052,recovery:metrics_updated,24.543594,60.063,29.087364,37.152,40064.0,2502,9.0
2,133.757187,recovery:metrics_updated,23.193385,55.454,23.193385,37.081,40064.0,1948,7.0
3,144.684583,recovery:metrics_updated,22.182916,51.295,22.182916,36.128,40064.0,1939,7.0
4,158.359739,recovery:metrics_updated,22.182916,48.574,29.528354,32.537,40064.0,2493,9.0


In [137]:
print(len(rcv_ack_rows), len(metrics_ack_df))
rcv_ack_rows[:5]

37669 37669


Unnamed: 0,time,name,data
0,111.401458,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."
1,121.881823,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."
2,133.776927,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."
3,144.724114,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."
4,158.377708,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num..."


In [138]:
rcv_ack_rows = pd.concat([rcv_ack_rows, metrics_ack_df], axis=1)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)

print(len(rcv_ack_rows), len(metrics_ack_df))
rcv_ack_rows[-5:]

37669 37669


Unnamed: 0,time,name,data,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight
37664,450027.962693,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",14.167031,23.68,24.051875,2.537,18556.0,2511,9.0
37665,450050.207276,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",14.167031,24.943,33.787531,4.429,18556.0,3084,11.0
37666,450050.293995,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",14.167031,25.05,25.80623,3.537,18556.0,2247,8.0
37667,450061.880557,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",14.167031,25.917,31.992656,4.388,18556.0,2790,10.0
37668,450074.245141,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",14.167031,25.55,22.985281,4.023,18556.0,1953,7.0


Mapping the ACK ranges

In [139]:
acked_ranges_series = rcv_ack_rows['data']
acked_ranges_list = []
for i in range(len(acked_ranges_series)):
    s = acked_ranges_series.iloc[i]
    data_dict = json.loads(s.replace("\'", "\""))
    # Extract 'acked_ranges' from all frames
    acked_ranges = [range_entry for frame in data_dict['frames'] if 'acked_ranges' in frame for range_entry in frame['acked_ranges']]
    acked_ranges_list.append(acked_ranges)

acked_ranges_df = pd.DataFrame({"acked_ranges": acked_ranges_list})
acked_ranges_df[54775:54785]

Unnamed: 0,acked_ranges


In [140]:
rcv_ack_rows = pd.concat([rcv_ack_rows, acked_ranges_df], axis=1)
rcv_ack_rows = rcv_ack_rows.reset_index(drop=True)

rcv_ack_rows[:5]

Unnamed: 0,time,name,data,min_rtt,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,bytes_in_flight,packets_in_flight,acked_ranges
0,111.401458,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",24.543594,64.489,24.543594,37.736,40064.0,2286,10.0,"[[0, 5]]"
1,121.881823,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",24.543594,60.063,29.087364,37.152,40064.0,2502,9.0,"[[0, 7]]"
2,133.776927,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",23.193385,55.454,23.193385,37.081,40064.0,1948,7.0,"[[0, 13]]"
3,144.724114,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",22.182916,51.295,22.182916,36.128,40064.0,1939,7.0,"[[6, 17]]"
4,158.377708,transport:packet_received,"{'header': {'packet_type': '1RTT', 'packet_num...",22.182916,48.574,29.528354,32.537,40064.0,2493,9.0,"[[6, 19]]"


In [141]:
pk_sent_series =  pk_sent_rows['data']
pk_num_list = []
offset_list = []
length_list = []
for i in range(len(pk_sent_series)):
    s = pk_sent_series.iloc[i]
    data_dict = json.loads(s.replace("\'", "\""))
    packet_number = data_dict['header']['packet_number']
    # Initialize offset to None in case 'frame_type': 'stream' is not found
    offset = None
    # Iterate through frames to find 'offset' for 'frame_type': 'stream'
    for frame in data_dict.get('frames', []):
        if frame.get('frame_type') == 'stream':
            offset = frame.get('offset')
            length = frame.get('length')
            break  # Stop iterating once 'offset' is found
    
    pk_num_list.append(packet_number)
    offset_list.append(offset)
    length_list.append(length)

pk_num_df = pd.DataFrame({"packet_number": pk_num_list, "offset": offset_list, "length": length_list})
pk_num_df[:5]

Unnamed: 0,packet_number,offset,length
0,1,0,250
1,2,250,250
2,3,500,250
3,4,750,250
4,5,1000,250


In [142]:
pk_sent_rows = pd.concat([pk_sent_rows, pk_num_df], axis=1)
pk_sent_rows = pk_sent_rows.reset_index(drop=True)

pk_sent_rows[:5]

Unnamed: 0,time,name,data,bytes_in_flight,packets_in_flight,packet_number,offset,length
0,75.381614,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",376,4,1,0,250
1,78.002344,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",653,5,2,250,250
2,80.873854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",930,6,3,500,250
3,83.678854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1207,7,4,750,250
4,86.614114,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1484,8,5,1000,250


In [143]:
pk_sent_rows['smoothed_rtt'] = np.nan
pk_sent_rows['latest_rtt'] = np.nan
pk_sent_rows['rtt_variance'] = np.nan
pk_sent_rows['congestion_window'] = np.nan

pk_sent_rows[:5]

Unnamed: 0,time,name,data,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window
0,75.381614,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",376,4,1,0,250,,,,
1,78.002344,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",653,5,2,250,250,,,,
2,80.873854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",930,6,3,500,250,,,,
3,83.678854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1207,7,4,750,250,,,,
4,86.614114,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1484,8,5,1000,250,,,,


In [144]:
# for _, ack_row in rcv_ack_rows.iterrows():
#     acked_ranges = ack_row['acked_ranges']
#     smoothed_rtt = ack_row['smoothed_rtt']
#     latest_rtt = ack_row['latest_rtt']
#     rtt_variance = ack_row['rtt_variance']
#     congestion_window = ack_row['congestion_window']
    
#     # Iterate through acked_ranges and update corresponding rows in pk_sent_rows
#     for ack_range in acked_ranges:
#         print(ack_range)
#         if isinstance(ack_range, list):
#             start_packet, end_packet = ack_range[0], ack_range[-1]
#         else:
#             start_packet, end_packet = ack_range, ack_range
        
#         # Filter only existing packet numbers in the range
#         existing_packets = set(pk_sent_rows['packet_number'])
#         packet_numbers_to_update = set(range(start_packet, end_packet + 1)).intersection(existing_packets)

#         # Update rows for existing packet numbers
#         mask = pk_sent_rows['packet_number'].isin(packet_numbers_to_update)
#         # Update only if the packet_number is found in the range and the information is not already set
#         pk_sent_rows.loc[mask, 'smoothed_rtt'] = pk_sent_rows.loc[mask, 'smoothed_rtt'].fillna(smoothed_rtt)
#         pk_sent_rows.loc[mask, 'latest_rtt'] = pk_sent_rows.loc[mask, 'latest_rtt'].fillna(latest_rtt)
#         pk_sent_rows.loc[mask, 'congestion_window'] = pk_sent_rows.loc[mask, 'congestion_window'].fillna(congestion_window)
#         pk_sent_rows.loc[mask, 'rtt_variance'] = pk_sent_rows.loc[mask, 'rtt_variance'].fillna(rtt_variance)

# pk_sent_rows[:5]

In [145]:
def update_pk_sent_rows(row):
    acked_ranges = row['acked_ranges']
    smoothed_rtt = row['smoothed_rtt']
    latest_rtt = row['latest_rtt']
    rtt_variance = row['rtt_variance']
    congestion_window = row['congestion_window']

    for ack_range in acked_ranges:
        start_packet, end_packet = ack_range[0], ack_range[-1]
        existing_packets = set(pk_sent_rows['packet_number'])
        packet_numbers_to_update = set(range(start_packet, end_packet + 1)).intersection(existing_packets)

        mask = pk_sent_rows['packet_number'].isin(packet_numbers_to_update)
        pk_sent_rows.loc[mask, 'smoothed_rtt'] = pk_sent_rows.loc[mask, 'smoothed_rtt'].fillna(smoothed_rtt)
        pk_sent_rows.loc[mask, 'latest_rtt'] = pk_sent_rows.loc[mask, 'latest_rtt'].fillna(latest_rtt)
        pk_sent_rows.loc[mask, 'congestion_window'] = pk_sent_rows.loc[mask, 'congestion_window'].fillna(congestion_window)
        pk_sent_rows.loc[mask, 'rtt_variance'] = pk_sent_rows.loc[mask, 'rtt_variance'].fillna(rtt_variance)

# Apply the custom update function to each row in rcv_ack_rows
rcv_ack_rows.apply(update_pk_sent_rows, axis=1)

# Display the updated pk_sent_rows
pk_sent_rows[:5]

Unnamed: 0,time,name,data,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window
0,75.381614,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",376,4,1,0,250,64.489,24.543594,37.736,40064.0
1,78.002344,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",653,5,2,250,250,64.489,24.543594,37.736,40064.0
2,80.873854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",930,6,3,500,250,64.489,24.543594,37.736,40064.0
3,83.678854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1207,7,4,750,250,64.489,24.543594,37.736,40064.0
4,86.614114,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1484,8,5,1000,250,64.489,24.543594,37.736,40064.0


In [146]:
pk_sent_rows[-5:]

Unnamed: 0,time,name,data,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window
150354,450063.031495,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '76...",3069,11,150460,37516250,250,,,,
150355,450066.198735,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '76...",3348,12,150461,37516500,250,,,,
150356,450068.585349,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '76...",3627,13,150462,37516750,250,,,,
150357,450071.517901,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '76...",3906,14,150463,37517000,250,,,,
150358,450074.973318,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '76...",2232,8,150464,37517250,250,,,,


Identify lost packets

In [147]:
csv_file_path = path + "middle/" + f"lost_pk_{time}_{port}.csv"
lost_rows.to_csv(csv_file_path, index=False)

In [148]:
# Use ast.literal_eval to safely evaluate the string and extract 'packet_number'
lost_rows['packet_number'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['header']['packet_number'] if isinstance(x, str) else None)

lost_rows[:5]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lost_rows['packet_number'] = lost_rows['data'].apply(lambda x: ast.literal_eval(x)['header']['packet_number'] if isinstance(x, str) else None)


Unnamed: 0,time,name,data,packet_number
423,384.375208,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",86
646,574.279323,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",120
648,574.293802,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",121
816,704.004427,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",200
1274,1056.061197,recovery:packet_lost,"{'header': {'packet_type': '1RTT', 'packet_num...",311


In [149]:
pk_sent_rows['packet_lost'] = False

# Iterate through rows and set 'packet_lost' to True where 'packet_number' values match
for _, lost_row in lost_rows.iterrows():
    packet_number = lost_row['packet_number']
    
    # Check if 'packet_number' exists in pk_sent_rows
    if packet_number in pk_sent_rows['packet_number'].values:
        pk_sent_rows.loc[pk_sent_rows['packet_number'] == packet_number, 'packet_lost'] = True

pk_sent_rows[19340:19345]

Unnamed: 0,time,name,data,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost
19340,56990.612426,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'e7...",3906,14,19355,4798000,250,26.362,14.880625,6.025,11082.0,False
19341,56994.102166,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'e7...",4185,15,19356,4798250,250,26.362,14.880625,6.025,11082.0,False
19342,56996.837791,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'e7...",3069,11,19357,4798500,250,26.362,14.880625,6.025,11082.0,False
19343,56999.535551,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'e7...",3348,12,19358,4798750,250,26.69,28.992104,5.176,11082.0,False
19344,57001.962426,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': 'e7...",3627,13,19359,4799000,250,26.69,28.992104,5.176,11082.0,False


`processed_df`: The final dataframe that will contain all information of packets.

Set time to UMT+8.

In [150]:
def GetStartTime(json_data):
    # unit: ms
    refTime = json_data[0]["trace"]["common_fields"]["reference_time"]
    return refTime

def ProcessTime(df, reference_time):
    # Extract the "time" values from the DataFrame
    original_times = (df['time'].astype(float))

    # Calculate "epoch_time" and convert to timestamps
    epoch_times = (reference_time + original_times)
    timestamps = pd.to_datetime(epoch_times, unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S.%f')

    # Create a new DataFrame with the processed data
    processed_df = pk_sent_rows.copy()
    processed_df['epoch_time'] = epoch_times
    processed_df['timestamp'] = timestamps

    return processed_df

In [151]:
refTime = GetStartTime(json_entry)
print(refTime)

1705482189105.091


In [152]:
processed_df = ProcessTime(pk_sent_rows, refTime)

# Add 8 hours to both epoch times and timestamps if file name is not UMT+8
epoch_times_gmt8 = processed_df["epoch_time"] + 8 * 3600 * 1000
processed_df["epoch_time"] = epoch_times_gmt8
timestamps_gmt8 = pd.to_datetime(epoch_times_gmt8, unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S.%f')
processed_df["timestamp"] = timestamps_gmt8

processed_df[:5]

Unnamed: 0,time,name,data,bytes_in_flight,packets_in_flight,packet_number,offset,length,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost,epoch_time,timestamp
0,75.381614,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",376,4,1,0,250,64.489,24.543594,37.736,40064.0,False,1705511000000.0,2024-01-17 17:03:09.180472
1,78.002344,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",653,5,2,250,250,64.489,24.543594,37.736,40064.0,False,1705511000000.0,2024-01-17 17:03:09.183093
2,80.873854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",930,6,3,500,250,64.489,24.543594,37.736,40064.0,False,1705511000000.0,2024-01-17 17:03:09.185964
3,83.678854,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1207,7,4,750,250,64.489,24.543594,37.736,40064.0,False,1705511000000.0,2024-01-17 17:03:09.188770
4,86.614114,transport:packet_sent,"{'header': {'packet_type': '1RTT', 'dcid': '36...",1484,8,5,1000,250,64.489,24.543594,37.736,40064.0,False,1705511000000.0,2024-01-17 17:03:09.191705


In [153]:
cols = ['time', 'epoch_time', 'timestamp', 'name', 'packet_number', 'offset', 'length', 'bytes_in_flight', 'packets_in_flight', 'smoothed_rtt', 'latest_rtt', 'rtt_variance', 'congestion_window', 'packet_lost']
processed_df = processed_df[cols]
processed_df[:5]

Unnamed: 0,time,epoch_time,timestamp,name,packet_number,offset,length,bytes_in_flight,packets_in_flight,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost
0,75.381614,1705511000000.0,2024-01-17 17:03:09.180472,transport:packet_sent,1,0,250,376,4,64.489,24.543594,37.736,40064.0,False
1,78.002344,1705511000000.0,2024-01-17 17:03:09.183093,transport:packet_sent,2,250,250,653,5,64.489,24.543594,37.736,40064.0,False
2,80.873854,1705511000000.0,2024-01-17 17:03:09.185964,transport:packet_sent,3,500,250,930,6,64.489,24.543594,37.736,40064.0,False
3,83.678854,1705511000000.0,2024-01-17 17:03:09.188770,transport:packet_sent,4,750,250,1207,7,64.489,24.543594,37.736,40064.0,False
4,86.614114,1705511000000.0,2024-01-17 17:03:09.191705,transport:packet_sent,5,1000,250,1484,8,64.489,24.543594,37.736,40064.0,False


In [154]:
csv_file_path = path + "data/" + f"processed_{time}_{port}.csv"
processed_df.to_csv(csv_file_path, sep='@', index=False)

In [155]:
weird_length_list =[]
for i in range(len(processed_df)):
    if processed_df.iloc[i]['length'] != 250:
        weird_length_list.append(processed_df.iloc[i])

weird_length_df = pd.DataFrame(weird_length_list)

In [156]:
print(len(weird_length_df))
weird_length_df[:10]

184


Unnamed: 0,time,epoch_time,timestamp,name,packet_number,offset,length,bytes_in_flight,packets_in_flight,smoothed_rtt,latest_rtt,rtt_variance,congestion_window,packet_lost
365,1140.474687,1705511000000.0,2024-01-17 17:03:10.245565,transport:packet_sent,371,88750,1250,3790,10,43.381,25.418,15.922,11057.0,False
2153,6314.357914,1705511000000.0,2024-01-17 17:03:15.419449,transport:packet_sent,2161,530250,750,8033,27,50.58,28.854927,16.199,9969.0,False
2157,6328.128175,1705511000000.0,2024-01-17 17:03:15.433219,transport:packet_sent,2165,531500,750,5464,16,47.587,26.638885,18.134,9969.0,False
2627,7644.622601,1705511000000.0,2024-01-17 17:03:16.749713,transport:packet_sent,2635,646500,1000,4935,15,52.339,26.59626,23.629,7001.0,False
2872,8338.546403,1705511000000.0,2024-01-17 17:03:17.443637,transport:packet_sent,2880,706500,500,3877,13,47.052,44.970406,9.344,13061.0,False
3029,8796.546559,1705511000000.0,2024-01-17 17:03:17.901637,transport:packet_sent,3037,745250,1000,5772,18,56.019,26.644959,21.044,9853.0,False
3292,9572.62333,1705511000000.0,2024-01-17 17:03:18.677714,transport:packet_sent,3300,810750,1250,1837,3,44.877,49.13324,13.819,8911.0,False
3964,11486.061558,1705511000000.0,2024-01-17 17:03:20.591152,transport:packet_sent,3972,975250,1250,5464,16,48.569,49.234385,10.699,13190.0,False
4024,11654.65062,1705511000000.0,2024-01-17 17:03:20.759741,transport:packet_sent,4032,991000,500,6946,24,61.915,60.258209,6.555,9233.0,False
4040,11720.136714,1705511000000.0,2024-01-17 17:03:20.825227,transport:packet_sent,4048,995250,1250,1558,2,53.853,51.088125,12.478,10672.0,False


In [157]:
# Sum up the 'length' column
total_length = processed_df['length'].sum()

print(f'Total Length: {total_length}')

Total Length: 37715250


In [12]:
import pandas as pd
df = pd.read_csv("/Users/molly/Desktop/2024-01-17/pm/sm01/#02/data/processed_1702_4202.csv", sep='@')
filtered_df = df[df['packet_lost'] == True]
print(len(filtered_df))

707


In [13]:
df = pd.read_csv("/Users/molly/Desktop/2024-01-17/pm/sm01/#02/data/processed_1702_4203.csv", sep='@')
filtered_df = df[df['packet_lost'] == True]
print(len(filtered_df))

593
