In [1]:
import os
import sys
import argparse
import csv
import pandas as pd
import datetime as dt
import numpy as np
from pprint import pprint
from tqdm.notebook import tqdm
from pytictoc import TicToc
import traceback
from statistics import median
from statistics import mean
from statistics import mode
from statistics import stdev
from scipy import stats
from scipy import signal
import portion as P
import swifter

In [2]:
def makedir(dirpath, mode=0):  # mode=1: show message; mode=0: hide message
    """
    Create a directory together with any missing parent directories.

    Args:
        dirpath (str): directory to create; may be absolute or relative.
        mode (int): 1 prints a notice when the directory already exists,
            0 stays silent in that case.
    Returns:
        None. Every directory that gets created is reported with a
        "mkdir <path>" line.
    """
    if os.path.isdir(dirpath):
        if mode:
            print("mkdir: cannot create directory '{}': directory has already existed.".format(dirpath))
        return
    ### collect the missing path components, deepest first
    missing = []
    current = dirpath
    # Also stop on an empty string: os.path.dirname("name") is "" and
    # os.path.isdir("") is False, so a relative path whose top component is
    # missing would otherwise never leave this loop.
    while current and not os.path.isdir(current):
        missing.append(current)
        parent = os.path.dirname(current)
        if parent == current:  # dirname() made no progress (non-existent root)
            break
        current = parent
    ### create them from the outermost component inwards
    while missing:
        current = missing.pop()
        if os.path.isdir(current):
            # "a/b/" and "a/b" name the same directory; skip the second one
            continue
        print("mkdir", current)
        os.mkdir(current)

def epoch_to_utc8(ts):
    """
    Convert an epoch time into a readable format.
    The UTC wall-clock time is shifted forward by 8 hours (UTC+8).

    Args:
        ts (float): epoch timestamp, whole seconds plus fractional microseconds (e.g., 1644051509.989306)
    Returns:
        (datetime.datetime): a readable, timezone-naive timestamp (UTC+8)
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Convert with
    # an explicit UTC tzinfo instead and strip it again so the result stays
    # naive, exactly as before.
    utc_naive = dt.datetime.fromtimestamp(ts, dt.timezone.utc).replace(tzinfo=None)
    return utc_naive + dt.timedelta(hours=8)

def str_to_datetime(ts):
    """
    Convert a timestamp string, with or without a fractional-second part,
    into datetime.datetime.

    Args:
        ts (str): timestamp string (e.g., 2022-09-29 16:24:58.252615 or 2022-09-29 16:24:58)
    Returns:
        (datetime.datetime)
    Raises:
        ValueError: if the string matches neither format.
    """
    try:
        return dt.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S.%f")
    except ValueError:
        # Only a format mismatch triggers the fallback; anything else
        # (e.g. a non-string argument) is reported immediately.
        return dt.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")

def datetime_to_str(ts):
    """
    Convert a datetime timestamp into a string with microsecond resolution.

    Args:
        ts (datetime.datetime): datetime timestamp (e.g., datetime.datetime(2022, 9, 29, 16, 24, 58, 252615))
    Returns:
        (str): timestamp string (e.g., 2022-09-29 16:24:58.252615)
    Raises:
        TypeError: if `ts` is not a datetime.datetime instance.
    """
    # "%f" always renders as six digits for a datetime, so there is no input
    # for which this call fails while the same format without "%f" succeeds.
    # The earlier bare-except fallback could therefore never produce a result
    # and has been dropped.
    return dt.datetime.strftime(ts, "%Y-%m-%d %H:%M:%S.%f")

In [3]:
import csv
import pandas as pd
import numpy as np

class Payl:
    """Layout constants of the custom probe payload carried in the captured packets."""

    # Size of one probe payload, in bytes.
    LENGTH = 250

    # 8-byte marker, as a hex string: the decimal numbers 271828 and 31415926,
    # each written as a 4-byte hexadecimal field.
    TAG = "000425d401df5e76"

    # (start, end) slice positions of the 4-byte fields. They are applied to
    # the payload's hex string, where one byte takes two characters.
    OFS_TIME = (16, 24)  # epoch time of 'yyyy/mm/dd hh:mm:ss'
    OFS_USEC = (24, 32)  # microsecond (usec)
    OFS_SEQN = (32, 40)  # sequence number (start from 1)

class GParam:
    """Numeric codes that capture-file columns are compared against."""

    # Codes matched against the 'sll.pkttype' column.
    # NOTE(review): presumably the Linux cooked-capture packet types
    # (0 = received by the capturing host, 4 = sent by it) — confirm.
    RECV = 0
    TRANS = 4

    # Codes matched against the 'ip.proto' column.
    TCP = 6
    UDP = 17
    
def epoch_to_utc8(ts):
    """
    Convert an epoch time into a readable format.
    The UTC wall-clock time is shifted forward by 8 hours (UTC+8).

    NOTE(review): a helper with this name is also defined in an earlier cell;
    this later definition is the one in effect once this cell has run.
    Consider keeping a single copy.

    Args:
        ts (float): epoch timestamp, whole seconds plus fractional microseconds (e.g., 1644051509.989306)
    Returns:
        (datetime.datetime): a readable, timezone-naive timestamp (UTC+8)
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Convert with
    # an explicit UTC tzinfo instead and strip it again so the result stays
    # naive, exactly as before.
    utc_naive = dt.datetime.fromtimestamp(ts, dt.timezone.utc).replace(tzinfo=None)
    return utc_naive + dt.timedelta(hours=8)

def parse_packet_info(fin, fout, term, direct, proto):
    """
    Extract per-packet sequence numbers and timestamps from a capture CSV.

    Reads the '@'-delimited file `fin` (its first row is the header), keeps
    the rows that pass the endpoint/direction, protocol and payload-size
    filters, decodes every probe payload found in a kept row and writes one
    line per unique (sequence number, payload epoch time) pair to `fout`,
    ordered by sequence number.

    Args:
        fin (str): input CSV path. Columns read: 'sll.pkttype', 'ip.proto',
            'udp.length', 'udp.payload', 'tcp.payload', 'frame.number',
            'frame.time', 'frame.time_epoch'.
        fout (str): output CSV path (comma-delimited, overwritten).
        term (str): "server" or "client".
        direct (str): "ul" or "dl". Any other (term, direct) combination
            disables the packet-type filter.
        proto (str): "udp" or "tcp". Any other value disables the protocol
            and payload checks.
    Returns:
        None
    Raises:
        SystemExit: when a payload field cannot be parsed as hexadecimal
            (the traceback, frame time and payload are printed first).
    """
    new_header = ["seq", "rpkg", "frame_id", "frame_time", "frame_time_epoch", "pyl_time", "pyl_time_epoch"]

    # Filter values do not change from row to row, so resolve them once.
    # None means "no filter" for this combination / protocol.
    expected_pkttype = {
        ("server", "ul"): GParam.RECV,
        ("client", "ul"): GParam.TRANS,
        ("server", "dl"): GParam.TRANS,
        ("client", "dl"): GParam.RECV,
    }.get((term, direct))
    expected_proto = {"udp": GParam.UDP, "tcp": GParam.TCP}.get(proto)
    hex_per_payload = Payl.LENGTH * 2  # 1-Byte == 2-hex-digits

    timestamp_list = []
    seq_set = set()  # (seq, pyl_time_epoch) pairs already emitted
    # newline='' lets the csv module do its own line-ending handling.
    with open(fin, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter='@')
        header = next(csvreader)

        for content in tqdm(csvreader, ncols=1000):
            row = dict(zip(header, content))

            # server/client; uplink/downlink
            if expected_pkttype is not None and int(row['sll.pkttype']) != expected_pkttype:
                continue

            # udp/tcp
            if expected_proto is not None and int(row['ip.proto']) != expected_proto:
                continue

            # check customized packet
            if proto == "udp":
                # A probe datagram is k payloads plus the 8-byte UDP header.
                udp_length = int(row['udp.length'])
                if not (udp_length > Payl.LENGTH and udp_length % Payl.LENGTH == 8):
                    continue
            else:
                if proto == "tcp" and Payl.TAG not in row['tcp.payload']:
                    continue
                # NOTE(review): for proto == "tcp" the tag is looked up in
                # 'tcp.payload', yet the decoding below still reads the UDP
                # columns. The intended TCP framing is not visible here, so
                # the behaviour is left unchanged — confirm and fix.
                udp_length = int(row['udp.length'])

            payload = row['udp.payload']
            rpkg_num = udp_length // Payl.LENGTH

            # Decode every payload of this frame before emitting anything.
            decoded = []
            for ofs in range(0, rpkg_num * hex_per_payload, hex_per_payload):
                try:
                    datetimedec = int(payload[ofs + Payl.OFS_TIME[0] : ofs + Payl.OFS_TIME[1]], 16)
                    microsec = int(payload[ofs + Payl.OFS_USEC[0] : ofs + Payl.OFS_USEC[1]], 16)
                    seq = int(payload[ofs + Payl.OFS_SEQN[0] : ofs + Payl.OFS_SEQN[1]], 16)
                except ValueError:
                    # int(..., 16) on a truncated or non-hex slice
                    print(traceback.format_exc())
                    print(row['frame.time'])
                    print(payload)
                    sys.exit(1)

                pyl_time_epoch = datetimedec + microsec * 1e-6
                decoded.append((seq, epoch_to_utc8(pyl_time_epoch), pyl_time_epoch))

            # Frame-level fields are identical for every payload of the row.
            # Parse them at most once, and only if something is emitted.
            frame_fields = None
            for seq, pyl_time, pyl_time_epoch in decoded:
                key = (seq, pyl_time_epoch)
                if key in seq_set:
                    continue
                if frame_fields is None:
                    frame_fields = [
                        int(row['frame.number']),
                        pd.to_datetime(row['frame.time']).tz_localize(None),
                        float(row['frame.time_epoch']),
                    ]
                timestamp_list.append([seq, rpkg_num] + frame_fields + [pyl_time, pyl_time_epoch])
                seq_set.add(key)

    # Stable sort: entries with equal sequence numbers keep capture order.
    timestamp_list.sort(key=lambda v: v[0])

    print("output >>>", fout)
    with open(fout, 'w', newline="") as fp:
        writer = csv.writer(fp)
        writer.writerow(new_header)
        writer.writerows(timestamp_list)

In [4]:
# NOTE(review): machine-specific absolute path; point it at the local capture directory.
testdir = "/Users/jackbedford/Desktop/MOXA/testset"

def _find_capture(dirpath, prefixes):
    """
    Return the path of the alphabetically first '.csv' file in `dirpath`
    whose name starts with one of `prefixes`.

    Args:
        dirpath (str): directory to search (not recursive).
        prefixes (tuple of str): accepted file-name prefixes.
    Returns:
        (str): `dirpath` joined with the selected file name.
    Raises:
        FileNotFoundError: if no file matches.
    """
    # os.listdir() returns entries in arbitrary order; sort so that the same
    # file is selected on every run when several candidates exist.
    matches = sorted(s for s in os.listdir(dirpath) if s.startswith(prefixes) and s.endswith(".csv"))
    if not matches:
        raise FileNotFoundError("no .csv file starting with {} found in '{}'".format(prefixes, dirpath))
    return os.path.join(dirpath, matches[0])

# "BL" captures are accepted for both the uplink and the downlink side.
ul_s_file = _find_capture(testdir, ("server_pcap_BL", "server_pcap_UL"))
ul_c_file = _find_capture(testdir, ("client_pcap_BL", "client_pcap_UL"))
dl_s_file = _find_capture(testdir, ("server_pcap_BL", "server_pcap_DL"))
dl_c_file = _find_capture(testdir, ("client_pcap_BL", "client_pcap_DL"))
print(ul_s_file, ul_c_file, dl_s_file, dl_c_file, sep='\n')

/Users/jackbedford/Desktop/MOXA/testset/server_pcap_BL_sm00_3200_3201_2023-06-12_16-38-58_sock.csv
/Users/jackbedford/Desktop/MOXA/testset/client_pcap_BL_sm00_3200_3201_2023-06-12_16-38-57_sock.csv
/Users/jackbedford/Desktop/MOXA/testset/server_pcap_BL_sm00_3200_3201_2023-06-12_16-38-58_sock.csv
/Users/jackbedford/Desktop/MOXA/testset/client_pcap_BL_sm00_3200_3201_2023-06-12_16-38-57_sock.csv


In [5]:
# Produce the four UDP packet summaries: uplink/downlink x server/client.
for _src, _dst, _term, _direct in (
    (ul_s_file, "udp_uplk_server_pkt_brief.csv", "server", "ul"),
    (ul_c_file, "udp_uplk_client_pkt_brief.csv", "client", "ul"),
    (dl_s_file, "udp_dnlk_server_pkt_brief.csv", "server", "dl"),
    (dl_c_file, "udp_dnlk_client_pkt_brief.csv", "client", "dl"),
):
    parse_packet_info(_src, _dst, _term, _direct, "udp")

0it [00:00, ?it/s]

output >>> udp_uplk_server_pkt_brief.csv


0it [00:00, ?it/s]

output >>> udp_uplk_client_pkt_brief.csv


0it [00:00, ?it/s]

output >>> udp_dnlk_server_pkt_brief.csv


0it [00:00, ?it/s]

output >>> udp_dnlk_client_pkt_brief.csv


In [16]:
files = ["udp_uplk_server_pkt_brief.csv", "udp_uplk_client_pkt_brief.csv", "udp_dnlk_server_pkt_brief.csv", "udp_dnlk_client_pkt_brief.csv"]

# Slack kept before the first and after the last row of each file.
WINDOW_MARGIN = pd.Timedelta(seconds=5)

def _load_brief(path):
    """Read a packet-brief CSV and parse its 'frame_time' column as datetime."""
    # The file is written back below; the round-trip float parser keeps the
    # epoch columns from changing in their last digits on each rewrite.
    brief = pd.read_csv(path, float_precision='round_trip')
    brief['frame_time'] = pd.to_datetime(brief['frame_time'])
    return brief

# Pass 1: per file, take the frame time of the first and last row, widened by
# the margin. One frame is held at a time, as in the original two-pass layout.
window_starts = []
window_ends = []
for file in files:
    df = _load_brief(file)
    if df.empty:
        raise ValueError("'{}' contains no packets; cannot determine the common time window".format(file))
    window_starts.append(df['frame_time'].iloc[0] - WINDOW_MARGIN)
    window_ends.append(df['frame_time'].iloc[-1] + WINDOW_MARGIN)
    del df

# The window shared by all files: latest start, earliest end.
st_t = max(window_starts)
ed_t = min(window_ends)

# Pass 2: keep only the rows strictly inside the shared window and overwrite
# each file in place.
for file in files:
    df = _load_brief(file)
    df = df[(df['frame_time'] > st_t) & (df['frame_time'] < ed_t)]
    df.to_csv(file, index=False)