### Demo 1: Get files with a specific format for each experiment and device

In [None]:
import os
import sys
import shutil
import argparse
import traceback
from genericpath import exists
from pprint import pprint

# ********************* User Settings *********************
# Root directory of the database; `date` is joined onto it to form `db_path` below.
database = "/home/wmnlab/D/database/"
# Name of the date sub-directory to process.
date = "2022-09-29"
# Maps experiment_name -> (number_of_experiment_rounds, list_of_experiment_round).
Exp_Name = {  # If the round list is empty, every sub-directory found under the experiment directory is used.
              # If the number of round directories found != number_of_experiment_rounds, a warning is printed and the experiment is skipped.
              # experiment_name:(number_of_experiment_rounds, list_of_experiment_round)
    "_Bandlock_Udp":(6, []),
    "_Bandlock_Tcp":(4, ["#01", "#02", "#03", "#04",]),
}
# Device names; each one is used as a sub-directory name inside every round directory.
devices = [
    "sm05", 
    "sm06",
    "sm07",
    "sm08",
]
# Directory that holds all experiments of the selected date.
db_path = os.path.join(database, date)
# *********************************************************

# --------------------- Util Functions ---------------------
def error_handling(err_handle):
    """
    Report an interrupted file conversion on stdout.

    Args:
        err_handle (str-tuple): (input_filename, output_filename, error_messages),
            where error_messages is e.g. traceback.format_exc() or a falsy value
            when nothing went wrong.
    Returns:
        (bool): True when an error message was present (and has been printed),
            False otherwise.
    """
    details = err_handle[2]
    # Guard clause: an empty/None message slot means there is nothing to report.
    if not details:
        return False
    headline = "File decoding from '{}' into '{}' was interrupted.".format(err_handle[0], err_handle[1])
    # Blank line, banner, headline, blank line, then the error text itself.
    report = ["", "**************************************************", headline, "", details]
    print(*report, sep="\n")
    return True

def makedir(dirpath, mode=0):  # mode=1: show message, mode=0 hide message
    """
    Create `dirpath` together with any missing parent directories.

    Every directory that is actually created is announced with a "mkdir <path>" line.

    Args:
        dirpath (str): directory to create; may be absolute or relative and may end
            with a path separator. An empty string names the current directory and
            is therefore a no-op.
        mode (int): 1 prints a notice when `dirpath` already exists, 0 stays silent.
    Raises:
        OSError: propagated from os.mkdir(), e.g. when a path component exists but
            is not a directory.
    """
    if os.path.isdir(dirpath):
        if mode:
            print("mkdir: cannot create directory '{}': directory has already existed.".format(dirpath))
        return
    ### collect the missing levels, deepest first
    missing = []
    current = dirpath
    # Stop on "" as well: os.path.dirname() of a bare relative name is "", which is
    # never a directory, so walking past it would never terminate.
    while current and not os.path.isdir(current):
        missing.append(current)
        parent = os.path.dirname(current)
        if parent == current:  # reached a root that cannot be shortened further
            break
        current = parent
    ### create them from the shallowest level down
    for path in reversed(missing):
        # "a/b" and "a/b/" are both collected for a path with a trailing separator;
        # skip the second spelling once the directory exists.
        if os.path.isdir(path):
            continue
        print("mkdir", path)
        os.mkdir(path)

# --------------------- Demo Code ---------------------
# Walk every configured experiment, verify its round directories, and list the
# diag_log *.mi2log files found under <round>/<device>/raw.
for _exp, (_times, _rounds) in Exp_Name.items():
    ### Check if the directories exist
    exp_path = os.path.join(db_path, _exp)
    print(exp_path)
    if _rounds:
        _round_names = _rounds
    elif os.path.isdir(exp_path):
        # No explicit round list: fall back to every entry of the experiment directory.
        _round_names = sorted(os.listdir(exp_path))
    else:
        # A missing experiment directory yields no rounds, so the count check below
        # reports it instead of os.listdir() raising FileNotFoundError.
        _round_names = []
    exp_dirs = [os.path.join(exp_path, item) for item in _round_names]
    exp_dirs = [item for item in exp_dirs if os.path.isdir(item)]
    print(_times)
    pprint(exp_dirs)
    if len(exp_dirs) != _times:
        print("************************************************************************************************")
        print("Warning: the number of directories does not match your specific number of experiment times.")
        print("************************************************************************************************")
        print()
        continue
    print()

    ### Check if a mi2log file exists, and then run decoding
    print(_exp)
    for exp_dir in exp_dirs:
        print(exp_dir)
        for dev in devices:
            # `raw_dir` (not `dir`) so the builtin dir() is not shadowed for the rest of the session.
            # raw_dir = os.path.join(exp_dir, dev)
            # raw_dir = os.path.join(exp_dir, dev, "analysis")
            # raw_dir = os.path.join(exp_dir, dev, "data")
            raw_dir = os.path.join(exp_dir, dev, "raw")
            if not os.path.isdir(raw_dir):
                # One device without data should not abort the whole scan.
                print("Warning: directory '{}' does not exist; skipped.".format(raw_dir))
                continue
            # Sorted so the listing order is reproducible across runs.
            for filename in sorted(os.listdir(raw_dir)):
                if "diag_log" not in filename or not filename.endswith(".mi2log"):
                    continue
                print(filename)
                # ********************* Do Anything You Need! *********************
                # fin = os.path.join(raw_dir, filename)
                # fout = os.path.join(raw_dir, "..", "data", "{}.txt".format(filename[:-7]))
                # makedir(os.path.join(raw_dir, "..", "data"))
        print()

### Demo 2: Time Conversion

In [2]:
import datetime as dt

def to_utc8(ts):
    """
    Convert a POSIX timestamp to a readable datetime expressed in UTC+8.

    Args:
        ts (float): seconds since the Unix epoch, optionally with a fractional
            microsecond part (e.g., 1644051509.989306)
    Returns:
        datetime.datetime: naive (tzinfo is None) wall-clock time at UTC+8
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build an aware UTC
    # value instead and drop tzinfo so callers still receive a naive datetime.
    utc_time = dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).replace(tzinfo=None)
    return utc_time + dt.timedelta(hours=8)

# Two hexadecimal fields: the first is taken as whole seconds, the second is scaled
# by 1e-6 (i.e. treated as microseconds) and added on top.
_seconds_hex, _microseconds_hex = "6335565a", "0003e14a"
epoch_time = int(_seconds_hex, 16) + int(_microseconds_hex, 16)*1e-6
# First line: the raw epoch value; second line: the same instant as a UTC datetime.
print(epoch_time, dt.datetime.utcfromtimestamp(epoch_time), sep="\n")


1664439898.254282
2022-09-29 08:24:58.254282


### Demo 3: Pandas 基本操作

In [None]:
import pandas as pd

### Read csv file
# The file is parsed with '@' as the field delimiter.
filename = "/home/wmnlab/Desktop/testspace/bandlock_analysis_0930/2022-09-29/_Bandlock_Udp/#01/sm05/data/client_pcap_BL_sm05_3210_3211_2022-09-29_16-24-57.csv"
# Directory containing the CSV; only referenced by the commented-out to_csv() example further down.
dirpath = os.path.dirname(filename)
df = pd.read_csv(filename, sep='@')
print(df)
# print(df.head())
# print(df.tail())

### Convert frame.time into datetime
# df['frame.time'] = pd.to_datetime(df['frame.time'])
df['frame.time'] = pd.to_datetime(df['frame.time']).dt.tz_localize(None)  # to remove the time zone information while keeping the local time

# Scratch area: inside this context pandas prints every row (display.max_rows=None).
# Uncomment any line to try it; `pass` keeps the block valid while all of them are commented out.
# NOTE: the `== np.nan` / `!= np.nan` examples never detect missing values, because
# NaN compares unequal to everything (including itself); use isnull() / notnull() instead.
with pd.option_context('display.max_rows', None):
    # print(df['data.len'])
    # print(df['data.len'].isin([4]))
    # print(~df['data.len'].isin([4]))
    # print(df.isnull())  # check if each element in the dataframe is np.nan
    # print(df.notnull())  # check if each element in the dataframe is not np.nan
    # print(df.isin(['-']))  # check if each element in the dataframe is '-'
    # print(~df.isin(['-']))  # check if each element in the dataframe is not(~) '-'
    # print(df['data.len'] == 4)
    # print(df['data.len'] != 4)
    # print(df['data.data'] == np.nan)
    # print(df['data.data'] != np.nan)
    # print(df['data.data'].str.contains('000425d401df5e76'))
    # print(df[df['data.data'].str.contains('000425d401df5e76')]['data.data'])
    # print(df[df['data.data'].str.contains('000425d401df5e76')][['frame.time', 'data.len', 'data.data']])
    pass

# print(df.loc[0, 'data.len'])
# print(df.loc[0, ['frame.time', 'data.len', 'data.data']])

# df = df.reindex(df.columns.tolist()+['add_column1', 'add_column2'], axis=1)
# df = pd.DataFrame(df, columns=df.columns.tolist()+['add_column1', 'add_column2'])

# df.to_csv(os.path.join(dirpath, "output.csv"), index=False)

# df['frame.time'] = pd.to_datetime(df["frame.time"])
# df['frame.time'] = pd.to_datetime(df["frame.time"]).dt.tz_localize(None)  # to remove the time zone information while keeping the local time

# print(dt.datetime.strptime("2022-09-29 16:24:58", '%Y-%m-%d %H:%M:%S'))
# print(dt.datetime.strptime("2022-09-29 16:24:58.252", '%Y-%m-%d %H:%M:%S.%f'))
# print(dt.datetime.strptime("2022-09-29 16:24:58.252615", '%Y-%m-%d %H:%M:%S.%f'))

# print(dt.datetime.strftime(dt.datetime(2022, 9, 29, 16, 24, 58), '%Y-%m-%d %H:%M:%S'))
# print(dt.datetime.strftime(dt.datetime(2022, 9, 29, 16, 24, 58, 252000), '%Y-%m-%d %H:%M:%S.%f')[:-3])
# print(dt.datetime.strftime(dt.datetime(2022, 9, 29, 16, 24, 58, 252615), '%Y-%m-%d %H:%M:%S.%f'))

### get column names
# curr_columns = df.columns.tolist()
# curr_columns = list(df)

# Second scratch area with the same row-filtering examples; every row is printed
# inside this context (display.max_rows=None). `pass` keeps the block valid while
# all examples are commented out.
# NOTE: the `== np.nan` / `!= np.nan` examples never detect missing values, because
# NaN compares unequal to everything (including itself); use isnull() / notnull() instead.
with pd.option_context('display.max_rows', None):
    # print(df['data.len'])
    # print(df['data.len'].isin([4]))
    # print(~df['data.len'].isin([4]))
    # print(df['data.len'] == 4)
    # print(df['data.len'] != 4)
    # print(df['data.data'] == np.nan)
    # print(df['data.data'] != np.nan)
    # print(df['data.data'].str.contains('000425d401df5e76'))
    # print(df[df['data.data'].str.contains('000425d401df5e76')]['data.data'])
    # print(df[df['data.data'].str.contains('000425d401df5e76')][['frame.time', 'data.len', 'data.data']])
    pass

# print(df.loc[0, 'data.len'])
# print(df.loc[0, ['frame.time', 'data.len', 'data.data']])

# df = df.reindex(df.columns.tolist()+['add_column1', 'add_column2'], axis=1)
# df = pd.DataFrame(df, columns=df.columns.tolist()+['add_column1', 'add_column2'])

# df.to_csv(os.path.join(dirpath, "output.csv"), index=False)

# print(dt.datetime.strptime("2022-09-29 16:24:58", '%Y-%m-%d %H:%M:%S'))
# print(dt.datetime.strptime("2022-09-29 16:24:58.252", '%Y-%m-%d %H:%M:%S.%f'))
# print(dt.datetime.strptime("2022-09-29 16:24:58.252615", '%Y-%m-%d %H:%M:%S.%f'))

# print(dt.datetime.strftime(dt.datetime(2022, 9, 29, 16, 24, 58), '%Y-%m-%d %H:%M:%S'))
# print(dt.datetime.strftime(dt.datetime(2022, 9, 29, 16, 24, 58, 252000), '%Y-%m-%d %H:%M:%S.%f')[:-3])
# print(dt.datetime.strftime(dt.datetime(2022, 9, 29, 16, 24, 58, 252615), '%Y-%m-%d %H:%M:%S.%f'))

### Demo 4: Pandas 處理空值 & 取資料子集

In [7]:
import pandas as pd
# ******************************************************************************************************
# Check whether each column or each row of the data contains ANY null value
# print(df.isnull().any()) # default axis=0 (axis='index'): reduces over the rows, giving one result per column; True as soon as a single value is True
# print(df.isnull().any(axis=1)) # axis=1 (axis='columns'): reduces over the columns, giving one result per row; True as soon as a single value is True
# ******************************************************************************************************

# dtype=str: every column is read as text, so nothing is converted to a numeric type.
df = pd.read_csv("/home/wmnlab/D/database/2022-09-29/_Bandlock_Udp/#01/sm05/data/cimon_sm05_2022-09-29_16-22-57_new.csv", dtype=str)

# Columns that are inspected for null values.
subset = "MNC,MCC,CID,PCI,LTE_RSRP,LTE_RSRQ,earfcn".split(',')
# Rows with a null in any `subset` column, shown with Date/GPS columns for context.
# Each pair of assignments demonstrates two spellings of the same selection
# (the second assignment simply overwrites the first): .loc-based here ...
z = df.loc[df.loc[:, subset].isnull().any(axis=1), ['Date', 'GPSLat', 'GPSLon'] + subset]
z = df.loc[:, ['Date', 'GPSLat', 'GPSLon'] + subset].loc[df.loc[:, subset].isnull().any(axis=1)]
print(z)
# ... and plain [] indexing here.
z = df[df[subset].isnull().any(axis=1)][['Date', 'GPSLat', 'GPSLon'] + subset]
z = df[['Date', 'GPSLat', 'GPSLon'] + subset][df[subset].isnull().any(axis=1)]
print(z)

# Print individual columns one by one.
print(df['PCI'])
print(df['LTE_RSRP'])
print(df['LTE_RSRQ'])
print(df['NR_SSRSRP'])
print(df['NR_SSRSRQ'])
print(df['earfcn'])
print(df['PCI1'])
print(df['LTE_RSRP1'])
print(df['LTE_RSRQ1'])
print(df['earfcn1'])

# ******************************************************************************************************
# There are two ways to take a subset
#   subset = ['feature1', 'feature2', 'feature3',]
#   condition = df['feature1'] == 1
#   condition_2 = (df['feature1'] == 1) & (df['feature2'] == 2)
#   condition_3 = (df['feature1'] == 1) & (df['feature2'] == 2) | (df['feature3'] == 3)
#   (1) df['feature1']
#       df[['feature1', 'feature2',]]
#       df[subset]
#       df[condition]
#       df[condition]['feature1']
#       df[condition][['feature1', 'feature2',]]
#       df[condition][subset]
#   (2) df.loc[:, 'feature1']
#       df.loc[:, ['feature1', 'feature2',]]
#       df.loc[:, subset]
#       df.loc[condition]
#       df.loc[condition, 'feature1']
#       df.loc[condition, ['feature1', 'feature2',]]
#       df.loc[condition, subset]
# NOTE: essentially, each expression above can be treated as a new dataframe (i.e., like df);
#       selecting a single column label yields a Series instead.
# ******************************************************************************************************

Empty DataFrame
Columns: [Date, GPSLat, GPSLon, MNC, MCC, CID, PCI, LTE_RSRP, LTE_RSRQ, earfcn]
Index: []
Empty DataFrame
Columns: [Date, GPSLat, GPSLon, MNC, MCC, CID, PCI, LTE_RSRP, LTE_RSRQ, earfcn]
Index: []
0       217
1       217
2       217
3       217
4       217
       ... 
4475    404
4476    404
4477    404
4478    404
4479    404
Name: PCI, Length: 4480, dtype: object
0       -79
1       -75
2       -75
3       -75
4       -75
       ... 
4475    -78
4476    -78
4477    -71
4478    -71
4479    -70
Name: LTE_RSRP, Length: 4480, dtype: object
0       -12
1       -11
2       -11
3       -12
4       -12
       ... 
4475    -16
4476    -16
4477    -14
4478    -14
4479    -11
Name: LTE_RSRQ, Length: 4480, dtype: object
0         -
1         -
2         -
3       -79
4       -85
       ... 
4475      -
4476      -
4477      -
4478    -87
4479    -87
Name: NR_SSRSRP, Length: 4480, dtype: object
0         -
1         -
2         -
3       -12
4       -15
       ... 
4475      -
4476

In [10]:
import pandas as pd

# Load the preprocessed CSV with pandas' default type inference.
df = pd.read_csv("/home/wmnlab/D/database/2022-09-29/_Bandlock_Udp/#01/sm05/data/cimon_sm05_2022-09-29_16-22-57_preproc.csv")
# Bare column lookup: the result is neither assigned nor printed here.
df['Handover.band-change']
# Print only the rows whose 'Handover.band-change' value equals False.
print(df[df['Handover.band-change'] == False])

      index                 Date     GPSLat      GPSLon  GPSSpeed  RxRate  \
151     151  2022-09-29 16:25:33  24.998947  121.558907      7.18   31136   
164     164  2022-09-29 16:25:46  24.997877  121.559399     12.93   28078   
167     167  2022-09-29 16:25:49  24.997611  121.559664     13.76   28290   
188     188  2022-09-29 16:26:11  24.997348  121.562305     13.01   28356   
211     211  2022-09-29 16:26:34  24.998450  121.566103     18.12   28824   
...     ...                  ...        ...         ...       ...     ...   
4332   4332  2022-09-29 17:37:16  24.997440  121.560030     13.83  131232   
4338   4338  2022-09-29 17:37:22  24.997909  121.559437     11.53       0   
4344   4344  2022-09-29 17:37:28  24.998365  121.559203      7.38  130360   
4352   4352  2022-09-29 17:37:37  24.998930  121.558948      7.39  132010   
4355   4355  2022-09-29 17:37:40  24.999087  121.558783      7.75  133038   

      TxRate  DLBandwidth  ULBandwidth  MCC  ...  Handover.duration(sec)  \

In [None]:
import pandas as pd

# Load the server-side CSV with the default ',' delimiter.
filename = "/home/wmnlab/Desktop/testspace/bandlock_analysis_0930/2022-09-29/_Bandlock_Udp/#01/sm05/data/server_pcap_DL_sm05_3211_2022-09-29_16-24-41.csv"
df = pd.read_csv(filename)
# Two alternative spellings for rounding 'data.len' and converting it to strings:
# df['data.len'] = df['data.len'].round().apply(str)
# df['data.len'] = df['data.len'].round().astype(str)

In [None]:
import pandas as pd
import datetime as dt

def str_to_datetime(ts):
    """
    Convert a timestamp string, with or without a fractional-second part, into datetime.datetime

    Args:
        ts (str): timestamp string formatted as '%Y-%m-%d %H:%M:%S' optionally followed by
            '.<fraction>' (e.g., 2022-09-29 16:24:58.252615 or 2022-09-29 16:24:58)
    Returns:
        (datetime.datetime)
    Raises:
        ValueError: if `ts` matches neither format.
    """
    try:
        return dt.datetime.strptime(ts, '%Y-%m-%d %H:%M:%S.%f')
    except ValueError:
        # Only a format mismatch triggers the fallback; a bare `except` would also
        # swallow KeyboardInterrupt/SystemExit raised during parsing.
        return dt.datetime.strptime(ts, '%Y-%m-%d %H:%M:%S')

# Load the CSV and parse its 'time' column into datetimes.
fin = "/home/wmnlab/Desktop/testspace/bandlock_analysis_0930/2022-09-29/_Bandlock_Udp/#01/sm05/analysis/diag_log_ho-info.csv"
df = pd.read_csv(fin)
# Assign through df['time'] rather than df.loc[:, 'time']: since pandas 2.0 the .loc form
# writes into the existing column in place and keeps its original dtype, whereas plain
# column assignment replaces the column so it really becomes datetime64.
df['time'] = pd.to_datetime(df['time'])

# A single row selected by position comes back as a pandas Series.
print(df.iloc[0])
print(type(df.iloc[0]))

# A hand-built Series with '-' placeholders in every field except 'time'.
test = pd.Series({"time":str_to_datetime("2022-09-29 15:53:59.061251"), "type_id":'-', "handoff_type":'-', "handoff_state":'-', "handoff_duration":'-', "PCI":'-', "nr_pci":'-'})
print(test)
print(type(test))