# Import & Define Functions

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import swifter
from collections import namedtuple
import matplotlib.pyplot as plt
from pprint import pprint
import portion as P

# Let pandas display up to 200 columns before truncating wide DataFrames.
pd.set_option('display.max_columns', 200)
# The row limit is left at the pandas default; uncomment to raise it as well.
# pd.set_option('display.max_rows', 200)

class myQueue:
    """FIFO queue stored in a plain list, with an optional capacity.

    Pushing onto a full queue first evicts the front element. A ``maxsize``
    that is not greater than zero means the queue is unbounded.
    """

    def __init__(self, maxsize=0):
        self.data = []
        if maxsize > 0:
            self.maxsize = maxsize
        else:
            # No positive limit given: never report the queue as full.
            self.maxsize = float('inf')

    def tolist(self):
        """Return the underlying list itself (not a copy)."""
        return self.data

    def size(self):
        """Return the capacity (``inf`` when unbounded), not the element count."""
        return self.maxsize

    def len(self):
        """Return the number of elements currently stored."""
        return len(self.data)

    def empty(self):
        """Return True when no element is stored."""
        return not self.len()

    def full(self):
        """Return True when the element count equals the capacity."""
        return self.maxsize == self.len()

    def pop(self, index=0):
        """
        Remove elements from the front up to and including position ``index``.

        The ``index`` leading elements are discarded one by one, then the new
        front element is removed and returned. Returns None if the queue runs
        empty before reaching that element.
        """
        for _ in range(index):
            self.pop()
        if self.empty():
            return None
        return self.data.pop(0)

    def push(self, element):
        """
        Append ``element`` at the rear.

        Returns 0 if nothing was evicted, 1 if the front element had to be
        popped to make room.
        """
        evicted = 1 if self.full() else 0
        if evicted:
            self.pop()
        self.data.append(element)
        return evicted

    def front(self):
        """Return the oldest element without removing it, or None if empty."""
        if self.empty():
            return None
        return self.data[0]

    def rear(self):
        """Return the newest element without removing it, or None if empty."""
        if self.empty():
            return None
        return self.data[-1]

    def get(self, index):
        """
        Return the element at ``index`` (negative indices count from the rear).

        Out-of-range indices yield None. A list of indices yields a list with
        one result per index.
        """
        if isinstance(index, list):
            return [self.get(single) for single in index]
        count = self.len()
        if index < count and abs(index) <= count:
            return self.data[index]
        return None

    def find(self, element):
        """
        Return the position of the first occurrence of ``element``, or None.

        When ``element`` is a list, its items are tried in order and the
        position of the first item that is present is returned.
        """
        if isinstance(element, list):
            for candidate in element:
                position = self.find(candidate)
                if position is not None:
                    return position
            return None
        if element in self.data:
            return self.data.index(element)
        return None

In [3]:
def mi_parse_ho(df, tz=0, debug=False):
    """
    Extract connection, handover and link-failure events from an RRC log table.

    The table is scanned row by row. Rows whose ``type_id`` is
    '5G_NR_RRC_OTA_Packet' or 'LTE_RRC_Serv_Cell_Info' only feed the NR / LTE
    cell-tracking queues; every other row is inspected for the event flag
    columns (e.g. 'rrcConnectionRelease', 'lte-rrc.t304', 'nr-rrc.t304').

    Args:
        df (pd.DataFrame): RRC log. Columns read here include 'Timestamp',
            'type_id', 'PCI', 'Freq', the serving-cell columns
            ('Cell Identity', 'Band ID', 'DL frequency', 'DL bandwidth',
            'UL frequency', 'UL bandwidth') and the per-message flag columns.
            NOTE: the 'Timestamp' column is overwritten in place on the
            caller's DataFrame.
            NOTE(review): rows are fetched with ``df.iloc[i]`` where ``i`` is
            the label yielded by ``iterrows()``, so a default 0..n-1 index is
            assumed - confirm against callers.
        tz (int|float): number of hours added to every timestamp.
        debug (bool): when True, print a trace line for every detected event.

    Returns:
        tuple: ``(A, D, df_HO)`` where
            A (dict): event type -> list of full ``C`` records (timing,
                source/target cell ids, cell details before and after),
            D (dict): event type -> list of ``HO(start, end, cause, others)``,
            df_HO (pd.DataFrame): all records of ``A`` sorted by start time,
                with an 'interrupt' column holding ``end - start`` in seconds.
    """
    df['Timestamp'] = pd.to_datetime(df['Timestamp']) + pd.Timedelta(hours=tz)
    
    ### Define Basic Element
    HO = namedtuple('HO', 'start, end, cause, others', defaults=tuple([None]*4))
    stNR = namedtuple('stNR', 'snrPCI, tnrPCI', defaults=tuple([None]*2))
    stLTE = namedtuple('stLTE', 'sPCI, sFreq, tPCI, tFreq', defaults=tuple([None]*4))
    NR_CEL = namedtuple('NR_CEL', 'nrPCI, nrFreq', defaults=tuple([None]*2))
    LTE_CEL = namedtuple('LTE_CEL', 'ePCI, ECI, eNB, BID, DL_Freq, DL_BW, UL_Freq, UL_BW', defaults=tuple([None]*8))
    # C is the flat output record: HO + stLTE + stNR + LTE cell before + LTE
    # cell after (fields suffixed with '1') + NR cell before + NR cell after.
    C = namedtuple('C', HO._fields + stLTE._fields + stNR._fields + \
        LTE_CEL._fields + tuple([f'{s}1' for s in LTE_CEL._fields]) + NR_CEL._fields + tuple([f'{s}1' for s in NR_CEL._fields]), 
        defaults=tuple([None]*30))
    
    def dprint(*args, **kwargs):
        """print() that is active only when ``debug`` is set."""
        if debug:
            print(*args, **kwargs)
    
    def _row(pos=None):
        """Return the row at position ``pos``; default is the row being iterated."""
        # ``is None`` (not truthiness) so that position 0 is honoured.
        return df.iloc[i] if pos is None else df.iloc[pos]
    
    def NR_OTA(pos=None):
        """True if the row is a 5G NR RRC OTA packet."""
        return _row(pos).type_id == '5G_NR_RRC_OTA_Packet'
    
    def CEL_INFO(pos=None):
        """True if the row is an LTE serving-cell info record."""
        return _row(pos).type_id == 'LTE_RRC_Serv_Cell_Info'
    
    def nr_track(pos=None):
        """Build the NR_CEL described by the row; empty NR_CEL for placeholder PCIs."""
        row = _row(pos)
        if int(row.PCI) in [0, 65535]:  # 65535 is for samsung; 0 is for xiaomi.
            return NR_CEL()
        else:
            return NR_CEL(int(row.PCI), int(row.Freq))
    
    def eci_track(pos=None):
        """Build the LTE_CEL described by a serving-cell info row."""
        row = _row(pos)
        PCI = int(row['PCI'])
        ECI = int(row['Cell Identity'])
        eNB = ECI // 256
        BID = int(row['Band ID'])
        DL_Freq = int(row['DL frequency'])
        DL_BW = row['DL bandwidth']
        UL_Freq = int(row['UL frequency'])
        UL_BW = row['UL bandwidth']
        return LTE_CEL(PCI, ECI, eNB, BID, DL_Freq, DL_BW, UL_Freq, UL_BW)
    
    def peek_nr(pos=None, look_after=0.5, look_before=0.0):
        """
        Look ahead for the NR cell that follows the current ``pscell``.

        ``pos`` is the position of the end of the event, if known. Returns the
        first queued or upcoming NR cell that differs from ``pscell``; falls
        back to ``pscell`` itself when none is seen within ``look_after``
        seconds. ``look_before`` is currently unused.
        """
        ## look_after == 0.5 is a magic number
        ### TODO: first peek at the cell information between HO start and end
        if pos is not None:  # position of end of an event
            for j in range(i, pos):
                if NR_OTA(j):
                    qpscell.push(nr_track(j))
        ### END TODO
        # dprint(f'pscell={pscell}')
        # dprint(qpscell.tolist())
        index = None
        for j in range(qpscell.len()):
            if pscell != qpscell.get(j):
                index = j
                break
        # dprint(f'index={index}')
        if index is not None:
            return qpscell.pop(index)
        ### haven't found a pci change yet!
        t0 = df['Timestamp'].iloc[i]
        for j in range(i, len(df)):  # walk forward, at most to the last row
            t1 = df["Timestamp"].iloc[j]
            if (t1 - t0).total_seconds() > look_after:
                break
            if df['type_id'].iloc[j] != '5G_NR_RRC_OTA_Packet':
                continue
            return nr_track(j)
        return pscell
    
    def peek_eci(pos=None, look_after=0.5, look_before=0.0):
        """
        Look ahead for the LTE cell that follows the current ``pcell``.

        Mirror of ``peek_nr`` for LTE serving-cell info rows. ``look_before``
        is currently unused.
        """
        ## look_after == 0.5 is a magic number
        ### TODO: first peek at the cell information between HO start and end
        if pos is not None:  # position of end of an event
            for j in range(i, pos):
                if CEL_INFO(j):
                    # Decode row j (the serving-cell row just matched), not
                    # the current event row.
                    qpcell.push(eci_track(j))
        ### END TODO
        # dprint(f'pcell={pcell}')
        # dprint(qpcell.tolist())
        index = None
        for j in range(qpcell.len()):
            if pcell != qpcell.get(j):
                index = j
                break
        # dprint(f'index={index}')
        if index is not None:
            return qpcell.pop(index)
        ### haven't found a pci change yet!
        t0 = df['Timestamp'].iloc[i]
        for j in range(i, len(df)):  # walk forward, at most to the last row
            t1 = df['Timestamp'].iloc[j]
            if (t1 - t0).total_seconds() > look_after:
                break
            if df['type_id'].iloc[j] != 'LTE_RRC_Serv_Cell_Info':
                continue
            return eci_track(j)
        return pcell

    def find_1st_after(target, look_after=1.0):
        """
        Find the first row at or after the current one whose ``target`` flag is set.

        Returns ``(timestamp, position)``, or ``(None, None)`` if no such row
        exists within ``look_after`` seconds of the current row's time ``t``.
        """
        for j in range(i, len(df)):  # walk forward, at most to the last row
            t1 = df["Timestamp"].iloc[j]
            if (t1 - t).total_seconds() > look_after:
                return None, None
            if df[target].iloc[j] in [1,'1']:
                return t1, j  # timestamp & position
        return None, None

    def find_1st_before(target, look_before=1.0):
        """
        Find the first row at or before the current one whose ``target`` flag is set.

        Returns ``(timestamp, position)``, or ``(None, None)`` if no such row
        exists within ``look_before`` seconds of the current row's time ``t``.
        """
        for j in range(i, -1, -1):  # walk backward, at most to the first row
            t1 = df["Timestamp"].iloc[j]
            if (t - t1).total_seconds() > look_before:
                return None, None
            if df[target].iloc[j] in [1,'1']:
                return t1, j  # timestamp & position
        return None, None

    # D: event type -> list of HO(start, end, cause, others).
    D = {
        ### Conn Setup/Rel & HO
        'Conn_Rel':[],    # Conn Release: rrcConnectionRelease
        'Conn_Setup':[],  # Conn Setup: rrcConnectionRequest + rrcConnectionSetup
        'LTE_HO': [],     # E_PCel -> E_PCel’: lte-rrc.t304 & LTE_PCel does change
        'SN_Rel': [],     # EUTRA + NR -> EUTRA:(CHT) lte-rrc.t304 & LTE_PCel does not change
                          #                     (TWM) nr-Config-r15: release (0) 
        'SN_Setup': [],   # EUTRA -> EUTRA + NR:(CHT) lte-rrc.t304 + nr-rrc.t304 + dualConnectivityPHR: setup (1) & LTE_PCel does not change
                          #                     (TWM) nr-rrc.t304 + dualConnectivityPHR: setup (1)
        'MN_HO': [],      # E_PCel + N_PSCel -> E_PCel’ + N_PSCel: lte-rrc.t304 + nr-rrc.t304 + dualConnectivityPHR: setup (1) & LTE_PCel does change
        'SN_HO': [],      # E_PCel + N_PSCel -> E_PCel + N_PSCel’: nr-rrc.t304
        'MNSN_HO': [],         # (TWM)
        'SN_Rel_MN_HO': [],    # (TWM)
        'SN_Setup_MN_HO': [],  # (TWM)
        ### Link Failure
        'SCG_Failure': [],   # scgFailureInformationNR-r15
        'MCG_Failure': [],   # rrcConnectionReestablishmentRequest + rrcConnectionReestablishmentComplete
        'NAS_Recovery': [],  # rrcConnectionReestablishmentRequest + rrcConnectionReestablishmentReject + rrcConnectionRequest + rrcConnectionSetup
        # MCG_Failure, NAS_Recovery may be caused by 'reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'
        }
    
    # A: same keys as D, but holding the full C record for every event.
    A = { 'Conn_Rel':[], 'Conn_Setup':[],
        'LTE_HO': [], 'SN_Rel': [], 'SN_Setup': [], 'MN_HO': [], 'SN_HO': [],
        'MNSN_HO': [], 'SN_Rel_MN_HO': [], 'SN_Setup_MN_HO': [],
        'SCG_Failure': [], 'MCG_Failure': [], 'NAS_Recovery': [] }
    
    # Most recently seen NR / LTE cells, consumed when the next event row is processed.
    qpscell = myQueue(3)
    qpcell = myQueue(3)
    
    init = 1
    pcell, pscell = LTE_CEL(), NR_CEL()
    prev_pci, prev_freq = None, None
    
    for i, row in df.iterrows():
        # Cell-info rows only update the tracking queues.
        if NR_OTA():
            qpscell.push(nr_track())
            continue
        elif CEL_INFO():
            qpcell.push(eci_track())
            continue
        if init:
            t_init, pci_init, freq_init = row.Timestamp, int(row.PCI), int(row.Freq)
            pcell = LTE_CEL(ePCI=pci_init, DL_Freq=freq_init)
            dprint(f"{t_init} | Initial PCI={pci_init} EARFCN={freq_init}")
            dprint()
            init = 0
        
        t, pci, freq = row.Timestamp, int(row.PCI), int(row.Freq)
        
        # The serving (PCI, EARFCN) changed: look ahead for the serving-cell
        # info record of the new PCI, stopping once a later event row
        # reports a different PCI.
        if (prev_pci, prev_freq) != (pci, freq):
            for j in range(i, len(df)):  # walk forward, at most to the last row
                if CEL_INFO(j):
                    next_pcell = eci_track(j)
                    if next_pcell[0] == pci:
                        qpcell.push(next_pcell)
                        break
                elif not NR_OTA(j):
                    if df['PCI'].iloc[j] != pci:
                        break
        
        if not qpscell.empty():
            pscell = qpscell.pop()
        if not qpcell.empty():
            pcell = qpcell.pop()
        
        ### Conn_Rel
        if df["rrcConnectionRelease"].iloc[i] == 1:
            D['Conn_Rel'].append(HO(start=t))
            A['Conn_Rel'].append(C(*HO(start=t), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0]), *pcell, *LTE_CEL(), *pscell, *NR_CEL()))
            dprint(f"{t}, {pd.NaT} | Conn_Rel at PCI={pci} EARFCN={freq}.")
            dprint(f'{tuple(pcell)} -> {tuple(LTE_CEL())}')
            dprint(f'{tuple(pscell)} ->{tuple(NR_CEL())}')
            pcell, pscell = LTE_CEL(), NR_CEL()
            dprint()

        ### Conn_Setup
        if df["rrcConnectionRequest"].iloc[i] == 1:
            a, j1 = find_1st_after('rrcConnectionReconfigurationComplete',look_after=2)
            b, j2 = find_1st_after('securityModeComplete',look_after=2)
            # The event ends at the later of the two completion messages.
            # Either lookup may time out and return None, which must not be
            # compared against a timestamp; if both are missing the end
            # stays None.
            if a is not None and (b is None or a > b):
                end, j = a, j1
            else:
                end, j = b, j2
            _pcell = peek_eci(pos=j)
            D['Conn_Setup'].append(HO(start=t, end=end))
            A['Conn_Setup'].append(C(*HO(start=t, end=end), *stLTE(tPCI=pci, tFreq=freq), *stNR(), *pcell, *_pcell, *pscell, *pscell))
            dprint(f"{t}, {end} | Conn_Setup to PCI={pci} EARFCN={freq}.")
            dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
            dprint()
        
        ### SN_Setup, SN_Rel, MN_HO, LTE_HO
        if df["lte-rrc.t304"].iloc[i] == 1:
            end, j = find_1st_after('rrcConnectionReconfigurationComplete')
            serv_cell, target_cell = pci, int(df['lte_targetPhysCellId'].iloc[i])
            serv_freq, target_freq = freq, int(df['dl-CarrierFreq'].iloc[i])
            nr_target_cell = int(df["nr_physCellId"].iloc[i])
            
            if df["SCellToAddMod-r10"].iloc[i] == 1:
                # The SCell indices are stored as one '@'-separated string.
                n =len(str(df["SCellIndex-r10.1"].iloc[i]).split('@'))
                others=f'Set up {n} SCell.'
            else:
                others=None
            
            if serv_freq != target_freq:
                others = f'{others} Inter-Freq HO.' if others else 'Inter-Freq HO.'
            
            ### SN_Setup, MN_HO
            if df["nr-rrc.t304"].iloc[i] == 1 and df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
                ### SN_Setup
                if serv_cell == target_cell and serv_freq == target_freq:
                    _pscell = peek_nr(pos=j)
                    D['SN_Setup'].append(HO(start=t, end=end, others=others))
                    A['SN_Setup'].append(C(*HO(start=t, end=end, others=others), *stLTE(sPCI=serv_cell, sFreq=serv_freq), *stNR(tnrPCI=nr_target_cell), *pcell, *pcell, *pscell, *_pscell))
                    dprint(f"{t}, {end} | SN_Setup to nrPCI={nr_target_cell} | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
                    dprint()
                else:
                ### MN_HO
                    _pcell = peek_eci(pos=j)
                    D['MN_HO'].append(HO(start=t, end=end, others=others))
                    A['MN_HO'].append(C(*HO(start=t, end=end, others=others), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *pscell))
                    dprint(f"{t}, {end} | MN_HO ({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
                    dprint()
            else:
            ### SN_Rel, LTE_HO
                ### SN_Rel
                if serv_cell == target_cell and serv_freq == target_freq:
                    a, b = find_1st_before("scgFailureInformationNR-r15")
                    if a is not None:
                        others = f'{others} Caused by scg-failure.' if others else 'Caused by scg-failure.'
                    D['SN_Rel'].append(HO(start=t, end=end, others=others))
                    A['SN_Rel'].append(C(*HO(start=t, end=end, others=others), *stLTE(sPCI=serv_cell, sFreq=serv_freq), *stNR(snrPCI=pscell[0]), *pcell, *pcell, *pscell, *NR_CEL()))
                    dprint(f"{t}, {end} | SN_Rel at nrPCI={pscell[0]} | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                    pscell = NR_CEL()
                    dprint()
                else:
                ### LTE_HO
                    _pcell = peek_eci(pos=j)
                    D['LTE_HO'].append(HO(start=t, end=end, others=others))
                    A['LTE_HO'].append(C(*HO(start=t, end=end, others=others), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(), *pcell, *_pcell, *pscell, *pscell))
                    dprint(f"{t}, {end} | LTE_HO ({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
                    dprint()

        ### SN_HO
        if df["nr-rrc.t304"].iloc[i] == 1 and not df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
            end, j = find_1st_after('rrcConnectionReconfigurationComplete')
            nr_target_cell = int(df["nr_physCellId"].iloc[i])
            _pscell = peek_nr(pos=j)
            D['SN_HO'].append(HO(start=t, end=end))
            A['SN_HO'].append(C(*HO(start=t, end=end), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0], tnrPCI=nr_target_cell), *pcell, *pcell, *pscell, *_pscell))
            dprint(f"{t}, {end} | SN_HO to nrPCI={nr_target_cell}")
            dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
            dprint()

        ### SCG_Failure
        if df["scgFailureInformationNR-r15"].iloc[i] == 1:
            # others = df["failureType-r15"].iloc[i]
            cause = df["failureType-r15"].iloc[i]
            _pscell = peek_nr()
            D['SCG_Failure'].append(HO(start=t, cause=cause))  # end time??
            A['SCG_Failure'].append(C(*HO(start=t, cause=cause), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0]), *pcell, *pcell, *pscell, *_pscell))
            dprint(f"{t}, {pd.NaT} | SCG_Failure at nrPCI={pscell[0]} | {cause}")
            dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
            ### An SCG failure is always followed by an SN release.
            dprint()
        
        ### MCG_Failure (type II), NAS_Recovery (type III)
        if df["rrcConnectionReestablishmentRequest"].iloc[i] == 1:
            end1, j1 = find_1st_after('rrcConnectionReestablishmentComplete', look_after=1)
            end2, j2 = find_1st_after('rrcConnectionReestablishmentReject', look_after=1)
            end3, j3 = find_1st_after('rrcConnectionRequest', look_after=1)
            # others = df["reestablishmentCause"].iloc[i]
            cause = df["reestablishmentCause"].iloc[i]
            # target_cell = int(df['physCellId.3'].iloc[i])
            serv_cell, target_cell = pci, int(df['physCellId.3'].iloc[i])
            serv_freq, target_freq = freq, None
            
            ### MCG_Failure (type II): a Complete arrives, and no Reject precedes it.
            if (end1 and not end2) or (end1 and end2 and end1 < end2):
                # dprint(end1, end2)
                end, j = end1, j1
                _pcell = peek_eci()
                D['MCG_Failure'].append(HO(start=t, end=end, cause=cause))
                A['MCG_Failure'].append(C(*HO(start=t, end=end, cause=cause), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *NR_CEL()))
                dprint(f"{t}, {end} | MCG_Failure PCI={serv_cell} -> PCI={target_cell}, recconected to {pci} | {cause}")
                dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                pscell = NR_CEL()
                dprint()
                ### After an MCG failure the UE may reconnect without going through RRC Connection Setup.
            else: 
            ### NAS_Recovery (type III)
                # dprint(end1, end2)
                end, j = end3, j3
                _pcell = peek_eci()
                # The end is placed 1 microsecond before the following rrcConnectionRequest.
                D['NAS_Recovery'].append(HO(start=t, end=end-pd.Timedelta(microseconds=1) if end else None, cause=cause))  # end time??
                A['NAS_Recovery'].append(C(*HO(start=t, end=end-pd.Timedelta(microseconds=1) if end else None, cause=cause), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *NR_CEL()))
                dprint(f"{t}, {end} | NAS_Recovery PCI={serv_cell} -> PCI={target_cell} | {cause}")
                dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                pscell = NR_CEL()
                dprint()
        
        ### Update previous pci, freq
        prev_pci, prev_freq = pci, freq
    
    ### Build DataFrame
    # One frame per event type, indexed by the type name, stacked together.
    df_HO = pd.DataFrame()
    for key in A.keys():
        df_HO = pd.concat([df_HO, \
            pd.DataFrame(A[key], index=[key]*len(A[key]))])
    df_HO = df_HO.sort_values(by=['start']).reset_index()
    df_HO = df_HO.rename(columns={'index': 'ho_type'})
    # Reorder the columns; 'interrupt' does not exist yet and is filled in below.
    df_HO = df_HO.reindex(
        ['start','end','ho_type','interrupt','sPCI','sFreq','tPCI','tFreq','snrPCI','tnrPCI','cause','others'] + \
            df_HO.columns.tolist()[df_HO.columns.get_loc('ePCI'):df_HO.columns.get_loc('nrFreq1')+1], axis=1)
    df_HO['start'] = pd.to_datetime(df_HO['start'])
    df_HO['end'] = pd.to_datetime(df_HO['end'])
    df_HO['Timestamp'] = df_HO['start']
    df_HO['Type_ID'] = 'RRC_OTA_Handover_Parsing'
    df_HO['interrupt'] = (df_HO['end'] - df_HO['start']).dt.total_seconds()
    ### Set dtypes
    df_HO['ho_type'] = df_HO['ho_type'].astype('category')
    df_HO['cause'] = df_HO['cause'].astype('category')
    df_HO['others'] = df_HO['others'].astype('string')
    df_HO['DL_BW'] = df_HO['DL_BW'].astype('category')
    df_HO['DL_BW1'] = df_HO['DL_BW1'].astype('category')
    df_HO['UL_BW'] = df_HO['UL_BW'].astype('category')
    df_HO['UL_BW1'] = df_HO['UL_BW1'].astype('category')
    # Every remaining id / frequency column becomes a nullable integer.
    for tag in df_HO.columns[df_HO.columns.get_loc('sPCI'):df_HO.columns.get_loc('nrFreq1')+1]:
        if tag not in ['cause','others','DL_BW','DL_BW1','UL_BW','UL_BW1']:
            df_HO[tag] = df_HO[tag].astype('Int32')
    df_HO['interrupt'] = df_HO['interrupt'].astype('float32')
    df_HO['Timestamp'] = pd.to_datetime(df_HO['Timestamp'])
    df_HO['Type_ID'] = df_HO['Type_ID'].astype('category')
    return A, D, df_HO

In [4]:
def set_data(df):
    """
    Normalise column names and dtypes of a UDP loss/latency table.

    The dotted column names are renamed to underscore form, then each column
    is cast to its working dtype. The rename returns a new DataFrame, so the
    caller's object is not modified.

    Args:
        df (pd.DataFrame): table with the columns 'sequence.number',
            'Timestamp', 'Timestamp_epoch', 'lost', 'excl', 'latency',
            'transmit.time', 'transmit.time_epoch', 'arrival.time' and
            'arrival.time_epoch'.

    Returns:
        pd.DataFrame: the renamed and re-typed table.
    """
    df = df.rename(columns={
        'sequence.number':'sequence_num',
        'transmit.time':'transmit_time',
        'transmit.time_epoch':'transmit_time_epoch',
        'arrival.time':'arrival_time',
        'arrival.time_epoch':'arrival_time_epoch',
    })
    df['sequence_num'] = df['sequence_num'].astype('Int32')
    for col in ('Timestamp', 'transmit_time', 'arrival_time'):
        df[col] = pd.to_datetime(df[col])
    for col in ('lost', 'excl'):
        df[col] = df[col].astype('boolean')
    # Epoch seconds are around 1.7e9; float32 can only resolve steps of about
    # 128 s at that magnitude, which would wipe out the sub-second part.
    # Keep them in float64.
    for col in ('Timestamp_epoch', 'transmit_time_epoch', 'arrival_time_epoch'):
        df[col] = df[col].astype('float64')
    # Latency values are small, so single precision is sufficient here.
    df['latency'] = df['latency'].astype('float32')
    return df

# Testing

In [5]:
# Load one experiment run: the RRC log plus the UDP uplink / downlink
# loss-latency traces.
df_ho, df_ul, df_dl = (
    pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/diag_log_qc02_2023-02-04_16-13-28_rrc.csv"),
    pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/udp_uplk_loss_latency.csv"),
    pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/udp_dnlk_loss_latency.csv"),
)

# Parse the RRC log with its timestamps shifted by +8 hours; only the event
# table (third return value) is kept. Then normalise both UDP tables.
_, _, df_ho = mi_parse_ho(df_ho, tz=8)
df_ul, df_dl = set_data(df_ul), set_data(df_dl)

In [6]:
df_ho

Unnamed: 0,start,end,ho_type,interrupt,sPCI,sFreq,tPCI,tFreq,snrPCI,tnrPCI,cause,others,ePCI,ECI,eNB,BID,DL_Freq,DL_BW,UL_Freq,UL_BW,ePCI1,ECI1,eNB1,BID1,DL_Freq1,DL_BW1,UL_Freq1,UL_BW1,nrPCI,nrFreq,nrPCI1,nrFreq1,Timestamp,Type_ID
0,2023-02-04 16:14:55.534483,2023-02-04 16:14:55.662803,Conn_Setup,0.12832,,,35.0,3050.0,,,,,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,,,2023-02-04 16:14:55.534483,RRC_OTA_Handover_Parsing
1,2023-02-04 16:14:55.825220,2023-02-04 16:14:55.864444,SN_Setup,0.039224,35.0,3050.0,,,,35.0,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,35.0,631000.0,2023-02-04 16:14:55.825220,RRC_OTA_Handover_Parsing
2,2023-02-04 16:15:09.036863,NaT,Conn_Rel,,35.0,3050.0,,,35.0,,,,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,,,,,,,,,35.0,631000.0,,,2023-02-04 16:15:09.036863,RRC_OTA_Handover_Parsing
3,2023-02-04 16:16:16.509426,2023-02-04 16:16:16.633288,Conn_Setup,0.123862,,,35.0,3050.0,,,,,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,,,2023-02-04 16:16:16.509426,RRC_OTA_Handover_Parsing
4,2023-02-04 16:16:16.767598,2023-02-04 16:16:16.805470,SN_Setup,0.037872,35.0,3050.0,,,,35.0,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,35.0,631000.0,2023-02-04 16:16:16.767598,RRC_OTA_Handover_Parsing
5,2023-02-04 16:17:27.953435,2023-02-04 16:17:27.986813,SN_Rel,0.033378,35.0,3050.0,,,35.0,,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,35.0,631000.0,,,2023-02-04 16:17:27.953435,RRC_OTA_Handover_Parsing
6,2023-02-04 16:17:28.156703,2023-02-04 16:17:28.199117,SN_Setup,0.042414,35.0,3050.0,,,,160.0,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,160.0,631000.0,2023-02-04 16:17:28.156703,RRC_OTA_Handover_Parsing
7,2023-02-04 16:17:42.004563,2023-02-04 16:17:42.019062,SN_HO,0.014499,35.0,3050.0,,,160.0,35.0,,,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,160.0,631000.0,35.0,631000.0,2023-02-04 16:17:42.004563,RRC_OTA_Handover_Parsing
8,2023-02-04 16:18:10.084393,2023-02-04 16:18:10.117903,SN_Rel,0.03351,35.0,3050.0,,,35.0,,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,35.0,631000.0,,,2023-02-04 16:18:10.084393,RRC_OTA_Handover_Parsing
9,2023-02-04 16:18:10.412847,2023-02-04 16:18:10.454196,SN_Setup,0.041349,35.0,3050.0,,,,35.0,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,35.0,631000.0,35.0,631000.0,2023-02-04 16:18:10.412847,RRC_OTA_Handover_Parsing


In [7]:
df_ul

Unnamed: 0,sequence_num,Timestamp,Timestamp_epoch,lost,excl,latency,transmit_time,transmit_time_epoch,arrival_time,arrival_time_epoch
0,10001,2023-02-04 16:16:36.807340,1.675499e+09,False,False,0.050546,2023-02-04 16:16:36.807362,1.675499e+09,2023-02-04 16:16:36.857886,1.675499e+09
1,10002,2023-02-04 16:16:36.809340,1.675499e+09,False,False,0.053014,2023-02-04 16:16:36.809358,1.675499e+09,2023-02-04 16:16:36.862354,1.675499e+09
2,10003,2023-02-04 16:16:36.811340,1.675499e+09,False,False,0.051354,2023-02-04 16:16:36.811359,1.675499e+09,2023-02-04 16:16:36.862694,1.675499e+09
3,10004,2023-02-04 16:16:36.813340,1.675499e+09,False,False,0.051926,2023-02-04 16:16:36.813356,1.675499e+09,2023-02-04 16:16:36.865266,1.675499e+09
4,10005,2023-02-04 16:16:36.815340,1.675499e+09,False,False,0.052000,2023-02-04 16:16:36.815360,1.675499e+09,2023-02-04 16:16:36.867340,1.675499e+09
...,...,...,...,...,...,...,...,...,...,...
264515,274516,2023-02-04 16:25:25.901142,1.675499e+09,False,False,0.006791,2023-02-04 16:25:25.901157,1.675499e+09,2023-02-04 16:25:25.907933,1.675499e+09
264516,274517,2023-02-04 16:25:25.903142,1.675499e+09,False,False,0.009353,2023-02-04 16:25:25.903157,1.675499e+09,2023-02-04 16:25:25.912495,1.675499e+09
264517,274518,2023-02-04 16:25:25.905142,1.675499e+09,False,False,0.007959,2023-02-04 16:25:25.905158,1.675499e+09,2023-02-04 16:25:25.913101,1.675499e+09
264518,274519,2023-02-04 16:25:25.907143,1.675499e+09,False,False,0.010403,2023-02-04 16:25:25.907158,1.675499e+09,2023-02-04 16:25:25.917546,1.675499e+09


In [8]:
df_dl

Unnamed: 0,sequence_num,Timestamp,Timestamp_epoch,lost,excl,latency,transmit_time,transmit_time_epoch,arrival_time,arrival_time_epoch
0,10001,2023-02-04 16:16:36.846746,1.675499e+09,False,False,0.005360,2023-02-04 16:16:36.846765,1.675499e+09,2023-02-04 16:16:36.852106,1.675499e+09
1,10002,2023-02-04 16:16:36.848746,1.675499e+09,False,False,0.003361,2023-02-04 16:16:36.848761,1.675499e+09,2023-02-04 16:16:36.852107,1.675499e+09
2,10003,2023-02-04 16:16:36.850746,1.675499e+09,False,False,0.006130,2023-02-04 16:16:36.850758,1.675499e+09,2023-02-04 16:16:36.856876,1.675499e+09
3,10004,2023-02-04 16:16:36.852746,1.675499e+09,False,False,0.009315,2023-02-04 16:16:36.852759,1.675499e+09,2023-02-04 16:16:36.862061,1.675499e+09
4,10005,2023-02-04 16:16:36.854747,1.675499e+09,False,False,0.007356,2023-02-04 16:16:36.854760,1.675499e+09,2023-02-04 16:16:36.862103,1.675499e+09
...,...,...,...,...,...,...,...,...,...,...
264515,274516,2023-02-04 16:25:25.901468,1.675499e+09,False,False,0.051039,2023-02-04 16:25:25.901478,1.675499e+09,2023-02-04 16:25:25.952507,1.675499e+09
264516,274517,2023-02-04 16:25:25.903468,1.675499e+09,False,False,0.055100,2023-02-04 16:25:25.903479,1.675499e+09,2023-02-04 16:25:25.958568,1.675499e+09
264517,274518,2023-02-04 16:25:25.905468,1.675499e+09,False,False,0.053103,2023-02-04 16:25:25.905478,1.675499e+09,2023-02-04 16:25:25.958571,1.675499e+09
264518,274519,2023-02-04 16:25:25.907468,1.675499e+09,False,False,0.057169,2023-02-04 16:25:25.907479,1.675499e+09,2023-02-04 16:25:25.964637,1.675499e+09


# Filtering

In [9]:
# Check how an event row compares against a reference time: every
# comparison that involves a missing end (NaT) evaluates to False.
row = df_ho.iloc[2]
ref = pd.to_datetime('2023-02-04 08:14:55.825220')
print(row.start)
print(row.end)
for bound in (row.start, row.end):
    print(bound > ref)
    print(bound < ref)

2023-02-04 16:15:09.036863
NaT
True
False
False
False


In [10]:
# Show how pd.to_datetime fills in the parts missing from partial
# date / time strings.
for text in (
    '2023',
    '2023-02',
    '2023-02-04',
    '2023-02-04 08',
    '2023-02-04 08:14',
    '2023-02-04 08:14:55',
    '2023-02-04 08:14:55.825220',
    '08:14',
    '08:14:55',
    '08:14:55.825220',
):
    print(pd.to_datetime(text))

2023-01-01 00:00:00
2023-02-01 00:00:00
2023-02-04 00:00:00
2023-02-04 08:00:00
2023-02-04 08:14:00
2023-02-04 08:14:55
2023-02-04 08:14:55.825220
2023-02-22 08:14:00
2023-02-22 08:14:55
2023-02-22 08:14:55.825220


In [11]:
# First / last uplink transmit time and first / last downlink arrival time.
for stamp in (
    df_ul.iloc[0].transmit_time,
    df_ul.iloc[-1].transmit_time,
    df_dl.iloc[0].arrival_time,
    df_dl.iloc[-1].arrival_time,
):
    print(stamp)

2023-02-04 16:16:36.807362
2023-02-04 16:25:25.909159
2023-02-04 16:16:36.852106
2023-02-04 16:25:25.964639


In [12]:
# Keep only the events whose Timestamp lies between the first and the last
# uplink transmit time (both ends inclusive).
start, stop = (df_ul.iloc[pos].transmit_time for pos in (0, -1))
df_ho_ul = df_ho.query('Timestamp >= @start & Timestamp <= @stop').copy()
df_ho_ul

Unnamed: 0,start,end,ho_type,interrupt,sPCI,sFreq,tPCI,tFreq,snrPCI,tnrPCI,cause,others,ePCI,ECI,eNB,BID,DL_Freq,DL_BW,UL_Freq,UL_BW,ePCI1,ECI1,eNB1,BID1,DL_Freq1,DL_BW1,UL_Freq1,UL_BW1,nrPCI,nrFreq,nrPCI1,nrFreq1,Timestamp,Type_ID
5,2023-02-04 16:17:27.953435,2023-02-04 16:17:27.986813,SN_Rel,0.033378,35.0,3050.0,,,35.0,,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,631000.0,,,2023-02-04 16:17:27.953435,RRC_OTA_Handover_Parsing
6,2023-02-04 16:17:28.156703,2023-02-04 16:17:28.199117,SN_Setup,0.042414,35.0,3050.0,,,,160.0,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,,,160.0,631000.0,2023-02-04 16:17:28.156703,RRC_OTA_Handover_Parsing
7,2023-02-04 16:17:42.004563,2023-02-04 16:17:42.019062,SN_HO,0.014499,35.0,3050.0,,,160.0,35.0,,,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,160.0,631000.0,35.0,631000.0,2023-02-04 16:17:42.004563,RRC_OTA_Handover_Parsing
8,2023-02-04 16:18:10.084393,2023-02-04 16:18:10.117903,SN_Rel,0.03351,35.0,3050.0,,,35.0,,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,631000.0,,,2023-02-04 16:18:10.084393,RRC_OTA_Handover_Parsing
9,2023-02-04 16:18:10.412847,2023-02-04 16:18:10.454196,SN_Setup,0.041349,35.0,3050.0,,,,35.0,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,631000.0,35.0,631000.0,2023-02-04 16:18:10.412847,RRC_OTA_Handover_Parsing
10,2023-02-04 16:18:56.122709,2023-02-04 16:18:56.141647,MN_HO,0.018938,35.0,3050.0,186.0,3050.0,35.0,,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,186,54562082,213133,7,3050,20 MHz,21050,20 MHz,35.0,631000.0,35.0,631000.0,2023-02-04 16:18:56.122709,RRC_OTA_Handover_Parsing
11,2023-02-04 16:18:56.474148,2023-02-04 16:18:56.490155,MN_HO,0.016007,186.0,3050.0,266.0,3050.0,35.0,,,Set up 1 SCell.,186,54562082,213133,7,3050,20 MHz,21050,20 MHz,266,54472472,212783,7,3050,20 MHz,21050,20 MHz,35.0,631000.0,35.0,631000.0,2023-02-04 16:18:56.474148,RRC_OTA_Handover_Parsing
12,2023-02-04 16:19:01.531664,2023-02-04 16:19:01.549035,MN_HO,0.017371,266.0,3050.0,35.0,3050.0,35.0,,,Set up 1 SCell.,266,54472472,212783,7,3050,20 MHz,21050,20 MHz,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,631000.0,35.0,631000.0,2023-02-04 16:19:01.531664,RRC_OTA_Handover_Parsing
13,2023-02-04 16:19:25.107558,2023-02-04 16:19:25.123987,MN_HO,0.016429,35.0,3050.0,400.0,3050.0,35.0,,,Set up 1 SCell.,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,400,54457368,212724,7,3050,20 MHz,21050,20 MHz,35.0,631000.0,35.0,631000.0,2023-02-04 16:19:25.107558,RRC_OTA_Handover_Parsing
14,2023-02-04 16:19:26.782697,2023-02-04 16:19:26.798986,MN_HO,0.016289,400.0,3050.0,266.0,3050.0,35.0,,,Set up 1 SCell.,400,54457368,212724,7,3050,20 MHz,21050,20 MHz,266,54472472,212783,7,3050,20 MHz,21050,20 MHz,35.0,631000.0,35.0,631000.0,2023-02-04 16:19:26.782697,RRC_OTA_Handover_Parsing


In [13]:
def interp(x, y, ratio):
    """
    Linear interpolation between two points.

    Args:
        x, y (datetime.datetime): the two end points, with x < y.
        ratio (float): fraction of the way from x to y, in the range [0, 1];
            0 yields x itself and 1 yields y itself.
    Returns:
        (datetime.datetime): the point located `ratio` of the way from x to y.
    """
    span = y - x
    offset = span * ratio
    return x + offset

def get_ho_interval(df, sec=(1, 3), ratio=0.5,
                 ignored=('Conn_Setup','Conn_Rel'),
                 handover=('LTE_HO','SN_Rel','SN_Setup','MN_HO','SN_HO','MNSN_HO','SN_Rel_MN_HO','SN_Setup_MN_HO'),
                 linkfailure=('SCG_Failure','MCG_Failure','NAS_Recovery')):
    """
    Build the "before", "during" and "after" time windows of every event in an event table.

    Args:
        df (pandas.DataFrame): one event per row. Reads the columns `ho_type`, `start`, `end`
            and `cause`; for the event types listed in `classified` below it also reads
            `sFreq`, `tFreq`, `sPCI`, `tPCI`, `eNB` and `eNB1`.
            NOTE(review): the neighbour logic only inspects the adjacent rows, so the rows are
            presumably sorted by `start` -- confirm against the caller.
        sec (tuple): (window length in seconds for event types in `handover`,
            window length in seconds for every other event type).
        ratio (float or None): where to split the stretch shared by two neighbouring windows.
            The breakpoint is interp(later.start - window, earlier.end + window, ratio), so 0 gives
            the whole stretch to the later event's "before" window and 1 gives it to the earlier
            event's "after" window. None disables the splitting (windows may then overlap).
        ignored (sequence of str): event types dropped before any processing.
        handover (sequence of str): event types that use the window length sec[0].
        linkfailure (sequence of str): further event types that get result keys.
    Returns:
        dict: keys "before_<type>", "during_<type>", "after_<type>" for every type in
        `handover` + `linkfailure`; each value is a list of
        HO_INTV(index, interval, state1, state2, cause) records, where `index` is the row
        position after the ignored rows were removed and `interval` is a portion interval.
        `state1` / `state2` are pd.NA for event types that are not classified.
    Raises:
        KeyError: if a remaining row has a `ho_type` that is in neither `handover` nor `linkfailure`.
    """
    HO_INTV = namedtuple('HO_INTV', 'index, interval, state1, state2, cause', defaults=tuple([None]*4))
    # Event types for which the frequency / cell relation is classified.
    classified = ('LTE_HO','MN_HO','MNSN_HO','SN_Rel_MN_HO','SN_Setup_MN_HO')

    # Accept any sequence from the caller, but work on private lists.
    ignored, handover, linkfailure = list(ignored), list(handover), list(linkfailure)

    def window(ho_type):
        # Window length that applies to an event of the given type.
        return pd.Timedelta(seconds=sec[0] if ho_type in handover else sec[1])

    def last_instant(event):
        # Latest timestamp covered by the event; an event without `end` is a single instant.
        return event.start if pd.isna(event.end) else max(event.start, event.end)

    def first_instant(event):
        # Earliest timestamp covered by the event; an event without `end` is a single instant.
        return event.start if pd.isna(event.end) else min(event.start, event.end)

    df = df[~df['ho_type'].isin(ignored)].reset_index(drop=True)

    E = {
        f'{phase}_{type_name}': []
        for type_name in handover + linkfailure
        for phase in ('before', 'during', 'after')
    }

    last = len(df) - 1
    for i, row in df.iterrows():
        # The index was reset above, so the label i is also the row position.
        prior_row = df.iloc[i - 1] if i != 0 else None
        post_row = df.iloc[i + 1] if i != last else None
        has_end = not pd.isna(row.end)
        # An event without `end` is the single instant `start`; anchor its "after" window there
        # instead of on NaT, which would produce an interval with undefined bounds.
        row_end = row.end if has_end else row.start
        length = window(row.ho_type)

        ### peri_interval
        peri_interval = P.closed(row.start, row.end) if has_end else P.singleton(row.start)

        ### prior_interval
        C = row.start - length
        D = row.start
        prior_interval = P.closedopen(C, D)
        if ratio is not None and prior_row is not None:
            A = last_instant(prior_row)
            B = A + window(prior_row.ho_type)
            if P.openclosed(A, B).overlaps(prior_interval):
                # Split the shared stretch, but never reach back into the previous event itself.
                bkp = max(interp(C, B, ratio), A)
                # bkp can lie beyond D when the previous window is the longer one; the
                # interval is then empty. (Not taking from others means others take nothing
                # from this window either.)
                prior_interval = P.closedopen(bkp, D)
                if A in prior_interval:
                    # A belongs to the previous event's own interval, so exclude it here.
                    prior_interval = P.open(bkp, D)

        ### post_interval
        C = row_end
        D = row_end + length
        post_interval = P.openclosed(C, D)
        if ratio is not None and post_row is not None:
            B = first_instant(post_row)
            A = B - window(post_row.ho_type)
            if P.closedopen(A, B).overlaps(post_interval):
                # Same breakpoint as the following event's "before" window uses, capped at
                # the moment the following event begins.
                bkp = min(interp(A, D, ratio), B)
                post_interval = P.open(C, bkp)

        ### append dictionary
        type_name = row.ho_type
        state1, state2 = pd.NA, pd.NA
        if type_name in classified:
            state1 = 'inter_freq' if row.sFreq != row.tFreq else 'intra_freq'
            if row.eNB != row.eNB1:
                state2 = 'inter_enb'
            elif row.sPCI != row.tPCI:
                state2 = 'inter_sector'
            else:
                state2 = 'intra_sector'
        cause = row.cause
        # Store the classification and the cause with every window instead of discarding them.
        E[f'before_{type_name}'].append(HO_INTV(i, prior_interval, state1, state2, cause))
        E[f'during_{type_name}'].append(HO_INTV(i, peri_interval, state1, state2, cause))
        E[f'after_{type_name}'].append(HO_INTV(i, post_interval, state1, state2, cause))
    return E

def is_disjoint(set1, set2):
    """
    Tell whether two sets share no element, i.e. whether their intersection is empty.
    """
    overlap = set1 & set2
    return overlap.empty

def is_disjoint_dict(E):
    """
    Check that all intervals stored in E are pairwise disjoint.

    Args:
        E (dict): maps a label to a list of records exposing `.interval` and `.index`.
    Returns:
        (bool): True when no interval intersects any interval seen before it. On the first
        intersection the offending label and record index are printed and False is returned.
    """
    covered = P.empty()  # union of every interval accepted so far
    for label, records in E.items():
        for record in records:
            if not is_disjoint(covered, record.interval):
                print(label, record.index)
                return False
            covered = covered | record.interval
    return True

In [14]:
# Build the before/during/after windows from a copy of df_ho_ul (defined in an
# earlier cell) and print whether all resulting intervals are pairwise disjoint.
df = df_ho_ul.copy()
E = get_ho_interval(df)
print(is_disjoint_dict(E))

True


In [30]:
# Tag every row of a copy of df_ul with the window its transmit_time falls into:
# ho_index receives the record's index and ho_state the dictionary key of E.
df = df_ul.copy()
transmit_time = df['transmit_time']
for key, val in E.items():
    for intv in val:
        if intv.interval.empty:
            continue
        print(intv.interval)
        # Honour open/closed bounds: the windows are built to be disjoint, so a sample lying
        # exactly on a shared boundary must match only the interval that actually contains it.
        if intv.interval.left == P.CLOSED:
            above_lower = transmit_time >= intv.interval.lower
        else:
            above_lower = transmit_time > intv.interval.lower
        if intv.interval.right == P.CLOSED:
            below_upper = transmit_time <= intv.interval.upper
        else:
            below_upper = transmit_time < intv.interval.upper
        df.loc[above_lower & below_upper,
               ('ho_index','ho_state')] = [intv.index, key]

[Timestamp('2023-02-04 16:21:12.772917'),Timestamp('2023-02-04 16:21:13.772917'))
[Timestamp('2023-02-04 16:21:13.772917'),Timestamp('2023-02-04 16:21:13.807112')]
(Timestamp('2023-02-04 16:21:13.807112'),Timestamp('2023-02-04 16:21:13.975545500'))
[Timestamp('2023-02-04 16:17:26.953435'),Timestamp('2023-02-04 16:17:27.953435'))
[Timestamp('2023-02-04 16:18:09.084393'),Timestamp('2023-02-04 16:18:10.084393'))
[Timestamp('2023-02-04 16:19:27.725792'),Timestamp('2023-02-04 16:19:28.192541'))
[Timestamp('2023-02-04 16:17:27.953435'),Timestamp('2023-02-04 16:17:27.986813')]
[Timestamp('2023-02-04 16:18:10.084393'),Timestamp('2023-02-04 16:18:10.117903')]
[Timestamp('2023-02-04 16:19:28.192541'),Timestamp('2023-02-04 16:19:28.230660')]
(Timestamp('2023-02-04 16:17:27.986813'),Timestamp('2023-02-04 16:17:28.071758'))
(Timestamp('2023-02-04 16:18:10.117903'),Timestamp('2023-02-04 16:18:10.265375'))
(Timestamp('2023-02-04 16:19:28.230660'),Timestamp('2023-02-04 16:19:28.317494500'))
[Timestamp

In [31]:
# Write the current df to test.csv (path relative to the working directory).
df.to_csv('test.csv')