# Import Modules

In [1]:
import os
import pandas as pd
import numpy as np
import datetime as dt
from collections import namedtuple

In [2]:
def udp_set_data(df):
    """Turn a raw UDP loss/latency table into a typed, snake_case DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table (e.g. read from ``udp_dnlk_loss_latency.csv``). After the
        dotted headers are renamed it must contain ``sequence_num``,
        ``Timestamp``, ``transmit_time``, ``arrival_time``, ``lost``,
        ``excl``, ``latency`` and the three ``*_epoch`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame (the argument is not modified) in which the time columns
        are datetimes, ``sequence_num`` is ``Int32``, ``lost``/``excl`` are
        nullable ``boolean``, ``latency`` is ``float32`` and the ``*_epoch``
        columns have been dropped.
    """
    header_aliases = {
        'sequence.number': 'sequence_num',
        'transmit.time': 'transmit_time',
        'transmit.time_epoch': 'transmit_time_epoch',
        'arrival.time': 'arrival_time',
        'arrival.time_epoch': 'arrival_time_epoch',
    }
    compact_dtypes = {
        'sequence_num': 'Int32',
        'lost': 'boolean',
        'excl': 'boolean',
        'latency': 'float32',
    }
    epoch_columns = ['Timestamp_epoch', 'transmit_time_epoch', 'arrival_time_epoch']

    # rename() and astype() both return new objects, so the caller's frame stays untouched.
    typed = df.rename(columns=header_aliases).astype(compact_dtypes)
    for time_column in ('Timestamp', 'transmit_time', 'arrival_time'):
        typed[time_column] = pd.to_datetime(typed[time_column])

    # The epoch columns duplicate the datetime columns and are not kept.
    return typed.drop(columns=epoch_columns)

# Test Merge, Groupby

In [3]:
# Load one downlink UDP capture, normalise it, and bucket the packets by
# (rounded) second so the groupby/merge behaviour can be tried out below.
df = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01/data/udp_dnlk_loss_latency.csv")
df = udp_set_data(df)
# Lowercase 's' is the supported seconds alias; the uppercase 'S' spelling is
# deprecated since pandas 2.2.
df['Timestamp'] = df['Timestamp'].dt.round('s')
ts_group = df.groupby(['Timestamp'])

In [4]:
# Show the column names left after normalisation.
print(df.columns.tolist())

['sequence_num', 'Timestamp', 'lost', 'excl', 'latency', 'transmit_time', 'arrival_time']


In [5]:
# Per-second summaries of the two boolean flags. The frames are deliberately
# truncated to different lengths (7 vs 3 rows) so that the left merge in the
# next cell has rows without a match.
df_lost = (
    ts_group['lost']
    .agg(['count', 'sum', 'min', 'max', 'mean', 'median'])
    .head(7)
    .reset_index()
    .rename(columns={'count': 'count_1'})
)
df_excl = (
    ts_group['excl']
    .agg(['count', 'sum', 'min', 'max', 'mean', 'median'])
    .head(3)
    .reset_index()
    .rename(columns={'count': 'count_2'})
)
display(df_lost, df_excl)

Unnamed: 0,Timestamp,count_1,sum,min,max,mean,median
0,2023-02-04 14:58:57,309,0,False,False,0.0,0.0
1,2023-02-04 14:58:58,500,0,False,False,0.0,0.0
2,2023-02-04 14:58:59,500,0,False,False,0.0,0.0
3,2023-02-04 14:59:00,500,0,False,False,0.0,0.0
4,2023-02-04 14:59:01,500,0,False,False,0.0,0.0
5,2023-02-04 14:59:02,500,0,False,False,0.0,0.0
6,2023-02-04 14:59:03,499,0,False,False,0.0,0.0


Unnamed: 0,Timestamp,count_2,sum,min,max,mean,median
0,2023-02-04 14:58:57,309,0,False,False,0.0,0.0
1,2023-02-04 14:58:58,500,0,False,False,0.0,0.0
2,2023-02-04 14:58:59,500,0,False,False,0.0,0.0


In [6]:
# Left merge on the second bucket: every df_lost row is kept, and rows with no
# df_excl partner get missing values; clashing column names receive _x/_y.
df_t = df_lost.merge(df_excl, how='left', on='Timestamp')
display(df_t)

Unnamed: 0,Timestamp,count_1,sum_x,min_x,max_x,mean_x,median_x,count_2,sum_y,min_y,max_y,mean_y,median_y
0,2023-02-04 14:58:57,309,0,False,False,0.0,0.0,309.0,0.0,False,False,0.0,0.0
1,2023-02-04 14:58:58,500,0,False,False,0.0,0.0,500.0,0.0,False,False,0.0,0.0
2,2023-02-04 14:58:59,500,0,False,False,0.0,0.0,500.0,0.0,False,False,0.0,0.0
3,2023-02-04 14:59:00,500,0,False,False,0.0,0.0,,,,,,
4,2023-02-04 14:59:01,500,0,False,False,0.0,0.0,,,,,,
5,2023-02-04 14:59:02,500,0,False,False,0.0,0.0,,,,,,
6,2023-02-04 14:59:03,499,0,False,False,0.0,0.0,,,,,,


# Round Into Seconds

In [7]:
# Load the four CSV exports of one experiment run (same directory):
#   df     - UDP downlink loss/latency table (typed later by udp_set_data)
#   df_ho  - the "*_rrc.csv" log
#   df_lte - the "*_ml1.csv" log (parsed later by mi_parse_cell with rat='lte')
#   df_nr  - the "*_nr_ml1.csv" log (parsed later by mi_parse_cell with rat='nr')
# NOTE(review): the paths are absolute and machine-specific; the notebook only
# runs where this directory layout exists.
df = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/udp_dnlk_loss_latency.csv")
df_ho = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/diag_log_qc02_2023-02-04_16-13-28_rrc.csv")
df_lte = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/diag_log_qc02_2023-02-04_16-13-28_ml1.csv")
df_nr = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/diag_log_qc02_2023-02-04_16-13-28_nr_ml1.csv")

## packet statistics

* lost, excl, latency

In [8]:
# df = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01/data/udp_dnlk_loss_latency.csv")
# NOTE: this cell consumes the raw `df` from the load cell. Re-running it
# without reloading fails, because udp_set_data drops the *_epoch columns it
# expects to find.
df = udp_set_data(df)
# Bucket packets by (rounded) second. Lowercase 's' is the supported seconds
# alias; the uppercase 'S' spelling is deprecated since pandas 2.2.
df['Timestamp'] = df['Timestamp'].dt.round('s')
# dfr: only the packets whose `lost` flag is False.
dfr = df[df['lost'] == False].copy().reset_index(drop=True)
# Absolute latency change between consecutive remaining packets (averaged
# per second later on as "jitter").
dfr['diff'] = dfr['latency'].diff().abs()
ts_group = df.groupby(['Timestamp'])    # all packets, per second
tsr_group = dfr.groupby(['Timestamp'])  # non-lost packets, per second

In [9]:
# display(ts_group['lost'].agg(['count','sum','min','max','mean','median','std','var']))
# display(tsr_group['excl'].agg(['count','sum','min','max','mean','median','std','var']))
# display(tsr_group['latency'].agg(['count','sum','min','max','mean','median','std','var']))
# display(tsr_group['diff'].agg(['count','sum','min','max','mean','median','std','var']))

In [10]:
# Build one row per second by outer-merging several per-second aggregates on
# 'Timestamp'. `dfagg1` is a scratch frame that is reused for every piece.

# Loss statistics over all packets of the second (rate in percent).
dfagg = ts_group['lost'].agg(tx_count='count', loss='sum', lorate='mean').reset_index()
dfagg['lorate'] = dfagg['lorate'] * 100

# `excl` statistics over the non-lost packets only (rate in percent).
dfagg1 = tsr_group['excl'].agg(rx_count='count', excl='sum', exrate='mean').reset_index()
dfagg1['exrate'] = dfagg1['exrate'] * 100
dfagg = dfagg.merge(dfagg1, how='outer', on='Timestamp')

# `excl` rate measured against all packets of the second (percent).
dfagg1 = ts_group['excl'].agg(loexrate='mean').reset_index()
dfagg1['loexrate'] = dfagg1['loexrate'] * 100
dfagg = dfagg.merge(dfagg1, how='outer', on='Timestamp')

# Latency distribution of the non-lost packets.
dfagg1 = (
    tsr_group['latency']
    .agg(mean_lat='mean', min_lat='min', max_lat='max', median_lat='median', std_lat='std')
    .reset_index()
)
dfagg = dfagg.merge(dfagg1, how='outer', on='Timestamp')

# jitter := per-second average of the absolute first-order latency difference.
dfagg1 = tsr_group['diff'].agg(jitter='mean').reset_index()
dfagg = dfagg.merge(dfagg1, how='outer', on='Timestamp')

# To inspect every row, wrap the display in
# pd.option_context('display.max_rows', None).
dfagg

Unnamed: 0,Timestamp,tx_count,loss,lorate,rx_count,excl,exrate,loexrate,mean_lat,min_lat,max_lat,median_lat,std_lat,jitter
0,2023-02-04 16:16:37,327,0,0.0,327.0,0,0.0,0.0,0.007812,0.003343,0.017330,0.007374,0.002645,0.002646
1,2023-02-04 16:16:38,500,0,0.0,500.0,0,0.0,0.0,0.007973,0.003393,0.017437,0.007427,0.002421,0.002653
2,2023-02-04 16:16:39,500,0,0.0,500.0,0,0.0,0.0,0.008141,0.003444,0.017448,0.007457,0.002339,0.002660
3,2023-02-04 16:16:40,500,0,0.0,500.0,0,0.0,0.0,0.008203,0.003477,0.017497,0.007498,0.002335,0.002660
4,2023-02-04 16:16:41,500,0,0.0,500.0,0,0.0,0.0,0.008135,0.003500,0.015574,0.007531,0.002162,0.002653
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,2023-02-04 16:25:22,500,0,0.0,500.0,0,0.0,0.0,0.050420,0.046831,0.058936,0.050893,0.002615,0.002645
526,2023-02-04 16:25:23,500,0,0.0,500.0,0,0.0,0.0,0.050415,0.046908,0.058923,0.050910,0.002459,0.002643
527,2023-02-04 16:25:24,500,0,0.0,500.0,0,0.0,0.0,0.050299,0.046923,0.058966,0.050948,0.002429,0.002641
528,2023-02-04 16:25:25,500,0,0.0,500.0,0,0.0,0.0,0.051182,0.046987,0.057130,0.051043,0.002571,0.002650


## Cell INFO

In [11]:
def nr_serving_cell(row):
    """Pick the serving cell's (PCI, RSRP, RSRQ) out of one NR measurement row.

    ``row.Serv_Cell_Pos`` is the position of the serving cell among the
    numbered ``PCI<k>`` / ``RSRP<k>`` / ``RSRQ<k>`` columns of the row.

    Parameters
    ----------
    row : pandas.Series
        One row holding ``Serv_Cell_Pos`` and the numbered cell columns.

    Returns
    -------
    tuple
        ``(PCI, RSRP, RSRQ)`` of the serving cell, or ``(None, None, None)``
        when the position is 255 or missing.

    Raises
    ------
    KeyError
        If the row has no columns for the requested position.
    """
    pos = row.Serv_Cell_Pos
    # A missing position cannot be used as a column suffix; report "no cell".
    if pd.isna(pos):
        return None, None, None
    # The position arrives as a float (e.g. 1.0) when its column contains NaN;
    # normalise it so the lookup key is 'PCI1' rather than 'PCI1.0'.
    pos = int(pos)
    if pos == 255:  # 255 marks rows without a serving cell
        return None, None, None
    return row[f'PCI{pos}'], row[f'RSRP{pos}'], row[f'RSRQ{pos}']

def mi_parse_cell(df, rat='lte', tz=0):
    """Normalise an LTE or NR cell-measurement table into one common schema.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw measurement table. It must contain a ``PCI1`` column; that column
        and everything to its right are kept, in order, after the fixed
        leading columns.
    rat : str, default 'lte'
        ``'lte'`` or ``'nr'``; selects the header renaming and the
        serving-cell handling. Any other value skips both and goes straight
        to the dtype conversion.
    tz : int or float, default 0
        Number of hours added to every parsed ``Timestamp``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the unified column layout and compact dtypes.
        Columns the chosen RAT does not provide are present but empty.

    Notes
    -----
    The NR branch calls ``nr_serving_cell`` once per row to fill
    ``PCI``/``RSRP``/``RSRQ`` from the numbered cell columns.
    """
    def _unified_layout(frame):
        # Fixed leading columns shared by both RATs, followed by every column
        # from 'PCI1' onwards. reindex() creates missing columns as all-NaN.
        leading = ['Timestamp', 'Type_ID', 'PCI', 'RSRP', 'RSRQ', 'Serv_Cell_Index', 'EARFCN', 'Raster_ARFCN',
                   'Num_Cells', 'Num_Neigh_Cells', 'Serv_Cell_Pos', 'Serv_Cell_PCI',
                   'PCI0', 'RSRP0', 'RSRQ0']
        trailing = frame.columns.to_list()[frame.columns.get_loc('PCI1'):]
        return frame.reindex([*leading, *trailing], axis=1)

    ### LTE
    if rat == 'lte':
        df = df.rename(columns={
            'type_id': 'Type_ID',
            'RSRP(dBm)': 'RSRP',
            'RSRQ(dB)': 'RSRQ',
            'Serving Cell Index': 'Serv_Cell_Index',
            'Number of Neighbor Cells': 'Num_Neigh_Cells',
            'Number of Detected Cells': 'Num_Cells',
        })
        df = _unified_layout(df)
        # Rows labelled '(MI)Unknown' are relabelled as '3_SCell'.
        df.loc[df['Serv_Cell_Index'] == '(MI)Unknown', 'Serv_Cell_Index'] = '3_SCell'
    ### NR
    if rat == 'nr':
        df = df.rename(columns={
            'type_id': 'Type_ID',
            'Raster ARFCN': 'Raster_ARFCN',
            'Serving Cell Index': 'Serv_Cell_Pos',
            'Serving Cell PCI': 'Serv_Cell_PCI',
            'Num Cells': 'Num_Cells',
        })
        df = _unified_layout(df)
        # 'Serv_Cell_Index' was just created by reindex() as an all-NaN float
        # column. Writing a string into it is a silent dtype upcast, which
        # pandas 2.1+ flags with a FutureWarning, so make it object first.
        df['Serv_Cell_Index'] = df['Serv_Cell_Index'].astype(object)
        df.loc[df['Serv_Cell_Pos'] != 255, 'Serv_Cell_Index'] = 'PSCell'
        # Copy the serving cell's numbered measurements into the generic columns.
        df[['PCI','RSRP','RSRQ']] = df.apply(nr_serving_cell, axis=1, result_type='expand')
    ### Set dtypes
    df['Timestamp'] = pd.to_datetime(df['Timestamp']) + pd.Timedelta(hours=tz)
    df['Type_ID'] = df['Type_ID'].astype('category')
    df['Serv_Cell_Index'] = df['Serv_Cell_Index'].astype('category')
    df['EARFCN'] = df['EARFCN'].astype('Int32')
    df['Raster_ARFCN'] = df['Raster_ARFCN'].astype('Int32')
    df['Num_Cells'] = df['Num_Cells'].astype('UInt8')
    df['Num_Neigh_Cells'] = df['Num_Neigh_Cells'].astype('UInt8')
    df['Serv_Cell_Pos'] = df['Serv_Cell_Pos'].astype('UInt8')
    df['Serv_Cell_PCI'] = df['Serv_Cell_PCI'].astype('UInt16')
    # Prefix match: covers 'PCI' itself and the numbered 'PCI<k>' columns
    # (likewise for RSRP/RSRQ); 'Serv_Cell_PCI' is not matched.
    for tag in df.columns:
        if tag.startswith('PCI'):
            df[tag] = df[tag].astype('Int32')
        if tag.startswith(('RSRP','RSRQ')):
            df[tag] = df[tag].astype('float32')
    return df

### LTE

In [12]:
# df_lte = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01/data/diag_log_qc01_2023-02-04_14-57-22_ml1.csv")
# NOTE: this cell overwrites `df_lte`. Re-running it without reloading applies
# the +8 h timestamp shift a second time.
df_lte = mi_parse_cell(df_lte, 'lte', +8)
# Keep only the rows measured on the primary cell.
df_lte = df_lte[df_lte['Serv_Cell_Index'] == 'PCell'].copy().reset_index(drop=True)
# Bucket by (rounded) second. Lowercase 's' is the supported seconds alias;
# the uppercase 'S' spelling is deprecated since pandas 2.2.
df_lte['Timestamp'] = df_lte['Timestamp'].dt.round('s')
tsl_group = df_lte.groupby(['Timestamp'])

In [13]:
# display(tsl_group['RSRP'].agg(['count','sum','min','max','mean','median','std','var']))
# display(tsl_group['RSRQ'].agg(['count','sum','min','max','mean','median','std','var']))

In [14]:
# Cell identity per second: taken from the first row seen in each second.
dfagg_lte = (
    df_lte
    .drop_duplicates(subset=['Timestamp'])[['Timestamp', 'PCI', 'EARFCN']]
    .reset_index(drop=True)
)

# Signal quality per second: mean of every sample that fell into the second.
# `dfagg_lte1` is a scratch frame reused for each metric.
for metric in ('RSRP', 'RSRQ'):
    dfagg_lte1 = tsl_group[metric].agg(['mean']).reset_index().rename(columns={'mean': metric})
    dfagg_lte = dfagg_lte.merge(dfagg_lte1, how='outer', on='Timestamp')

dfagg_lte

Unnamed: 0,Timestamp,PCI,EARFCN,RSRP,RSRQ
0,2023-02-04 16:14:56,35,3050,-86.604164,-9.750000
1,2023-02-04 16:14:57,35,3050,-82.812500,-8.625000
2,2023-02-04 16:14:58,35,3050,-89.750000,-9.000000
3,2023-02-04 16:14:59,35,3050,-86.375000,-9.812500
4,2023-02-04 16:15:00,35,3050,-84.062500,-9.000000
...,...,...,...,...,...
524,2023-02-04 16:25:37,174,3050,-98.843750,-13.281250
525,2023-02-04 16:25:38,174,3050,-100.750000,-13.937500
526,2023-02-04 16:25:39,174,3050,-98.203125,-13.296875
527,2023-02-04 16:25:40,174,3050,-97.625000,-13.468750


### NR

In [15]:
# df_nr = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01/data/diag_log_qc01_2023-02-04_14-57-22_nr_ml1.csv")
# NOTE: this cell overwrites `df_nr`. Re-running it without reloading applies
# the +8 h timestamp shift a second time.
df_nr = mi_parse_cell(df_nr, 'nr', +8)
# Keep only the rows that mi_parse_cell labelled as having a serving cell.
df_nr = df_nr[df_nr['Serv_Cell_Index'] == 'PSCell'].copy().reset_index(drop=True)
# Bucket by (rounded) second. Lowercase 's' is the supported seconds alias;
# the uppercase 'S' spelling is deprecated since pandas 2.2.
df_nr['Timestamp'] = df_nr['Timestamp'].dt.round('s')
tsn_group = df_nr.groupby(['Timestamp'])

In [16]:
# Cell identity per second: taken from the first row seen in each second,
# with an 'nr' prefix so the columns do not clash with the LTE ones.
dfagg_nr = (
    df_nr
    .drop_duplicates(subset=['Timestamp'])[['Timestamp', 'PCI', 'Raster_ARFCN']]
    .reset_index(drop=True)
    .rename(columns={'PCI': 'nrPCI', 'Raster_ARFCN': 'nrARFCN'})
)

# Signal quality per second: mean of every sample that fell into the second.
# `dfagg_nr1` is a scratch frame reused for each metric.
for metric in ('RSRP', 'RSRQ'):
    dfagg_nr1 = tsn_group[metric].agg(['mean']).reset_index().rename(columns={'mean': f'nr{metric}'})
    dfagg_nr = dfagg_nr.merge(dfagg_nr1, how='outer', on='Timestamp')

dfagg_nr

Unnamed: 0,Timestamp,nrPCI,nrARFCN,nrRSRP,nrRSRQ
0,2023-02-04 16:14:56,35,631000,-71.234337,-10.539001
1,2023-02-04 16:14:57,35,631000,-73.335800,-10.822001
2,2023-02-04 16:14:58,35,631000,-75.050667,-11.320333
3,2023-02-04 16:14:59,35,631000,-77.750000,-11.686000
4,2023-02-04 16:15:00,35,631000,-72.275253,-10.976750
...,...,...,...,...,...
574,2023-02-04 16:25:37,350,631000,-87.014000,-13.666000
575,2023-02-04 16:25:38,350,631000,-87.296753,-13.230250
576,2023-02-04 16:25:39,350,631000,-88.152252,-13.459000
577,2023-02-04 16:25:40,350,631000,-86.300995,-13.177750


## Concat stage 1

In [17]:
# Combine the LTE and NR per-second tables; the outer merge keeps seconds that
# appear in only one of them (the other side's columns are then missing).
dfagg_cell = dfagg_lte.merge(dfagg_nr, how='outer', on='Timestamp')
dfagg_cell

Unnamed: 0,Timestamp,PCI,EARFCN,RSRP,RSRQ,nrPCI,nrARFCN,nrRSRP,nrRSRQ
0,2023-02-04 16:14:56,35,3050,-86.604164,-9.7500,35,631000,-71.234337,-10.539001
1,2023-02-04 16:14:57,35,3050,-82.812500,-8.6250,35,631000,-73.335800,-10.822001
2,2023-02-04 16:14:58,35,3050,-89.750000,-9.0000,35,631000,-75.050667,-11.320333
3,2023-02-04 16:14:59,35,3050,-86.375000,-9.8125,35,631000,-77.750000,-11.686000
4,2023-02-04 16:15:00,35,3050,-84.062500,-9.0000,35,631000,-72.275253,-10.976750
...,...,...,...,...,...,...,...,...,...
574,2023-02-04 16:20:48,,,,,386,631000,-91.259003,-16.279833
575,2023-02-04 16:20:50,,,,,394,631000,-89.276001,-13.005166
576,2023-02-04 16:20:55,,,,,394,631000,-92.134163,-14.554833
577,2023-02-04 16:20:58,,,,,394,631000,-93.569160,-14.570167


In [18]:
# Attach the cell information to the packet statistics. The left merge keeps
# exactly the seconds present in the packet table.
# NOTE: this overwrites `dfagg`, so re-running the cell merges a second time.
dfagg = dfagg.merge(dfagg_cell, how='left', on='Timestamp')

In [19]:
# Count the seconds that have packet statistics but no LTE / NR cell info.
for pci_column in ('PCI', 'nrPCI'):
    print(dfagg[pci_column].isna().sum())
dfagg

44
0


Unnamed: 0,Timestamp,tx_count,loss,lorate,rx_count,excl,exrate,loexrate,mean_lat,min_lat,...,std_lat,jitter,PCI,EARFCN,RSRP,RSRQ,nrPCI,nrARFCN,nrRSRP,nrRSRQ
0,2023-02-04 16:16:37,327,0,0.0,327.0,0,0.0,0.0,0.007812,0.003343,...,0.002645,0.002646,35,3050,-86.250000,-7.750000,35,631000,-80.765663,-14.487000
1,2023-02-04 16:16:38,500,0,0.0,500.0,0,0.0,0.0,0.007973,0.003393,...,0.002421,0.002653,35,3050,-86.375000,-9.562500,35,631000,-81.630333,-14.605499
2,2023-02-04 16:16:39,500,0,0.0,500.0,0,0.0,0.0,0.008141,0.003444,...,0.002339,0.002660,35,3050,-86.562500,-8.812500,35,631000,-81.191338,-14.657500
3,2023-02-04 16:16:40,500,0,0.0,500.0,0,0.0,0.0,0.008203,0.003477,...,0.002335,0.002660,,,,,35,631000,-78.790001,-14.100429
4,2023-02-04 16:16:41,500,0,0.0,500.0,0,0.0,0.0,0.008135,0.003500,...,0.002162,0.002653,35,3050,-86.812500,-10.375000,35,631000,-79.377335,-14.274501
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,2023-02-04 16:25:22,500,0,0.0,500.0,0,0.0,0.0,0.050420,0.046831,...,0.002615,0.002645,378,3050,-99.775002,-11.150000,350,631000,-88.063835,-12.416833
526,2023-02-04 16:25:23,500,0,0.0,500.0,0,0.0,0.0,0.050415,0.046908,...,0.002459,0.002643,378,3050,-101.666664,-12.208333,350,631000,-86.216431,-11.900429
527,2023-02-04 16:25:24,500,0,0.0,500.0,0,0.0,0.0,0.050299,0.046923,...,0.002429,0.002641,378,3050,-98.718750,-10.687500,350,631000,-85.273499,-11.729000
528,2023-02-04 16:25:25,500,0,0.0,500.0,0,0.0,0.0,0.051182,0.046987,...,0.002571,0.002650,378,3050,-98.666664,-10.520833,350,631000,-85.642174,-11.847833


## Handover

In [20]:
class myQueue:
    """FIFO queue on top of a plain list, with an optional capacity.

    Pushing onto a full queue evicts the oldest element first. A ``maxsize``
    that is not greater than zero means the queue is unbounded.
    """

    def __init__(self, maxsize=0):
        if maxsize > 0:
            self.maxsize = maxsize
        else:
            self.maxsize = float('inf')
        self.data = []

    def tolist(self):
        """Return the underlying list itself (not a copy)."""
        return self.data

    def size(self):
        """Return the capacity (``inf`` when unbounded)."""
        return self.maxsize

    def len(self):
        """Return the number of stored elements."""
        return len(self.data)

    def empty(self):
        """Return True when nothing is stored."""
        return len(self.data) == 0

    def full(self):
        """Return True when the element count equals the capacity."""
        return len(self.data) == self.maxsize

    def clear(self):
        """Drop all elements by binding a fresh list."""
        self.data = []

    def pop(self, index=0):
        """
        Remove and return the element at position ``index`` counted from the
        front, discarding every element in front of it as well.
        Returns None when no element is left at that point.
        """
        if index > 0:
            # Discard the `index` oldest elements (fewer if the queue is shorter).
            del self.data[:index]
        if not self.data:
            return None
        return self.data.pop(0)

    def push(self, element):
        """
        Append ``element`` at the rear.
        Returns 0 on a plain append, 1 if the front had to be evicted first.
        """
        evicted = self.full()
        if evicted:
            self.pop()
        self.data.append(element)
        return 1 if evicted else 0

    def front(self):
        """Return the oldest element without removing it, or None if empty."""
        if not self.data:
            return None
        return self.data[0]

    def rear(self):
        """Return the newest element without removing it, or None if empty."""
        if not self.data:
            return None
        return self.data[-1]

    def get(self, index):
        """
        Return the element at ``index`` (negative values count from the rear),
        or None when the index is out of range. A list of indices yields a
        list of results in the same order.
        """
        if isinstance(index, list):
            return [self.get(position) for position in index]
        count = len(self.data)
        if index < count and abs(index) <= count:
            return self.data[index]
        return None

    def find(self, element):
        """
        Return the position of the first occurrence of ``element``, or None.
        For a list of candidates, return the position of the first candidate
        (in list order) that is present, or None if none is.
        """
        if isinstance(element, list):
            for candidate in element:
                position = self.find(candidate)
                if position is not None:
                    return position
            return None
        if element not in self.data:
            return None
        return self.data.index(element)

In [21]:
def mi_parse_ho(df, tz=0, debug=False):
    df['Timestamp'] = pd.to_datetime(df['Timestamp']) + pd.Timedelta(hours=tz)
    
    ### Define Basic Element
    HO = namedtuple('HO', 'start, end, cause, others, st_scell', defaults=tuple([None]*4+[0]))
    stNR = namedtuple('stNR', 'snrPCI, tnrPCI', defaults=tuple([None]*2))
    stLTE = namedtuple('stLTE', 'sPCI, sFreq, tPCI, tFreq', defaults=tuple([None]*4))
    NR_CEL = namedtuple('NR_CEL', 'nrPCI, nrFreq', defaults=tuple([None]*2))
    LTE_CEL = namedtuple('LTE_CEL', 'ePCI, ECI, eNB, BID, DL_Freq, DL_BW, UL_Freq, UL_BW', defaults=tuple([None]*8))
    C = namedtuple('C', HO._fields + stLTE._fields + stNR._fields + \
        LTE_CEL._fields + tuple([f'{s}1' for s in LTE_CEL._fields]) + NR_CEL._fields + tuple([f'{s}1' for s in NR_CEL._fields]), 
        defaults=tuple([None]*30))
    
    def dprint(*args, **kwargs):
        if debug:
            print(*args, **kwargs)
    
    def NR_OTA(pos=None):
        row = df.iloc[pos] if pos else df.iloc[i]
        if row.type_id == '5G_NR_RRC_OTA_Packet':
            return True
        else:
            return False
    
    def CEL_INFO(pos=None):
        row = df.iloc[pos] if pos else df.iloc[i]
        if row.type_id == 'LTE_RRC_Serv_Cell_Info':
            return True
        else:
            return False
    
    def nr_track(pos=None):
        row = df.iloc[pos] if pos else df.iloc[i]
        if int(row.PCI) in [0, 65535]:  # 65535 is for samgsung; 0 is for xiaomi.
            return NR_CEL()
        else:
            return NR_CEL(int(row.PCI), int(row.Freq))
    
    def eci_track(pos=None):
        row = df.iloc[pos] if pos else df.iloc[i]
        PCI = int(row['PCI'])
        ECI = int(row['Cell Identity'])
        eNB = ECI // 256
        BID = int(row['Band ID'])
        DL_Freq = int(row['DL frequency'])
        DL_BW = row['DL bandwidth']
        UL_Freq = int(row['UL frequency'])
        UL_BW = row['UL bandwidth']
        return LTE_CEL(PCI, ECI, eNB, BID, DL_Freq, DL_BW, UL_Freq, UL_BW)
    
    def peek_nr(pos=None, look_after=0.5, look_before=0.0):
        ## look_after == 0.5 is a magic number
        ### TODO 先偷看 ho start - end 之間的 cell information
        if pos:  # position of end of an event
            for j in range(i, pos):
                if NR_OTA(j):
                    qpscell.push(nr_track(j))
        ### END TODO
        # dprint(f'pscell={pscell}')
        # dprint(qpscell.tolist())
        index = None
        for j in range(qpscell.len()):
            if pscell != qpscell.get(j):
                index = j
                break
        # dprint(f'index={index}')
        if index != None:
            return qpscell.pop(index)
        ### haven't find pci change yet!
        t = df['Timestamp'].iloc[i]
        for j in range(i, len(df)):  # 往前走，最多走到底
            t1 = df["Timestamp"].iloc[j]
            if (t1 - t).total_seconds() > look_after:
                break
            if df['type_id'].iloc[j] != '5G_NR_RRC_OTA_Packet':
                continue
            row = df.iloc[j]
            if int(row.PCI) in [0, 65535]:  # 65535 is for samgsung; 0 is for xiaomi.
                return NR_CEL()
            else:
                return NR_CEL(int(row.PCI), int(row.Freq))
        return pscell
    
    def peek_eci(pos=None, look_after=0.5, look_before=0.0):
        ## look_after == 0.5 is a magic number
        ### TODO 先偷看 ho start - end 之間的 cell information
        if pos:  # position of end of an event
            for j in range(i, pos):
                if CEL_INFO(j):
                    qpcell.push(eci_track(j))
        ### END TODO
        # dprint(f'pcell={pcell}')
        # dprint(qpcell.tolist())
        index = None
        for j in range(qpcell.len()):
            if pcell != qpcell.get(j):
                index = j
                break
        # dprint(f'index={index}')
        if index != None:
            return qpcell.pop(index)
        ### haven't find pci change yet!
        t = df['Timestamp'].iloc[i]
        for j in range(i, len(df)):  # 往前走，最多走到底
            t1 = df['Timestamp'].iloc[j]
            if (t1 - t).total_seconds() > look_after:
                break
            if df['type_id'].iloc[j] != 'LTE_RRC_Serv_Cell_Info':
                continue
            row = df.iloc[j]
            PCI = int(row['PCI'])
            ECI = int(row['Cell Identity'])
            eNB = ECI // 256
            BID = int(row['Band ID'])
            DL_Freq = int(row['DL frequency'])
            DL_BW = row['DL bandwidth']
            UL_Freq = int(row['UL frequency'])
            UL_BW = row['UL bandwidth']
            return LTE_CEL(PCI, ECI, eNB, BID, DL_Freq, DL_BW, UL_Freq, UL_BW)
        return pcell

    def find_1st_after(target, look_after=1.0):
        for j in range(i, len(df)):  # 往前走，最多走到底
            t1 = df["Timestamp"].iloc[j]
            if (t1 - t).total_seconds() > look_after:
                return None, None
            if df[target].iloc[j] in [1,'1']:
                return t1, j  # timestamp & position
        return None, None

    def find_1st_before(target, look_before=1.0):
        for j in range(i, -1, -1):  # 倒退嚕，最多走回頭
            t1 = df["Timestamp"].iloc[j]
            if (t - t1).total_seconds() > look_before:
                return None, None
            if df[target].iloc[j] in [1,'1']:
                return t1, j  # timestamp & position
        return None, None

    D = {
        ### Conn Setup/Rel & HO
        'Conn_Rel':[],    # Conn Release: rrcConnectionRelease
        'Conn_Setup':[],  # Conn Setup: rrcConnectionRequest + rrcConnectionSetup
        'LTE_HO': [],     # E_PCel -> E_PCel’: lte-rrc.t304 & LTE_PCel does change
        'SN_Rel': [],     # EUTRA + NR -> EUTRA:(CHT) lte-rrc.t304 & LTE_PCel does not change
                          #                     (TWM) nr-Config-r15: release (0) 
        'SN_Setup': [],   # EUTRA -> EUTRA + NR:(CHT) lte-rrc.t304 + nr-rrc.t304 + dualConnectivityPHR: setup (1) & LTE_PCel does not change
                          #                     (TWM) nr-rrc.t304 + dualConnectivityPHR: setup (1)
        'MN_HO': [],      # E_PCel + N_PSCel -> E_PCel’ + N_PSCel: lte-rrc.t304 + nr-rrc.t304 + dualConnectivityPHR: setup (1) & LTE_PCel does change
        'SN_HO': [],      # E_PCel + N_PSCel -> E_PCel + N_PSCel’: nr-rrc.t304
        'MNSN_HO': [],         # (TWM)
        'SN_Rel_MN_HO': [],    # (TWM)
        'SN_Setup_MN_HO': [],  # (TWM)
        ### Link Failure
        'SCG_Failure': [],   # scgFailureInformationNR-r15
        'MCG_Failure': [],   # rrcConnectionReestablishmentRequest + rrcConnectionReestablishmentComplete
        'NAS_Recovery': [],  # rrcConnectionReestablishmentRequest + rrcConnectionReestablishmentReject + rrcConnectionRequest + rrcConnectionSetup
        # MCG_Failure, NAS_Recovery may be caused by 'reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'
        }
    
    A = { 'Conn_Rel':[], 'Conn_Setup':[],
        'LTE_HO': [], 'SN_Rel': [], 'SN_Setup': [], 'MN_HO': [], 'SN_HO': [],
        'MNSN_HO': [], 'SN_Rel_MN_HO': [], 'SN_Setup_MN_HO': [],
        'SCG_Failure': [], 'MCG_Failure': [], 'NAS_Recovery': [] }
    
    qpscell = myQueue(3)
    qpcell = myQueue(3)
    
    init = 1
    pcell, pscell = LTE_CEL(), NR_CEL()
    prev_pci, prev_freq = None, None
    
    for i, row in df.iterrows():
        if NR_OTA():
            qpscell.push(nr_track())
            continue
        elif CEL_INFO():
            qpcell.push(eci_track())
            continue
        if init:
            t_init, pci_init, freq_init = row.Timestamp, int(row.PCI), int(row.Freq)
            pcell = LTE_CEL(ePCI=pci_init, DL_Freq=freq_init)
            dprint(f"{t_init} | Initial PCI={pci_init} EARFCN={freq_init}")
            dprint()
            init = 0
        
        t, pci, freq = row.Timestamp, int(row.PCI), int(row.Freq)
        
        if (prev_pci, prev_freq) != (pci, freq):
            for j in range(i, len(df)):  # 往前走，最多走到底
                if CEL_INFO(j):
                    next_pcell = eci_track(j)
                    if next_pcell[0] == pci:
                        qpcell.push(next_pcell)
                        break
                elif not NR_OTA(j):
                    if df['PCI'].iloc[j] != pci:
                        break
        
        if not qpscell.empty():
            pscell = qpscell.pop()
        if not qpcell.empty():
            pcell = qpcell.pop()
        
        ### Conn_Rel
        if df["rrcConnectionRelease"].iloc[i] == 1:
            D['Conn_Rel'].append(HO(start=t))
            A['Conn_Rel'].append(C(*HO(start=t), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0]), *pcell, *LTE_CEL(), *pscell, *NR_CEL()))
            dprint(f"{t}, {pd.NaT} | Conn_Rel at PCI={pci} EARFCN={freq}.")
            dprint(f'{tuple(pcell)} -> {tuple(LTE_CEL())}')
            dprint(f'{tuple(pscell)} ->{tuple(NR_CEL())}')
            pcell, pscell = LTE_CEL(), NR_CEL()
            dprint()

        ### Conn_Setup
        if df["rrcConnectionRequest"].iloc[i] == 1:
            a, j1 = find_1st_after('rrcConnectionReconfigurationComplete',look_after=2)
            b, j2 = find_1st_after('securityModeComplete',look_after=2)
            end = a if a > b else b
            j = j1 if a > b else j2
            _pcell = peek_eci(pos=j)
            D['Conn_Setup'].append(HO(start=t, end=end))
            A['Conn_Setup'].append(C(*HO(start=t, end=end), *stLTE(tPCI=pci, tFreq=freq), *stNR(), *pcell, *_pcell, *pscell, *pscell))
            dprint(f"{t}, {end} | Conn_Setup to PCI={pci} EARFCN={freq}.")
            dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
            dprint()
        
        ### SN_Setup, SN_Rel, MO_HO, LTE_HO
        if df["lte-rrc.t304"].iloc[i] == 1:
            end, j = find_1st_after('rrcConnectionReconfigurationComplete')
            serv_cell, target_cell = pci, int(df['lte_targetPhysCellId'].iloc[i])
            serv_freq, target_freq = freq, int(df['dl-CarrierFreq'].iloc[i])
            nr_target_cell = int(df["nr_physCellId"].iloc[i])
            
            n = 0
            if df["SCellToAddMod-r10"].iloc[i] == 1:
                n =len(str(df["SCellIndex-r10.1"].iloc[i]).split('@'))
                others=f'Set up {n} SCell.'
            else:
                others=None
            
            if serv_freq != target_freq:
                others = f'{others} Inter-Freq HO.' if others else 'Inter-Freq HO.'
            
            ### SN_Setup, MN_HO
            if df["nr-rrc.t304"].iloc[i] == 1 and df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
                ### SN_Setup
                if serv_cell == target_cell and serv_freq == target_freq:
                    _pscell = peek_nr(pos=j)
                    D['SN_Setup'].append(HO(start=t, end=end, others=others, st_scell=n))
                    A['SN_Setup'].append(C(*HO(start=t, end=end, others=others, st_scell=n), *stLTE(sPCI=serv_cell, sFreq=serv_freq), *stNR(tnrPCI=nr_target_cell), *pcell, *pcell, *pscell, *_pscell))
                    dprint(f"{t}, {end} | SN_Setup to nrPCI={nr_target_cell} | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
                    dprint()
                else:
                ### MN_HO
                    _pcell = peek_eci(pos=j)
                    D['MN_HO'].append(HO(start=t, end=end, others=others, st_scell=n))
                    A['MN_HO'].append(C(*HO(start=t, end=end, others=others, st_scell=n), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *pscell))
                    dprint(f"{t}, {end} | MN_HO ({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
                    dprint()
            else:
            ### SN_Rel, LTE_HO
                ### SN_Rel
                if serv_cell == target_cell and serv_freq == target_freq:
                    a, b = find_1st_before("scgFailureInformationNR-r15")
                    if a is not None:
                        others = f'{others} Caused by scg-failure.' if others else 'Caused by scg-failure.'
                    D['SN_Rel'].append(HO(start=t, end=end, others=others, st_scell=n))
                    A['SN_Rel'].append(C(*HO(start=t, end=end, others=others, st_scell=n), *stLTE(sPCI=serv_cell, sFreq=serv_freq), *stNR(snrPCI=pscell[0]), *pcell, *pcell, *pscell, *NR_CEL()))
                    dprint(f"{t}, {end} | SN_Rel at nrPCI={pscell[0]} | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                    pscell = NR_CEL()
                    dprint()
                else:
                ### LTE_HO
                    _pcell = peek_eci(pos=j)
                    D['LTE_HO'].append(HO(start=t, end=end, others=others, st_scell=n))
                    A['LTE_HO'].append(C(*HO(start=t, end=end, others=others, st_scell=n), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(), *pcell, *_pcell, *pscell, *pscell))
                    dprint(f"{t}, {end} | LTE_HO ({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
                    dprint()

        ### SN_HO
        if df["nr-rrc.t304"].iloc[i] == 1 and not df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
            end, j = find_1st_after('rrcConnectionReconfigurationComplete')
            nr_target_cell = int(df["nr_physCellId"].iloc[i])
            _pscell = peek_nr(pos=j)
            D['SN_HO'].append(HO(start=t, end=end))
            A['SN_HO'].append(C(*HO(start=t, end=end), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0], tnrPCI=nr_target_cell), *pcell, *pcell, *pscell, *_pscell))
            dprint(f"{t}, {end} | SN_HO to nrPCI={nr_target_cell}")
            dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
            dprint()

        ### SCG_Failure
        if df["scgFailureInformationNR-r15"].iloc[i] == 1:
            # others = df["failureType-r15"].iloc[i]
            cause = df["failureType-r15"].iloc[i]
            _pscell = peek_nr()
            D['SCG_Failure'].append(HO(start=t, cause=cause))  # end time??
            A['SCG_Failure'].append(C(*HO(start=t, cause=cause), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0]), *pcell, *pcell, *pscell, *_pscell))
            dprint(f"{t}, {pd.NaT} | SCG_Failure at nrPCI={pscell[0]} | {cause}")
            dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
            ### SCG Fail 之後必定會 SN Rel
            dprint()
        
        ### MCG_Failure (type II), NAS_Recovery (type III)
        if df["rrcConnectionReestablishmentRequest"].iloc[i] == 1:
            end1, j1 = find_1st_after('rrcConnectionReestablishmentComplete', look_after=1)
            end2, j2 = find_1st_after('rrcConnectionReestablishmentReject', look_after=1)
            end3, j3 = find_1st_after('rrcConnectionRequest', look_after=1)
            # others = df["reestablishmentCause"].iloc[i]
            cause = df["reestablishmentCause"].iloc[i]
            # target_cell = int(df['physCellId.3'].iloc[i])
            serv_cell, target_cell = pci, int(df['physCellId.3'].iloc[i])
            serv_freq, target_freq = freq, None
            
            ### MCG_Failure (type II)
            if (end1 and not end2) or (end1 and end2 and end1 < end2):
                # dprint(end1, end2)
                end, j = end1, j1
                _pcell = peek_eci()
                D['MCG_Failure'].append(HO(start=t, end=end, cause=cause))
                A['MCG_Failure'].append(C(*HO(start=t, end=end, cause=cause), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *NR_CEL()))
                dprint(f"{t}, {end} | MCG_Failure PCI={serv_cell} -> PCI={target_cell}, recconected to {pci} | {cause}")
                dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                pscell = NR_CEL()
                dprint()
                ### MCG Fail 之後有機會不經過 RRC Connection Setup 就 Reconnect
            else: 
            ### NAS_Recovery (type III)
                # dprint(end1, end2)
                end, j = end3, j3
                _pcell = peek_eci()
                D['NAS_Recovery'].append(HO(start=t, end=end-pd.Timedelta(microseconds=1) if end else None, cause=cause))  # end time??
                A['NAS_Recovery'].append(C(*HO(start=t, end=end-pd.Timedelta(microseconds=1) if end else None, cause=cause), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *NR_CEL()))
                dprint(f"{t}, {end} | NAS_Recovery PCI={serv_cell} -> PCI={target_cell} | {cause}")
                dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                pscell = NR_CEL()
                dprint()
        
        ### Update previous pci, freq
        prev_pci, prev_freq = pci, freq
    
    ### Build DataFrame
    df_HO = pd.DataFrame()
    for key in A.keys():
        df_HO = pd.concat([df_HO, \
            pd.DataFrame(A[key], index=[key]*len(A[key]))])
    if df_HO.empty:
        print("************** Empty DataFrame!! **************")
    df_HO = df_HO.sort_values(by=['start']).reset_index()
    df_HO = df_HO.rename(columns={'index': 'ho_type'})
    df_HO = df_HO.reindex(
        ['start','end','ho_type','interrupt','sPCI','sFreq','tPCI','tFreq','snrPCI','tnrPCI','cause','others','st_scell'] + \
            df_HO.columns.tolist()[df_HO.columns.get_loc('ePCI'):df_HO.columns.get_loc('nrFreq1')+1], axis=1)
    df_HO['start'] = pd.to_datetime(df_HO['start'])
    df_HO['end'] = pd.to_datetime(df_HO['end'])
    df_HO['Timestamp'] = df_HO['start']
    df_HO['Type_ID'] = 'RRC_OTA_Handover_Parsing'
    df_HO['interrupt'] = (df_HO['end'] - df_HO['start']).dt.total_seconds()
    ### Set dtypes
    df_HO['ho_type'] = df_HO['ho_type'].astype('category')
    df_HO['cause'] = df_HO['cause'].astype('category')
    df_HO['others'] = df_HO['others'].astype('string')
    df_HO['st_scell'] = df_HO['st_scell'].astype('Int8')
    df_HO['DL_BW'] = df_HO['DL_BW'].astype('category')
    df_HO['DL_BW1'] = df_HO['DL_BW1'].astype('category')
    df_HO['UL_BW'] = df_HO['UL_BW'].astype('category')
    df_HO['UL_BW1'] = df_HO['UL_BW1'].astype('category')
    for tag in df_HO.columns[df_HO.columns.get_loc('sPCI'):df_HO.columns.get_loc('nrFreq1')+1]:
        if tag not in ['cause','others','DL_BW','DL_BW1','UL_BW','UL_BW1']:
            df_HO[tag] = df_HO[tag].astype('Int32')
    df_HO['interrupt'] = df_HO['interrupt'].astype('float32')
    df_HO['Timestamp'] = pd.to_datetime(df_HO['Timestamp'])
    df_HO['Type_ID'] = df_HO['Type_ID'].astype('category')
    return A, D, df_HO

### Parsing

In [22]:
# Parse the RRC log into a handover-event table and display it in full.
# NOTE(review): the read_csv below is commented out, so this cell relies on `df_ho`
# already existing in the kernel namespace, and it then overwrites `df_ho` with the
# parsed result. Re-running the cell would therefore feed the parsed table back into
# mi_parse_ho. Confirm where `df_ho` is loaded before relying on Restart & Run All.
# df_ho = pd.read_csv("/Users/jackbedford/Desktop/MOXA/Code/data/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01/data/diag_log_qc01_2023-02-04_14-57-22_rrc.csv")
# mi_parse_ho returns a 3-tuple; only the third element (the event DataFrame) is kept.
# The second argument (+8) is presumably a time-zone offset in hours — TODO confirm
# against mi_parse_ho's signature.
_, _, df_ho = mi_parse_ho(df_ho, +8)

# Lift pandas' row/column truncation for this one display only.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_ho)

Unnamed: 0,start,end,ho_type,interrupt,sPCI,sFreq,tPCI,tFreq,snrPCI,tnrPCI,cause,others,st_scell,ePCI,ECI,eNB,BID,DL_Freq,DL_BW,UL_Freq,UL_BW,ePCI1,ECI1,eNB1,BID1,DL_Freq1,DL_BW1,UL_Freq1,UL_BW1,nrPCI,nrFreq,nrPCI1,nrFreq1,Timestamp,Type_ID
0,2023-02-04 16:14:55.534483,2023-02-04 16:14:55.662803,Conn_Setup,0.12832,,,35.0,3050.0,,,,,0,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,,,2023-02-04 16:14:55.534483,RRC_OTA_Handover_Parsing
1,2023-02-04 16:14:55.825220,2023-02-04 16:14:55.864444,SN_Setup,0.039224,35.0,3050.0,,,,35.0,,Set up 1 SCell.,1,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,35.0,631000.0,2023-02-04 16:14:55.825220,RRC_OTA_Handover_Parsing
2,2023-02-04 16:15:09.036863,NaT,Conn_Rel,,35.0,3050.0,,,35.0,,,,0,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,,,,,,,,,35.0,631000.0,,,2023-02-04 16:15:09.036863,RRC_OTA_Handover_Parsing
3,2023-02-04 16:16:16.509426,2023-02-04 16:16:16.633288,Conn_Setup,0.123862,,,35.0,3050.0,,,,,0,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,,,2023-02-04 16:16:16.509426,RRC_OTA_Handover_Parsing
4,2023-02-04 16:16:16.767598,2023-02-04 16:16:16.805470,SN_Setup,0.037872,35.0,3050.0,,,,35.0,,Set up 1 SCell.,1,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,35.0,631000.0,2023-02-04 16:16:16.767598,RRC_OTA_Handover_Parsing
5,2023-02-04 16:17:27.953435,2023-02-04 16:17:27.986813,SN_Rel,0.033378,35.0,3050.0,,,35.0,,,Set up 1 SCell.,1,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,35.0,631000.0,,,2023-02-04 16:17:27.953435,RRC_OTA_Handover_Parsing
6,2023-02-04 16:17:28.156703,2023-02-04 16:17:28.199117,SN_Setup,0.042414,35.0,3050.0,,,,160.0,,Set up 1 SCell.,1,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,,,160.0,631000.0,2023-02-04 16:17:28.156703,RRC_OTA_Handover_Parsing
7,2023-02-04 16:17:42.004563,2023-02-04 16:17:42.019062,SN_HO,0.014499,35.0,3050.0,,,160.0,35.0,,,0,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,160.0,631000.0,35.0,631000.0,2023-02-04 16:17:42.004563,RRC_OTA_Handover_Parsing
8,2023-02-04 16:18:10.084393,2023-02-04 16:18:10.117903,SN_Rel,0.03351,35.0,3050.0,,,35.0,,,Set up 1 SCell.,1,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,35.0,631000.0,,,2023-02-04 16:18:10.084393,RRC_OTA_Handover_Parsing
9,2023-02-04 16:18:10.412847,2023-02-04 16:18:10.454196,SN_Setup,0.041349,35.0,3050.0,,,,35.0,,Set up 1 SCell.,1,35,54305826,212132,7,3050,20 MHz,21050,20 MHz,35.0,54305826.0,212132.0,7.0,3050.0,20 MHz,21050.0,20 MHz,35.0,631000.0,35.0,631000.0,2023-02-04 16:18:10.412847,RRC_OTA_Handover_Parsing


In [23]:
# Derive per-second handover features from the parsed event table `df_ho`.
# Steps: (1) flag ping-pong events, (2) classify handovers by frequency / sector / eNB,
# (3) build source/target labels, (4) one-hot encode the event types,
# (5) aggregate everything onto 1-second timestamps in `dfagg_ho`.

### (1) Ping-pong flags
# The Series needs an explicit index: pd.Series(0, dtype='Int8') has a single row, so
# assigning it directly aligns on index 0 only and leaves every other row <NA>.
df_ho['mnpp'] = pd.Series(0, index=df_ho.index, dtype='Int8')
df_ho['snpp'] = pd.Series(0, index=df_ho.index, dtype='Int8')
# NOTE(review): myQueue is defined elsewhere in the notebook; presumably a bounded
# history of the most recent 7 targets — confirm against its definition.
qlte = myQueue(7)
qnr = myQueue(7)
for i, row in df_ho.iterrows():
    if row.ho_type == 'Conn_Rel':
        # A connection release resets both target histories.
        qlte.clear()
        qnr.clear()
        continue
    if row.ho_type in ['Conn_Setup','LTE_HO','MN_HO','MNSN_HO']:
        # Flag the event when its (tPCI, tFreq) target already appears in the history,
        # ignoring the most recent entry.
        if (row.tPCI, row.tFreq) in qlte.tolist()[:-1]:
            df_ho.at[i, 'mnpp'] = 1
        # Only record the target when it differs from the newest history entry.
        if (row.tPCI, row.tFreq) != qlte.rear():
            qlte.push((row.tPCI, row.tFreq))
    if row.ho_type in ['SN_Setup','SN_HO','MNSN_HO','SN_Rel','LTE_HO']:
        if row.ho_type in ['SN_Rel','LTE_HO']:
            # These event types reset the NR target history.
            qnr.clear()
            continue
        if row.tnrPCI in qnr.tolist()[:-1]:
            df_ho.at[i, 'snpp'] = 1
        if row.tnrPCI != qnr.rear():
            qnr.push(row.tnrPCI)

### (2) Frequency / sector / eNB classification
mn_ho_types = ['LTE_HO','MN_HO','MNSN_HO','SN_Rel_MN_HO','SN_Setup_MN_HO']
is_mn_ho = df_ho['ho_type'].isin(mn_ho_types)
enb_known = df_ho['eNB'].notna() & df_ho['eNB1'].notna()

df_ho['ho_type1'] = 'none'
df_ho['ho_type2'] = 'none'
# Comparisons involving <NA> yield <NA>, which .loc treats as "not selected";
# such rows keep the 'none' default.
df_ho.loc[is_mn_ho & (df_ho['sFreq'] == df_ho['tFreq']), 'ho_type1'] = 'intra_freq'
df_ho.loc[is_mn_ho & (df_ho['sFreq'] != df_ho['tFreq']), 'ho_type1'] = 'inter_freq'
df_ho.loc[is_mn_ho & (df_ho['sPCI'] == df_ho['tPCI']), 'ho_type2'] = 'intra_sector'
df_ho.loc[is_mn_ho & (df_ho['sPCI'] != df_ho['tPCI']), 'ho_type2'] = 'inter_sector'
df_ho.loc[is_mn_ho & enb_known & (df_ho['eNB'] != df_ho['eNB1']), 'ho_type2'] = 'inter_enb'
# `&` binds tighter than `|`, so the missing-eNB test must be grouped explicitly;
# otherwise every row with a missing eNB1 is labelled regardless of its ho_type.
df_ho.loc[is_mn_ho & ~enb_known, 'ho_type2'] = 'unknown_enb'

### (3) Source / target labels: "<eNB>@<PCI>@<Freq>@<nrPCI>", empty string for missing parts
for tag in ['eNB','sPCI','sFreq','snrPCI','eNB1','tPCI','tFreq','tnrPCI']:
    df_ho[tag] = df_ho[tag].astype('string').fillna('')
df_ho['ho_src'] = df_ho['eNB'] + '@' + df_ho['sPCI'] + '@' + df_ho['sFreq'] + '@' + df_ho['snrPCI']
df_ho['ho_tgt'] = df_ho['eNB1'] + '@' + df_ho['tPCI'] + '@' + df_ho['tFreq'] + '@' + df_ho['tnrPCI']

# Failure events get their cause appended to the type name, e.g. "MCG_Failure_<cause>".
df_ho['ho_type'] = df_ho['ho_type'].astype('string')
df_ho['cause'] = df_ho['cause'].astype('string')
is_failure = df_ho['ho_type'].isin(['MCG_Failure','NAS_Recovery'])
df_ho.loc[is_failure, 'ho_type'] = df_ho.loc[is_failure, 'ho_type'] + '_' + df_ho.loc[is_failure, 'cause']

# Connection setup/release rows are not counted as handover events.
df_ho = df_ho[~df_ho['ho_type'].isin(['Conn_Setup','Conn_Rel'])].copy().reset_index(drop=True)

### (4) One-hot encoding
handover_type = ['LTE_HO','SN_Setup','SN_Rel','MN_HO','SN_HO','MNSN_HO',
                 'SCG_Failure',
                 'MCG_Failure_reconfigurationFailure (0)','MCG_Failure_handoverFailure (1)','MCG_Failure_otherFailure (2)',
                 'NAS_Recovery_reconfigurationFailure (0)','NAS_Recovery_handoverFailure (1)','NAS_Recovery_otherFailure (2)']
handover_type1 = ['intra_freq','inter_freq']
handover_type2 = ['intra_sector','inter_sector','inter_enb']

# Prepend placeholder rows carrying every known category so get_dummies always emits
# the full set of columns; the placeholders have no Timestamp and are dropped below.
df_ho = pd.concat([pd.DataFrame(handover_type, columns=['ho_type']),
                   pd.DataFrame(handover_type1, columns=['ho_type1']),
                   pd.DataFrame(handover_type2, columns=['ho_type2']),
                   df_ho], ignore_index=True)
df_ho = df_ho[['Timestamp','ho_type','ho_type1','ho_type2','ho_src','ho_tgt','st_scell','mnpp','snpp']]

dum0 = pd.get_dummies(df_ho.ho_type).astype('Int8')
dum1 = pd.get_dummies(df_ho.ho_type1).astype('Int8')
dum2 = pd.get_dummies(df_ho.ho_type2).astype('Int8')
df_ho = pd.concat([df_ho, dum0, dum1, dum2], axis=1)
df_ho = df_ho.dropna(subset=['Timestamp']).copy().reset_index(drop=True)
# Selecting the columns explicitly also discards dummy columns for labels outside the
# three lists above (e.g. 'none', 'unknown_enb').
df_ho = df_ho[['Timestamp', 'ho_type', 'ho_type1', 'ho_type2', 'ho_src','ho_tgt', 'st_scell', 'mnpp', 'snpp',
               *handover_type, *handover_type1, *handover_type2]]

### (5) Aggregate onto 1-second timestamps
df_ho['Timestamp'] = df_ho['Timestamp'].dt.round('S')
tsh_group = df_ho.groupby(['Timestamp'])

# Start from the first event of each second, then append later events of the same
# second to the label columns with '+' as separator.
dfagg_ho = df_ho.loc[~df_ho.duplicated(subset=['Timestamp']), ['Timestamp', 'ho_type', 'ho_type1', 'ho_type2', 'ho_src', 'ho_tgt']].copy().reset_index(drop=True)
dfagg_ho1 = tsh_group['ho_type'].agg(['count']).reset_index()
dfagg_ho1 = dfagg_ho1.rename(columns={'count':'ho_num'})
dfagg_ho = pd.merge(dfagg_ho, dfagg_ho1, on='Timestamp', how='outer')
dfagg_ho['ho_num'] = dfagg_ho['ho_num'].astype('Int8')

dfagg_ho = dfagg_ho.set_index('Timestamp')
for i, row in df_ho[df_ho.duplicated(subset=['Timestamp'])].copy().iterrows():
    for tag in ['ho_type', 'ho_type1', 'ho_type2', 'ho_src', 'ho_tgt']:
        dfagg_ho.at[row.Timestamp, tag] = dfagg_ho.at[row.Timestamp, tag] + '+' + row[tag]
dfagg_ho = dfagg_ho.reset_index()

# Numeric features: max SCell state, summed ping-pong flags, summed one-hot counts.
dfagg_ho = pd.merge(dfagg_ho, tsh_group['st_scell'].max().reset_index(),
                    on='Timestamp', how='outer')
dfagg_ho = pd.merge(dfagg_ho, tsh_group[['mnpp', 'snpp']].sum().reset_index(),
                    on='Timestamp', how='outer')
dfagg_ho = pd.merge(dfagg_ho, tsh_group[[*handover_type, *handover_type1, *handover_type2]].sum().reset_index(),
                    on='Timestamp', how='outer')

display(dfagg_ho)

Unnamed: 0,Timestamp,ho_type,ho_type1,ho_type2,ho_src,ho_tgt,ho_num,st_scell,mnpp,snpp,...,MCG_Failure_handoverFailure (1),MCG_Failure_otherFailure (2),NAS_Recovery_reconfigurationFailure (0),NAS_Recovery_handoverFailure (1),NAS_Recovery_otherFailure (2),intra_freq,inter_freq,intra_sector,inter_sector,inter_enb
0,2023-02-04 16:14:56,SN_Setup,none,none,212132@35@3050@,212132@@@35,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2023-02-04 16:16:17,SN_Setup,none,none,212132@35@3050@,212132@@@35,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2023-02-04 16:17:28,SN_Rel+SN_Setup,none+none,none+none,212132@35@3050@35+212132@35@3050@,212132@@@+212132@@@160,2,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2023-02-04 16:17:42,SN_HO,none,none,212132@35@3050@160,212132@@@35,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2023-02-04 16:18:10,SN_Rel+SN_Setup,none+none,none+none,212132@35@3050@35+212132@35@3050@,212132@@@+212132@@@35,2,1,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2023-02-04 16:18:56,MN_HO+MN_HO,intra_freq+intra_freq,inter_enb+inter_enb,212132@35@3050@35+213133@186@3050@35,213133@186@3050@+212783@266@3050@,2,1,0,0,...,0,0,0,0,0,1,0,0,0,1
6,2023-02-04 16:19:02,MN_HO,intra_freq,inter_enb,212783@266@3050@35,212132@35@3050@,1,1,1,0,...,0,0,0,0,0,1,0,0,0,1
7,2023-02-04 16:19:25,MN_HO,intra_freq,inter_enb,212132@35@3050@35,212724@400@3050@,1,1,0,0,...,0,0,0,0,0,1,0,0,0,1
8,2023-02-04 16:19:27,MN_HO+MN_HO,intra_freq+intra_freq,inter_enb+inter_sector,212724@400@3050@35+212783@266@3050@35,212783@266@3050@+212783@274@3050@,2,1,1,0,...,0,0,0,0,0,1,0,0,1,1
9,2023-02-04 16:19:28,SN_Rel+SN_Setup,none+none,none+none,212783@274@3050@35+212783@274@3050@,212783@@@+212783@@@186,2,1,0,0,...,0,0,0,0,0,0,0,0,0,0


## Concat Stage 2

In [24]:
# Attach the per-second handover features to `dfagg`; seconds without any handover
# have no match in `dfagg_ho`, so their numeric handover columns are set to 0.
# NOTE(review): this cell overwrites `dfagg` with the merged frame, so running it twice
# would merge the handover columns in a second time.
dfagg = pd.merge(dfagg, dfagg_ho, on='Timestamp', how='left').copy().reset_index(drop=True)
for tag in ['ho_num','st_scell','mnpp','snpp',*handover_type,*handover_type1,*handover_type2]:
    dfagg[tag] = dfagg[tag].fillna(0)

# Rows without a PCI inherit PCI/EARFCN/RSRP/RSRQ from the closest earlier row that has one.
# Relies on the 0..n-1 integer index produced by reset_index(drop=True) above.
cell_info_cols = ['PCI','EARFCN','RSRP','RSRQ']
for i in dfagg.index[dfagg['PCI'].isna()]:
    j = i - 1
    # Bound the backward scan: without `j >= 0`, a run of missing PCIs at the very
    # start of the frame walks to j == -1 and `.at[-1, ...]` raises KeyError.
    while j >= 0 and pd.isna(dfagg.at[j, 'PCI']):
        j -= 1
    if j < 0:
        # No earlier row with a known PCI exists; leave this row unfilled.
        continue
    dfagg.loc[i, cell_info_cols] = dfagg.loc[j, cell_info_cols]

In [25]:
# Sanity check: list every row of `dfagg` whose `PCI` is still missing, with pandas'
# row/column truncation lifted for this display only.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(dfagg.loc[dfagg['PCI'].isna()])

Unnamed: 0,Timestamp,tx_count,loss,lorate,rx_count,excl,exrate,loexrate,mean_lat,min_lat,max_lat,median_lat,std_lat,jitter,PCI,EARFCN,RSRP,RSRQ,nrPCI,nrARFCN,nrRSRP,nrRSRQ,ho_type,ho_type1,ho_type2,ho_src,ho_tgt,ho_num,st_scell,mnpp,snpp,LTE_HO,SN_Setup,SN_Rel,MN_HO,SN_HO,MNSN_HO,SCG_Failure,MCG_Failure_reconfigurationFailure (0),MCG_Failure_handoverFailure (1),MCG_Failure_otherFailure (2),NAS_Recovery_reconfigurationFailure (0),NAS_Recovery_handoverFailure (1),NAS_Recovery_otherFailure (2),intra_freq,inter_freq,intra_sector,inter_sector,inter_enb


In [26]:
# Persist the final `dfagg` table as a pickle. The path is relative, so the file is
# written to the kernel's current working directory.
dfagg.to_pickle('input_data_processed.pkl')