In [66]:
import os
import sys
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
from tqdm.notebook import tqdm
from collections import namedtuple
from collections import defaultdict
from pprint import pprint
import pickle
from functools import reduce

# Configure display options
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)
%config InlineBackend.figure_format = 'retina'

# Add project directory to sys.path
# pdir = os.path.abspath(os.path.join(os.getcwd(), '.'))
# sys.path.insert(1, pdir)
# from myutils import *

# Other module imports
import ast, math, swifter, csv, json, itertools as it, portion as P

# Set plot style
# plt.style.use('ggplot')

# Function Definitions

In [67]:
# ************************************************* unify data format *************************************************

def set_data(df, mode='pcap', tz=0):
    """Cast a raw log DataFrame to the unified column names and dtypes.

    Args:
        df (pd.DataFrame): Raw records. In 'pcap' mode the columns are cast on
            the frame that was passed in (the caller's object is modified);
            in 'lte' / 'nr' mode a renamed and reindexed frame is built instead.
        mode (str): 'pcap', 'lte' or 'nr'. Any other value returns `df` untouched.
        tz (int | float): Hours added to `Timestamp` in 'lte' / 'nr' mode.
            Not used in 'pcap' mode.

    Returns:
        pd.DataFrame: The converted frame. An empty input yields an empty frame
        that only carries the common column names of the requested mode.

    Raises:
        KeyError: In 'lte' / 'nr' mode when the input has no 'PCI1' column, or
            in any mode when a column that is cast below is missing.
    """
    def nr_serv_cel(row):
        # Pick the PCI/RSRP/RSRQ triple found at the serving-cell position.
        # Position 255 has no matching PCI{pos} column, so fixed placeholder
        # values are returned (presumably "no serving cell listed" - confirm).
        pos = row.serv_cel_pos
        if pos == 255:
            return 65535, -160, -50
        else:
            return row[f'PCI{pos}'], row[f'RSRP{pos}'], row[f'RSRQ{pos}']
    
    if mode == 'pcap':
        # Must list the same names that are cast below, so that empty and
        # non-empty results share one schema ('Timestamp_epoch', not '..._epoth').
        common_column_names = [
            'seq', 'rpkg', 'frame_id', 'Timestamp', 'Timestamp_epoch', 'lost', 'excl', 'latency',
            'xmit_time', 'xmit_time_epoch', 'arr_time', 'arr_time_epoch',
        ]
        
        if df.empty:
            return pd.DataFrame(columns=common_column_names)
        
        date_columns = ['Timestamp', 'xmit_time', 'arr_time']
        df[date_columns] = df[date_columns].apply(pd.to_datetime)
        # Nullable dtypes keep missing values representable after the cast.
        df[['seq', 'rpkg', 'frame_id']] = df[['seq', 'rpkg', 'frame_id']].astype('Int32')
        df[['Timestamp_epoch', 'xmit_time_epoch', 'arr_time_epoch', 'latency']] = \
            df[['Timestamp_epoch', 'xmit_time_epoch', 'arr_time_epoch', 'latency']].astype('float32')
        df[['lost', 'excl']] = df[['lost', 'excl']].astype('boolean')

    if mode in ['lte', 'nr']:
        common_column_names = [
            'Timestamp', 'type_id', 'PCI', 'RSRP', 'RSRQ', 'serv_cel_index', 'EARFCN', 'NR_ARFCN', 
            'num_cels', 'num_neigh_cels', 'serv_cel_pos', 'PCI0', 'RSRP0', 'RSRQ0',
        ]
        
        if df.empty:
            return pd.DataFrame(columns=common_column_names)
        
        if mode == 'lte':
            columns_mapping = {
                'RSRP(dBm)': 'RSRP',
                'RSRQ(dB)': 'RSRQ',
                'Serving Cell Index': 'serv_cel_index',
                'Number of Neighbor Cells': 'num_neigh_cels',
                'Number of Detected Cells': 'num_cels',
            }
            # Common columns first, then every original column from 'PCI1' onwards.
            columns_order = [*common_column_names, *df.columns[df.columns.get_loc('PCI1'):].tolist()]
            
            # reindex() creates any common column the raw frame lacks (filled with NaN).
            df = df.rename(columns=columns_mapping).reindex(columns_order, axis=1)
            df['serv_cel_index'] = np.where(df['serv_cel_index'] == '(MI)Unknown', '3_SCell', df['serv_cel_index'])
            df['num_cels'] = df['num_neigh_cels'] + 1
            df['type_id'] = 'LTE_PHY'

        if mode == 'nr':
            columns_mapping = {
                'Raster ARFCN': 'NR_ARFCN',
                'Serving Cell Index': 'serv_cel_pos',
                'Num Cells': 'num_cels',
            }
            # Common columns first, then every original column from 'PCI1' onwards.
            columns_order = [*common_column_names, *df.columns[df.columns.get_loc('PCI1'):].tolist()]
            
            df = df.rename(columns=columns_mapping).reindex(columns_order, axis=1)
            df[['PCI', 'RSRP', 'RSRQ']] = df.apply(nr_serv_cel, axis=1, result_type='expand')
            # Rows at position 255 keep whatever serv_cel_index holds after the
            # reindex; every other row is labelled 'PSCell'.
            df['serv_cel_index'] = np.where(df['serv_cel_pos'] == 255, df['serv_cel_index'], 'PSCell')
            # At position 255 all listed cells count as neighbours; otherwise
            # one of them is the serving cell.
            df['num_neigh_cels'] = np.where(df['serv_cel_pos'] == 255, df['num_cels'], df['num_cels'] - 1)
            df['type_id'] = '5G_NR_ML1'
        
        df['Timestamp'] = pd.to_datetime(df['Timestamp']) + pd.Timedelta(hours=tz)
        df[['type_id', 'serv_cel_index']] = df[['type_id', 'serv_cel_index']].astype('category')
        df[['EARFCN', 'NR_ARFCN']] = df[['EARFCN', 'NR_ARFCN']].astype('Int32')
        df[['num_cels', 'num_neigh_cels', 'serv_cel_pos']] = df[['num_cels', 'num_neigh_cels', 'serv_cel_pos']].astype('UInt8')

        # Per-cell columns (PCI*, RSRP*, RSRQ*) are cast by name prefix.
        for tag in df.columns:
            if tag.startswith('PCI'):
                df[tag] = df[tag].astype('Int32')
            if tag.startswith(('RSRP', 'RSRQ')):
                df[tag] = df[tag].astype('float32')

    return df

# ************************************************* Sheng-Ru Latest Version (09-25) add try except (10-27) *************************************************

def parse_mi_ho(df, tz=8):
    """Walk the RRC records in order and collect handover / failure events.

    The frame is scanned row by row. Rows whose `type_id` is
    '5G_NR_RRC_OTA_Packet' or 'LTE_RRC_Serv_Cell_Info' are skipped. Two pieces
    of state are carried along the scan: `nr_pci` (the current NR cell, 'O'
    when there is none) and `scells` (the current list of (PCI, freq) SCells).

    Args:
        df (pd.DataFrame): RRC records, one message per row, holding the flag
            and value columns read below ('Timestamp', 'type_id', 'PCI',
            'Freq', 'lte-rrc.t304', 'nr-rrc.t304', ...). The 'Timestamp'
            column is converted to datetimes and shifted by `tz` hours
            **in place**, so calling this twice on one frame shifts it twice.
        tz (int | float): Hours added to every timestamp.

    Returns:
        dict[str, list[HO]]: Event lists keyed by event name. Each `HO` is a
        namedtuple `(start, end, others, trans)`; `end` is None when no
        terminating message was found inside the search window, `others` is a
        free-text note and `trans` describes the cell transition.
    """
    # Same conversion set_data() performs, done with plain pandas. The
    # element-wise fallback covers columns whose values do not share one format.
    shift = pd.Timedelta(hours=tz)
    try:
        df["Timestamp"] = pd.to_datetime(df["Timestamp"]) + shift
    except (ValueError, TypeError):
        df["Timestamp"] = df["Timestamp"].apply(lambda x: pd.to_datetime(x) + shift)
    nr_pci = 'O'
    scells = []

    HO = namedtuple('HO',['start', 'end', 'others', 'trans'], defaults=[None,None,'',''])
    
    D = {
        'Conn_Rel':[], 
        'Conn_Req':[], # Setup
        'LTE_HO': [], # LTE -> newLTE
        'MN_HO': [], # LTE + NR -> newLTE + NR
        'MN_HO_to_eNB': [], # LTE + NR -> newLTE
        'SN_setup': [], # LTE -> LTE + NR => NR setup
        'SN_Rel': [], # LTE + NR -> LTE
        'SN_HO': [], # LTE + NR -> LTE + newNR  
        'RLF_II': [],
        'RLF_III': [],
        'SCG_RLF': [],
        'Add_SCell': [],
        }

    # NOTE: the helpers below read `i` and `t` (index and timestamp of the row
    # currently being processed) and `D` from the enclosing scope.

    def NR_OTA(idx):
        return df["type_id"].iloc[idx] == "5G_NR_RRC_OTA_Packet"
    
    def LTE_SERV_INFO(idx):
        return df["type_id"].iloc[idx] == "LTE_RRC_Serv_Cell_Info"

    def is_set(value):
        # A field counts as present when it is neither 0 / '0' nor missing.
        # pd.isna is used because np.isnan raises TypeError on string cells.
        return value not in [0, '0'] and not pd.isna(value)

    def find_1st_after(start_idx, target, look_after=1):
        # First row at/after start_idx, at most look_after seconds later than
        # `t`, whose `target` field is present. Returns (timestamp, index).
        for j in range(start_idx, len(df)):
            t_ = df["Timestamp"].iloc[j]
            if NR_OTA(j) or LTE_SERV_INFO(j):
                continue
            if (t_ - t).total_seconds() > look_after:
                return None, None
            if is_set(df[target].iloc[j]):
                return t_, j
        return None, None
    
    def find_1st_before(start_idx, target, look_before=1):
        # Mirror of find_1st_after, walking backwards from start_idx.
        for j in range(start_idx, -1, -1):
            t_ = df["Timestamp"].iloc[j]
            if NR_OTA(j) or LTE_SERV_INFO(j):
                continue
            if (t - t_).total_seconds() > look_before:
                return None, None
            if is_set(df[target].iloc[j]):
                return t_, j
        return None, None
    
    def find_1st_before_with_special_value(start_idx, target, target_value, look_before=1):
        # Like find_1st_before, but the field must equal target_value.
        for j in range(start_idx, -1, -1):
            t_ = df["Timestamp"].iloc[j]
            if NR_OTA(j) or LTE_SERV_INFO(j):
                continue
            if (t - t_).total_seconds() > look_before:
                return None, None
            value = df[target].iloc[j]
            if value in [target_value] and not pd.isna(value):
                return t_, j
        return None, None
    
    def find_in_D_exact(targets):
        # Name of the first listed event type with an event starting exactly at `t`.
        for target in targets:
            for ho in D[target]:
                if (t - ho.start).total_seconds() == 0:
                    return target
        return None
    
    def find_in_D_first_before(targets, look_before=1):
        # Closest already-recorded event (of the given types) that ended
        # strictly before `t` and less than look_before seconds ago.
        gaps = []
        for target in targets:
            for ho in D[target]:
                if ho.end is None or pd.isna(ho.end):
                    continue  # events without an end cannot be ranked
                gaps.append(((t - ho.end).total_seconds(), target, ho))

        # default=None: there may be candidates but none with a positive gap;
        # a bare min() over the empty selection would raise ValueError.
        closest = min((g for g in gaps if g[0] > 0), key=lambda g: g[0], default=None)
        if closest is not None and 0 <= closest[0] < look_before:
            return closest[1], closest[2]
        
        return None, None

    def rlf_freq_of(pci):
        # Freq of the most recent record (within 10 s) whose PCI equals `pci`;
        # 0 when no such record exists or its Freq cannot be read as an int.
        _, idx = find_1st_before_with_special_value(i, 'PCI', pci, look_before=10)
        if idx is None:
            return 0
        try:
            return int(df['Freq'].iloc[idx])
        except (TypeError, ValueError, OverflowError):
            return 0

    for i in range(len(df)):

        # Pass NR RRC packet. In NSA mode, LTE RRC packet include NR packet message.
        if NR_OTA(i) or LTE_SERV_INFO(i):
            continue

        others = ''
        t = df["Timestamp"].iloc[i]

        if df["rrcConnectionRelease"].iloc[i] == 1:
            D['Conn_Rel'].append(HO(start=t))
            nr_pci = 'O'

        if df["rrcConnectionRequest"].iloc[i] == 1:
            
            # Define end of rrcConnectionRequest to be rrcConnectionReconfigurationComplete or securityModeComplete.
            a = find_1st_after(i, 'rrcConnectionReconfigurationComplete',look_after=2)[0]
            b = find_1st_after(i, 'securityModeComplete',look_after=2)[0]
            # The later of the two completion messages; None when neither was found.
            found = [x for x in (a, b) if x is not None]
            end = max(found) if found else None
            
            serv_cell, serv_freq = df["PCI"].iloc[i], int(df["Freq"].iloc[i])
            trans = f'? -> ({serv_cell}, {serv_freq})'
            D['Conn_Req'].append(HO(start=t,end=end,trans=trans))
            nr_pci = 'O'
        
        if df["lte-rrc.t304"].iloc[i] == 1:
            
            end, _ = find_1st_after(i, 'rrcConnectionReconfigurationComplete')
            serv_cell, target_cell = df["PCI"].iloc[i], int(df['lte_targetPhysCellId'].iloc[i])
            serv_freq, target_freq = int(df["Freq"].iloc[i]), int(df['dl-CarrierFreq'].iloc[i])

            if df["SCellToAddMod-r10"].iloc[i] == 1:
                # Multiple values in one cell are '@'-separated.
                n =len(str(df["SCellIndex-r10.1"].iloc[i]).split('@'))
                others += f' Set up {n} SCell.'
            else:
                scells = []
            
            if serv_freq != target_freq:
                a, _ = find_1st_before(i, "rrcConnectionReestablishmentRequest", 1)
                others += " Inter frequency HO."
                if a is not None:
                    others += " Near after RLF."
                
            if df["nr-rrc.t304"].iloc[i] == 1 and df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
                
                if serv_cell == target_cell and serv_freq == target_freq:

                    a, _ = find_1st_before(i, "rrcConnectionReestablishmentRequest", 2)
                    
                    if a is not None:

                        _, ho = find_in_D_first_before(['RLF_II', 'RLF_III'], 2)
                        if ho is not None:
                            others += f' Near after RLF of trans: {ho.trans}.'
                        else:
                            others += f' Near after RLF.'

                    else:
                        
                        ho_type, _ = find_in_D_first_before(['MN_HO_to_eNB', 'SN_Rel'], 2)
                        if ho_type is not None:
                            others += f' Near after {ho_type}.'

                    ori_serv = nr_pci
                    nr_pci = int(df['nr_physCellId'].iloc[i])
                    trans = f'({serv_cell}, {serv_freq}) | {ori_serv} -> {nr_pci}'
                    D['SN_setup'].append(HO(start=t, end=end, others=others, trans=trans))

                else:
                    
                    nr_pci = int(df['nr_physCellId'].iloc[i])
                    trans = f'({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {nr_pci}'
                    D['MN_HO'].append(HO(start=t, end=end, others=others, trans=trans))

            else:
                
                if serv_cell == target_cell and serv_freq == target_freq:

                    a, _ = find_1st_before(i, "scgFailureInformationNR-r15")
                    if a is not None:
                        others += " Caused by scg-failure."
                    
                    orig_serv = nr_pci
                    nr_pci = 'O'
                    trans = f'({serv_cell}, {serv_freq}) | {orig_serv} -> {nr_pci}'
                    D['SN_Rel'].append(HO(start=t, end=end, others=others, trans=trans))
                    
                else:

                    a, _ = find_1st_before(i,"rrcConnectionSetup",3)
                    if a is not None:
                        others += ' Near After connection setup.'
                    if nr_pci == 'O':
                        trans = f'({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {nr_pci}'
                        D['LTE_HO'].append(HO(start=t, end=end, others=others, trans=trans))
                    else:
                        orig_serv = nr_pci
                        nr_pci = 'O'
                        trans = f'({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {orig_serv} -> {nr_pci}'
                        D['MN_HO_to_eNB'].append(HO(start=t, end=end, others=others, trans=trans))


        if df["nr-rrc.t304"].iloc[i] == 1 and not df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:

            end, _ = find_1st_after(i,'rrcConnectionReconfigurationComplete')
        
            serv_cell, serv_freq = df["PCI"].iloc[i], int(df["Freq"].iloc[i])
            orig_serv = nr_pci
            nr_pci = int(df['nr_physCellId'].iloc[i])
            trans = f'({serv_cell}, {serv_freq}) | {orig_serv} -> {nr_pci}'
            D['SN_HO'].append(HO(start=t,end=end,trans=trans))


        if df["rrcConnectionReestablishmentRequest"].iloc[i] == 1:

            end1, _ = find_1st_after(i, 'rrcConnectionReestablishmentComplete', look_after=1)
            b, _ = find_1st_after(i, 'rrcConnectionReestablishmentReject', look_after=1)
            end2, _ = find_1st_after(i, 'securityModeComplete',look_after=3)

            others += ' ' + df["reestablishmentCause"].iloc[i] + '.'
            scells = []

            c, _ = find_1st_before(i, 'scgFailureInformationNR-r15', 1)
            if c is not None:
                others  += ' caused by scgfailure.'
                
            serv_cell, rlf_cell = df["PCI"].iloc[i], int(df['physCellId.3'].iloc[i])
            serv_freq = int(df['Freq'].iloc[i])
            
            # Type II & Type III
            if end1 is not None: 

                orig_serv = nr_pci
                nr_pci = 'O'
                rlf_freq = rlf_freq_of(rlf_cell)
                trans = f'({rlf_cell}, {rlf_freq}) -> ({serv_cell}, {serv_freq}) | {orig_serv} -> {nr_pci}'
                D['RLF_II'].append(HO(start=t,end=end1,others=others,trans=trans))

            elif b is not None and end2 is not None:
                
                orig_serv = nr_pci
                nr_pci = 'O'
                rlf_freq = rlf_freq_of(rlf_cell)

                _, idx = find_1st_after(i, "rrcConnectionRequest", 2)
                if idx is not None:
                    recon_cell, recon_freq = df['PCI'].iloc[idx], int(float(df['Freq'].iloc[idx]))
                else:
                    # No rrcConnectionRequest inside the 2 s window: indexing
                    # with None would raise, so fall back to the cell carrying
                    # this reestablishment request and say so in the note.
                    # NOTE(review): confirm this placeholder is the desired one.
                    recon_cell, recon_freq = serv_cell, serv_freq
                    others += ' No rrcConnectionRequest found.'
                
                trans = f'({rlf_cell}, {rlf_freq}) -> ({recon_cell}, {recon_freq}) | {orig_serv} -> {nr_pci}'
                D['RLF_III'].append(HO(start=t,end=end2,others=others,trans=trans))
                
            else:
                others+=' No end.'
                D['RLF_II'].append(HO(start=t,others=others))
                print('No end for RLF')

        if df["scgFailureInformationNR-r15"].iloc[i] == 1:

            others += ' ' + df["failureType-r15"].iloc[i] + '.'
            a, idx1 = find_1st_after(i, "rrcConnectionReestablishmentRequest", look_after=1)
            b, idx2 = find_1st_after(i, "lte-rrc.t304", look_after=10)

            if a is not None:

                end1, _ = find_1st_after(idx1, 'rrcConnectionReestablishmentComplete', look_after=1)
                b, _ = find_1st_after(idx1, 'rrcConnectionReestablishmentReject', look_after=1)
                end2 = find_1st_after(idx1, 'securityModeComplete',look_after=3)[0]

                others += ' Result in rrcReestablishment.'
                    
                # Type II & Type III Result
                if end1 is not None: 
                    D['SCG_RLF'].append(HO(start=t,end=end1,others=others))
                elif b is not None and end2 is not None: 
                    D['SCG_RLF'].append(HO(start=t,end=end2,others=others))
                else:
                    others += ' No end.'
                    D['SCG_RLF'].append(HO(start=t,others=others))
                    print('No end for scg failure result in rrcReestablishment.')

            elif b is not None:

                end, _ = find_1st_after(idx2, 'rrcConnectionReconfigurationComplete')
                serv_cell, target_cell = df["PCI"].iloc[idx2], df['lte_targetPhysCellId'].iloc[idx2]
                serv_freq, target_freq = int(df["Freq"].iloc[idx2]), df['dl-CarrierFreq'].iloc[idx2]
                others += ' Result in gNB release.'
                # We do not change nr_pci here. Instead, we will change it at gNB_Rel event.
                trans = f'({serv_cell}, {serv_freq}) | {nr_pci} -> O'
                
                if not (serv_cell == target_cell and serv_freq == target_freq):
                    others += ' Weird gNB release.'
                    print('Weird for scg failure result in gNb Release.')
                D['SCG_RLF'].append(HO(start=t,end=end,others=others,trans=trans))

            else:

                print('No end for scg failure.')
                others += ' No end.'
                D['SCG_RLF'].append(HO(start=t,others=others))
        
        if df['SCellToAddMod-r10'].iloc[i] == 1 and df['physCellId-r10'].iloc[i] != 'nr or cqi report':

            # The SCell event gets its own note, independent of the ones above.
            others = ''
            pcis = str(df["physCellId-r10"].iloc[i]).split('@')
            freqs = str(df["dl-CarrierFreq-r10"].iloc[i]).split('@')
            orig_scells = scells
            scells = [(int(float(pci)), int(float(freq))) for pci, freq in zip(pcis, freqs)]

            others += f' Set up {len(scells)} SCell.'
            trans = f'{orig_scells} -> {scells}'

            end, _ = find_1st_after(i,'rrcConnectionReconfigurationComplete')
            
            a, _ = find_1st_before(i, "rrcConnectionReestablishmentRequest", 3)
            if a is not None:
                others += ' Near after RLF.'

            a = find_in_D_exact(['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 'SN_setup', 'SN_Rel'])
            if a is not None:
                others += f' With {a}.'

            D['Add_SCell'].append(HO(start=t,end=end,others=others, trans=trans))
    
    return D

# ************************************************* parse into readable dataframe *************************************************

def mi_parse_handover(df, tz=8, radical=True):
    """Turn the events found by parse_mi_ho() into one readable table.

    Args:
        df (pd.DataFrame): RRC records as expected by parse_mi_ho(), plus the
            'LTE_RRC_Serv_Cell_Info' rows and their cell-info columns
            ('Cell Identity', 'Band ID', 'DL frequency', ...). Its 'Timestamp'
            column is converted in place by parse_mi_ho(); the serving-cell
            lookup below relies on that conversion having happened.
        tz (int | float): Hour offset forwarded to parse_mi_ho().
        radical (bool): When True, drop rows whose note contains
            'Caused by scg-failure' or 'Near after RLF'.

    Returns:
        tuple[pd.DataFrame, dict]: `(table, new_D)`. `table` has one row per
        event with the columns listed in `output_cols`; `new_D` is the
        parse_mi_ho() result re-keyed with the acronyms of `key_mapping`.
        When `df` is empty, or contains no event at all, `table` is an empty
        frame with the same columns.
    """
    
    def parse_trans(item):
        # Split a transition string '<master part> | <secondary part>' into
        # (m_src, m_tgt, s_src, s_tgt); absent parts become NaN and an 'O'
        # on the secondary side is reported as NaN.
        chunk = item.split(' | ')
        
        if len(chunk) == 1:
            s_src = np.nan
            s_tgt = np.nan
            if chunk[0] == '':
                m_src = np.nan
                m_tgt = np.nan
            elif chunk[0][0] == '?':
                m_src = np.nan
                m_tgt = chunk[0].split(' -> ')[1]
            else:
                m_src = chunk[0].split(' -> ')[0]
                m_tgt = chunk[0].split(' -> ')[1]
        else:
            if chunk[1] == 'O':
                s_src = np.nan
                s_tgt = np.nan
            else:
                chunk1 = chunk[1].split(' -> ')
                if len(chunk1) == 1:
                    s_src = chunk1[0]
                    s_tgt = np.nan
                else:
                    s_src = chunk1[0] if chunk1[0] != 'O' else np.nan
                    s_tgt = chunk1[1] if chunk1[1] != 'O' else np.nan
                
            chunk1 = chunk[0].split(' -> ')
            if len(chunk1) == 1:
                m_src = chunk1[0]
                m_tgt = np.nan
            else:
                m_src = chunk1[0]
                m_tgt = chunk1[1]
                
        return m_src, m_tgt, s_src, s_tgt
    
    key_mapping = {
        'Conn_Rel': 'CXNR',
        'Conn_Req': 'CXNS',
        'LTE_HO': 'LTEH',
        'MN_HO': 'MCGH',
        'MN_HO_to_eNB': 'SCGR-II',
        'SN_setup': 'SCGA',
        'SN_Rel': 'SCGR-I',
        'SN_HO': 'SCGM',
        'RLF_II': 'MCGF',
        'RLF_III': 'NASR',
        'SCG_RLF': 'SCGF',
        'Add_SCell': 'SCLA'
    }

    # One column list for every return path, so that empty and non-empty
    # results share the same schema (including 'inter-gNB').
    info_cols = ['PCI', 'Cell Identity', 'eNB_ID', 'next_eNB_ID', 'Band ID', 'next_BID', 'DL frequency', 'UL frequency', 'DL bandwidth', 'UL bandwidth', 'TAC', 'MCC', 'MNC']
    output_cols = ['type', 'start', 'end', 'others', 'm_src', 'm_tgt', 's_src', 's_tgt', 'category', 'inter-eNB', 'inter-Freq', 'band_cng', 'inter-gNB', 'inter-RAT', '4G_5G', 'cause', 'near_before_RLF', *info_cols]

    if df.empty:
        # Nothing to scan: an empty frame (possibly without any column) is not
        # handed to parse_mi_ho(); the result it would give is built directly.
        print('Empty RRC File!!!')
        return pd.DataFrame(columns=output_cols), {acronym: [] for acronym in key_mapping.values()}
    
    D = parse_mi_ho(df, tz)
    
    # rename as acronym
    new_D = {key_mapping.get(key, key): value for key, value in D.items()}

    if not any(new_D.values()):
        # No event of any kind: the steps below need the 'others' / 'trans'
        # columns, which only exist once at least one event was recorded.
        return pd.DataFrame(columns=output_cols), new_D
    
    table = pd.DataFrame()
    for key, lst in new_D.items():
        table1 = pd.DataFrame(lst, index=[key]*len(lst)).reset_index(names='type')
        table = pd.concat([table, table1], ignore_index=True)

    # add Cell Identity & eNB ID
    sc_info = (
        df[df['type_id'] == 'LTE_RRC_Serv_Cell_Info'][['Timestamp', 'type_id', 'PCI', 'Cell Identity', 'Band ID', 'DL frequency', 'UL frequency', 'DL bandwidth', 'UL bandwidth', 'TAC', 'MCC', 'MNC']]
        .reset_index(drop=True)
        .rename(columns={'Timestamp': 'start', 'type_id': 'type'})
    )
    sc_info['eNB_ID'] = sc_info['Cell Identity'] // 256
    # sc_info['Cell_ID'] = sc_info['Cell Identity'] % 256
    sc_info = sc_info[['start', 'type', 'PCI', 'Cell Identity', 'eNB_ID', 'Band ID', 'DL frequency', 'UL frequency', 'DL bandwidth', 'UL bandwidth', 'TAC', 'MCC', 'MNC']]

    table = pd.concat([table, sc_info], ignore_index=True).sort_values(by='start').reset_index(drop=True)

    # Forward-fill every event with the most recent serving-cell info row.
    # Events that precede the first info row are left unfilled (NaN).
    is_not_start = True
    selected_cols = ['PCI', 'Cell Identity', 'eNB_ID', 'Band ID', 'DL frequency', 'UL frequency', 'DL bandwidth', 'UL bandwidth', 'TAC', 'MCC', 'MNC']
    for i, row in table.iterrows():
        if row['type'] == 'LTE_RRC_Serv_Cell_Info':
            is_not_start = False
            info_to_fill = row[selected_cols].to_list()
            continue
        if is_not_start:
            continue
        table.loc[i, selected_cols] = info_to_fill

    table = table[table['type'] != 'LTE_RRC_Serv_Cell_Info'].reset_index(drop=True)
    
    # parse source & target cells
    for i, row in table.iterrows():
        table.loc[i, ['m_src', 'm_tgt', 's_src', 's_tgt']] = parse_trans(row['trans'])
    
    # distinguish intra/inter-eNB HO
    # (Series.isin replaces np.in1d, which NumPy 2.0 deprecates.)
    set_aside = table['type'].isin(['SCLA', 'SCGA', 'SCGR-I', 'SCGF'])
    table1 = table[set_aside]
    table = table[~set_aside].reset_index(drop=True)
    
    table['next_eNB'] = table['eNB_ID'].shift(-1)
    for i, row in table.iloc[:-1].iterrows():
        # NOTE(review): NaN != NaN is True, so events before the first
        # serving-cell info row are also labelled inter-eNB - confirm intent.
        if row['eNB_ID'] != row['next_eNB'] and row['type'] not in ['CXNS', 'CXNR']:
            if row['others'] == '':
                table.at[i, 'others'] = 'Inter eNB HO.'
            else:
                table.at[i, 'others'] += ' Inter eNB HO.'
    
    table = pd.concat([table, table1], ignore_index=True).sort_values(by='start').reset_index(drop=True)
    
    # label SCG Addition near after SCG Failure
    scg_rows = table['type'].isin(['SCGA', 'SCGR-I', 'SCGR-II'])
    table1 = table[~scg_rows]
    table = table[scg_rows].reset_index(drop=True)
    
    table['prev_cmt'] = table['others'].shift(1)
    for i, row in table.iloc[1:].iterrows():
        if row['type'] == 'SCGA':
            if 'Near after SN_Rel' in row['others'] and 'Caused by scg-failure' in row['prev_cmt']:
                table.at[i, 'others'] += ' Caused by scg-failure.'
    
    # combine closed SCG Addition & Release pair (which are not caused by scg-failure or RLF) into SCG Change
    table['next_end'] = table['end'].shift(-1)
    table['next_cmt'] = table['others'].shift(-1)
    table['next_s_tgt'] = table['s_tgt'].shift(-1)
    indices_to_remove = []
    for i, row in table.iloc[:-1].iterrows():
        if row['type'] == 'SCGR-I' and 'Near after SN_Rel' in row['next_cmt'] and row['s_src'] != row['next_s_tgt']:
            table.at[i, 'end'] = row['next_end']
            table.at[i, 's_tgt'] = row['next_s_tgt']
            table.at[i, 'type'] = 'SCGC-I'
            indices_to_remove.append(i+1)
        if row['type'] == 'SCGR-II' and 'Near after MN_HO_to_eNB' in row['next_cmt'] and row['s_src'] != row['next_s_tgt']:
            table.at[i, 'end'] = row['next_end']
            table.at[i, 's_tgt'] = row['next_s_tgt']
            table.at[i, 'type'] = 'SCGC-II'
            indices_to_remove.append(i+1)
    table = table.drop(indices_to_remove)
    
    table = pd.concat([table, table1], ignore_index=True).sort_values(by='start').reset_index(drop=True)
    
    # re-classify eNB HO & MeNB HO
    table.loc[table['type'].isin(['LTEH']) & table['others'].str.contains('Inter eNB HO'), 'type'] = 'ENBH'
    table.loc[table['type'].isin(['MCGH']) & table['others'].str.contains('Inter eNB HO'), 'type'] = 'MNBH'
    
    # add the next eNB ID when meeting inter-eNB HO
    table1 = table[~table['others'].str.contains('Inter eNB HO')]
    table = table[table['others'].str.contains('Inter eNB HO')].reset_index(drop=True)
    
    table['next_eNB_ID'] = table['eNB_ID'].shift(-1)
    
    table = pd.concat([table, table1], ignore_index=True).sort_values(by='start').reset_index(drop=True)
    
    # detect band change and add the next Band ID when meeting inter-Freq HO
    band_mapping = {}
    for i, row in table[~table.duplicated(subset=['DL frequency'])].dropna(subset=['DL frequency']).iterrows():
        band_mapping[int(row['DL frequency'])] = row['Band ID']
    
    print(band_mapping)
    
    table1 = table[~table['others'].str.contains('Inter frequency HO')]
    table = table[table['others'].str.contains('Inter frequency HO')].reset_index(drop=True)
    
    table['next_BID'] = table['Band ID'].shift(-1)
    try:
        # The last inter-frequency HO has no successor row; look its band up
        # from the target frequency instead.
        table.at[len(table)-1, 'next_BID'] = band_mapping[ast.literal_eval(table.iloc[-1]['m_tgt'])[1]]
    except Exception:
        # Best effort: no inter-frequency row, unparsable target, or a
        # frequency that was never seen - leave next_BID unset.
        pass
    
    table = pd.concat([table, table1], ignore_index=True).sort_values(by='start').reset_index(drop=True)
    
    # check whether RLF is near after an HO event
    ignored = table['type'].isin(['CXNS', 'CXNR', 'SCLA'])
    table1 = table[ignored]
    table = table[~ignored].reset_index(drop=True)
    
    table['next_start'] = table['start'].shift(-1)
    table['next_type'] = table['type'].shift(-1)
    table['near_before_RLF'] = False
    for i, row in table.iloc[:-1].iterrows():
        # check whether an HO event is near before an RLF (in 3 seconds)
        if row['type'] in ['LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 'SCGA', 'SCGR-I', 'SCGR-II', 'SCGC-I', 'SCGC-II'] and \
            row['next_type'] in ['NASR', 'MCGF', 'SCGF']:
                # An HO without a recorded end cannot be placed relative to the
                # RLF; comparing None with a timestamp would raise TypeError.
                if pd.isna(row['end']):
                    continue
                next_type = row['next_type']
                # Report the tightest window (1, 2 or 3 s) the HO end falls into.
                for seconds in (1, 2, 3):
                    if row['end'] > row['next_start'] - pd.Timedelta(seconds=seconds):
                        table.at[i, 'near_before_RLF'] = True
                        if row['others'] == '':
                            table.at[i, 'others'] = f'Near before {next_type} {seconds} sec.'
                        else:
                            table.at[i, 'others'] += f' Near before {next_type} {seconds} sec.'
                        break
                    
    table = pd.concat([table, table1], ignore_index=True).sort_values(by='start').reset_index(drop=True)
    
    # add category
    table['category'] = 'Others'
    table.loc[table['type'].isin(['LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II']), 'category'] = 'HO'
    table.loc[table['type'].isin(['MCGF', 'NASR', 'SCGF']), 'category'] = 'RLF'

    # add failure cause
    failure_cause = [
        'reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)',
        't310-Expiry (0)', 'randomAccessProblem (1)', 'rlc-MaxNumRetx (2)', 'synchReconfigFailureSCG (3)', 'scg-ReconfigFailure (4)', 'srb3-IntegrityFailure (5)', 'other-r16 (6)'
    ]
    
    # Move the cause out of the free-text note into its own column.
    for tag in failure_cause:
        table.loc[table['others'].str.contains(tag, regex=False), 'cause'] = tag
        table['others'] = table['others'].str.replace(f" {tag}.", "", regex=False)
        table['others'] = table['others'].str.replace(f"{tag}.", "", regex=False)
    
    # add Access Technology type
    table['4G_5G'] = '4G'
    table.loc[table['type'].isin(['SCGM', 'SCGA', 'SCGR-I', 'SCGC-I', 'SCGF']), '4G_5G'] = '5G'
    table.loc[table['type'].isin(['SCGR-II', 'SCGC-II']), '4G_5G'] = '4G_5G'
    
    # add more boolean columns
    # (regex=False: the labels are literal text, as in the cause loop above)
    table['inter-eNB'] = False
    table.loc[table['others'].str.contains('Inter eNB HO'), 'inter-eNB'] = True
    table['others'] = table['others'].str.replace(" Inter eNB HO.", "", regex=False)
    table['others'] = table['others'].str.replace("Inter eNB HO.", "", regex=False)
    
    table['inter-Freq'] = False
    table.loc[table['others'].str.contains('Inter frequency HO'), 'inter-Freq'] = True
    table['others'] = table['others'].str.replace(" Inter frequency HO.", "", regex=False)
    table['others'] = table['others'].str.replace("Inter frequency HO.", "", regex=False)
    
    table['band_cng'] = False
    table.loc[table['inter-Freq'] & (table['Band ID'] != table['next_BID']), 'band_cng'] = True
    
    table['inter-RAT'] = False
    table.loc[table['type'].isin(['SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II']), 'inter-RAT'] = True
    
    table['inter-gNB'] = False
    table.loc[table['type'].isin(['SCGC-I', 'SCGC-II']), 'inter-gNB'] = True
    
    # ignore CXNS, CXNR, SCLA
    table = table[~table['type'].isin(['CXNS', 'CXNR', 'SCLA'])].reset_index(drop=True)
    
    # remove SCG Addition, Release caused by SCG Failure or any other RLFs if needed (default: True)
    if radical:
        table = table[~((table['others'].str.contains('Caused by scg-failure') | table['others'].str.contains('Near after RLF')))].reset_index(drop=True)
    
    # select columns
    table = table[output_cols]
    
    return table, new_D

In [68]:
def handover_classify_labelling(df_ho, df_dl=None, df_ul=None, ratio=0.5, scope=None):
    """Tag packets with the handover/RLF window they fall into and tally loss per window.

    For every event in `df_ho` three time windows are built: 'before' (prior), 'during'
    (peri) and 'after' (post). Packets whose time lies in a window are labelled with that
    stage and the event's attributes; downlink packets are matched on `arr_time`, uplink
    packets on `xmit_time`. Packets outside every window keep the reset labels
    (category/type 'stable'). Matching is half-open: `lower <= t < upper`.

    Args:
        df_ho (pd.DataFrame): event table with columns 'type', 'start', 'end', 'cause',
            'category', 'inter-RAT', 'inter-eNB', 'inter-Freq', 'band_cng', 'inter-gNB'.
            Row labels are used as positions, so the index must be 0..n-1. `end` may be
            NaT, in which case the event is treated as instantaneous at `start`.
        df_dl (pd.DataFrame | None): downlink packets ('Timestamp', 'arr_time', 'lost',
            'excl'). Label columns are written to it IN PLACE.
            NOTE(review): first/last row are taken as the time range, so the frame is
            assumed to be sorted by 'Timestamp' -- confirm.
        df_ul (pd.DataFrame | None): uplink packets ('Timestamp', 'xmit_time', 'lost',
            'excl'). Label columns are written to it IN PLACE.
        ratio (float | None): where to split the overlap between neighbouring events'
            windows (see `interp`); None disables the overlap handling.
        scope (dict | None): `tag -> (pre, post, offset)` in seconds. The prior window is
            `[start + pre, start)`, the post window `(end, end + post - offset]`.
            NOTE(review): `offset` is presumably a typical interruption time -- confirm.

    Returns:
        tuple: `(E, df_dl, df_ul)` where `E[stage][tag]` is a list of
        `(interval, Metrics)` per event, `E['overview']['stable_intv']` is
        `(stable_interval, Metrics)` and `E['overview']['duration']` is a `Duration`.
    """
    if scope is None:
        scope = {
            'LTEH': (-4.0, 2.0, 0.018),
            'ENBH': (-4.0, 2.6, 0.019),
            'MCGH': (-3.0, 3.3, 0.019),
            'MNBH': (-3.2, 3.3, 0.02),
            'SCGM': (-4.2, 3.3, 0.017),
            'SCGA': (-1.2, 2.6, 0.027),
            'SCGR-I': (-3.1, 3.3, 0.04),
            'SCGC-I': (-3.0, 2.9, 0.37),
            'SCGR-II': (-2.0, 4.1, 0.034),
            'SCGC-II': (-2.0, 3.2, 0.396),
            'MCGF': (-5.8, 7.2, 0.078),
            'NASR': (-4.0, 6.5, 0.394),
            'SCGF': (-4.0, 4.6, 0.111)
        }

    def interp(x, y, ratio):
        """
        Args:
            x, y (datetime.datetime): x < y
            ratio (float): a decimal numeral in a range [0, 1]; 0 means break at x, 1 means break at y.
        Returns:
            (datetime.datetime): breakpoint of interpolation
        """
        return x + (y - x) * ratio

    def end_or_start(evt):
        """Return the event's end time, falling back to its start when `end` is NaT."""
        return evt.end if pd.notna(evt.end) else evt.start

    Metrics = namedtuple('Metrics', ['dl_pkt', 'dl_lost', 'dl_excl', 'ul_pkt', 'ul_lost', 'ul_excl'])
    Duration = namedtuple('Duration', ['total', 'stable', 'unstable'])

    mcgf = [f'MCGF_{suffix}' for suffix in ['reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)']]
    nasr = [f'NASR_{suffix}' for suffix in ['reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)']]
    scgf = [f'SCGF_{suffix}' for suffix in ['t310-Expiry (0)', 'randomAccessProblem (1)', 'rlc-MaxNumRetx (2)', 'synchReconfigFailureSCG (3)', 'scg-ReconfigFailure (4)', 'srb3-IntegrityFailure (5)', 'other-r16 (6)']]

    stages = ['before', 'during', 'after']
    E = { stage: { key: [] for key in [*scope.keys(), *mcgf, *nasr, *scgf] } for stage in stages }
    E['overview'] = {}

    selected_cols = ['stage', 'category', 'type', 'cause', 'index', 'inter-RAT', 'inter-eNB', 'inter-Freq', 'band_cng', 'inter-gNB']
    reset_values = ['-', 'stable', 'stable', '-', -1, False, False, False, False, False]

    # Sentinel range that never wins the min()/max() below
    no_range = (pd.Timestamp.max, pd.Timestamp.min)

    def reset_labels(df):
        """Reset the label columns of `df` in place; return its time range padded by 0.1 s."""
        if df is None:
            return no_range
        for col, reset_value in zip(selected_cols, reset_values):
            df[col] = reset_value
        if df.empty:
            # nothing to take a first/last timestamp from
            return no_range
        margin = pd.Timedelta(seconds=0.1)
        return df.iloc[0]['Timestamp'] - margin, df.iloc[-1]['Timestamp'] + margin

    def count_lost_excl(df):
        """Return (#lost, #excl-but-not-lost) of `df` as plain ints (vectorised)."""
        return int(df['lost'].sum()), int((~df['lost'] & df['excl']).sum())

    def label_and_count(df, time_col, intervals, ho_info):
        """Label the packets of `df` inside each stage interval; return per-stage counts."""
        pkt, lost, excl = [0, 0, 0], [0, 0, 0], [0, 0, 0]
        if df is None:
            return pkt, lost, excl
        for k, (stage, intv) in enumerate(zip(stages, intervals)):
            if intv.empty:
                continue
            filt = (df[time_col] >= intv.lower) & (df[time_col] < intv.upper)
            window = df[filt]
            pkt[k] = len(window)
            lost[k], excl[k] = count_lost_excl(window)
            df.loc[filt, selected_cols] = [stage, *ho_info]
        return pkt, lost, excl

    dl_start_time, dl_end_time = reset_labels(df_dl)
    ul_start_time, ul_end_time = reset_labels(df_ul)

    start_time, end_time = min(dl_start_time, ul_start_time), max(dl_end_time, ul_end_time)
    stable_interval = P.open(start_time, end_time)  # empty when there is no packet data
    last = len(df_ho) - 1
    for i, row in df_ho.iterrows():
        prior_row = df_ho.iloc[i-1] if i != 0 else None
        post_row = df_ho.iloc[i+1] if i != last else None

        # Peek the next event to avoid HO overlapping with handoverFailure (skip and set the next prior event)
        if post_row is not None and pd.notna(row.end) and row.end > post_row.start:
            print('Overlapping event occurs!!')
            print(i, row['start'], row['end'], row['type'], row['cause'])
            print(i+1, post_row['start'], post_row['end'], post_row['type'], post_row['cause'])
            continue
        if prior_row is not None and pd.notna(prior_row.end) and prior_row.end > row.start:
            # the previous event was skipped above; use the one before it (None if there is none)
            prior_row = df_ho.iloc[i-2] if i > 1 else None

        # Basic information
        tag = row['type']  # specific column name
        cause = row['cause']
        start = row['start']          # event start time
        evt_end = end_or_start(row)   # event end time (== start for events without an end)
        ho_info = [row['category'], row['type'], row['cause'], i, row['inter-RAT'], row['inter-eNB'], row['inter-Freq'], row['band_cng'], row['inter-gNB']]

        # peri_interval
        if pd.isna(row.end):
            peri_interval = P.singleton(start)
        else:
            peri_interval = P.closed(start, row.end)

        # prior_interval
        C = start + pd.Timedelta(seconds=scope[tag][0])
        D = start
        prior_interval = P.closedopen(C, D)
        # `prior_row is not None` (rather than `i != 0`) also covers i == 1 when row 0 was skipped
        if ratio is not None and prior_row is not None:
            prior_tag = prior_row['type']
            A = max(prior_row.start, end_or_start(prior_row))
            B = A + pd.Timedelta(seconds=scope[prior_tag][1]-scope[prior_tag][2])
            if P.openclosed(A, B).overlaps(prior_interval):
                bkp = interp(C, B, ratio)
                bkp = max(bkp, A)  # avoid the breakpoint overlapping the previous event's duration
                # bkp = min(max(bkp, A), D)  # optional: if no window encroaches on another, none is encroached on
                prior_interval = P.closedopen(bkp, D)
                if A in prior_interval:
                    # bkp == A: keep the previous event's end instant out of this window
                    prior_interval = P.open(bkp, D)

        # post_interval (anchored at `start` when the event has no end, matching how the
        # following event computes its predecessor's post window)
        C = evt_end
        D = evt_end + pd.Timedelta(seconds=scope[tag][1]-scope[tag][2])
        post_interval = P.openclosed(C, D)
        if ratio is not None and post_row is not None:
            post_tag = post_row['type']
            B = min(post_row.start, end_or_start(post_row))
            A = B + pd.Timedelta(seconds=scope[post_tag][0])
            if P.closedopen(A, B).overlaps(post_interval):
                bkp = interp(A, D, ratio)
                bkp = min(bkp, B)  # avoid the breakpoint overlapping the following event's duration
                # bkp = max(min(bkp, B), C)  # optional: if no window encroaches on another, none is encroached on
                post_interval = P.open(C, bkp)

        # calculate lost & excl (and label the packets in place)
        intervals = [prior_interval, peri_interval, post_interval]
        dl_pkt, dl_lost, dl_excl = label_and_count(df_dl, 'arr_time', intervals, ho_info)
        ul_pkt, ul_lost, ul_excl = label_and_count(df_ul, 'xmit_time', intervals, ho_info)

        # fill in the blank
        for k, (stage, intv) in enumerate(zip(stages, intervals)):
            mets = Metrics(dl_pkt[k], dl_lost[k], dl_excl[k], ul_pkt[k], ul_lost[k], ul_excl[k])
            E[stage][tag].append((intv, mets))
            if tag in ['MCGF', 'NASR', 'SCGF']:
                E[stage][f'{tag}_{cause}'].append((intv, mets))

        # update stable interval
        stable_interval = stable_interval - prior_interval - peri_interval - post_interval

    stable_dl_pkt, stable_dl_lost, stable_dl_excl = 0, 0, 0
    stable_ul_pkt, stable_ul_lost, stable_ul_excl = 0, 0, 0
    if df_dl is not None:
        stable_dl = df_dl[df_dl['category'] == 'stable']
        stable_dl_pkt = len(stable_dl)
        stable_dl_lost, stable_dl_excl = count_lost_excl(stable_dl)

    if df_ul is not None:
        stable_ul = df_ul[df_ul['category'] == 'stable']
        stable_ul_pkt = len(stable_ul)
        stable_ul_lost, stable_ul_excl = count_lost_excl(stable_ul)

    # Without any packet data the sentinels are still in place; subtracting them would overflow
    total_duration = (end_time - start_time).total_seconds() if start_time <= end_time else 0.0
    stable_duration = 0
    for intv in stable_interval:
        if intv.empty:
            continue
        stable_duration += (intv.upper - intv.lower).total_seconds()
    unstable_duration = total_duration - stable_duration

    E['overview']['stable_intv'] = (stable_interval, Metrics(stable_dl_pkt, stable_dl_lost, stable_dl_excl, stable_ul_pkt, stable_ul_lost, stable_ul_excl))
    E['overview']['duration'] = Duration(total_duration, stable_duration, unstable_duration)

    return E, df_dl, df_ul

In [69]:
def metrics_duration_into_dict(E):
    """Turn the namedtuple-based event statistics into plain dicts (e.g. for pickling).

    Args:
        E (dict): `E[stage][tag]` is a list of `(interval, Metrics)` pairs for the stages
            'before', 'during' and 'after'; `E['overview']` holds 'stable_intv'
            (`(interval, Metrics)`) and 'duration' (`Duration`).

    Returns:
        dict: same layout, with every namedtuple replaced by its `_asdict()` form. Every
        known event tag is present in each stage, even when `E` has no entry for it.

    Raises:
        KeyError: if `E` contains a tag outside the known event tags.
    """
    stages = ('before', 'during', 'after')
    failure_causes = {
        'MCGF': ('reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'),
        'NASR': ('reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'),
        'SCGF': ('t310-Expiry (0)', 'randomAccessProblem (1)', 'rlc-MaxNumRetx (2)', 'synchReconfigFailureSCG (3)',
                 'scg-ReconfigFailure (4)', 'srb3-IntegrityFailure (5)', 'other-r16 (6)'),
    }
    tags = ['LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II']
    tags.extend(failure_causes)  # 'MCGF', 'NASR', 'SCGF'
    for failure, causes in failure_causes.items():
        tags.extend(f'{failure}_{cause}' for cause in causes)

    A = {}
    for stage in stages:
        converted = {tag: [] for tag in tags}
        for tag, records in E[stage].items():
            bucket = converted[tag]
            for record in records:
                bucket.append((record[0], record[1]._asdict()))
        A[stage] = converted

    overview = E['overview']
    A['overview'] = {
        'stable_intv': (overview['stable_intv'][0], overview['stable_intv'][1]._asdict()),
        'duration': overview['duration']._asdict(),
    }
    return A

def dict_into_metrics_duration(A):
    """Rebuild the namedtuple-based event statistics from their plain-dict form.

    Args:
        A (dict): `A[stage][tag]` is a list of `(interval, metrics_dict)` pairs for the
            stages 'before', 'during' and 'after'; `A['overview']` holds 'stable_intv'
            (`(interval, metrics_dict)`) and 'duration' (a dict).

    Returns:
        dict: same layout, with metric dicts turned into `Metrics` and the duration dict
        into `Duration`. Every known event tag is present in each stage.

    Raises:
        KeyError: if `A` contains a tag outside the known event tags.
    """
    Metrics = namedtuple('Metrics', ['dl_pkt', 'dl_lost', 'dl_excl', 'ul_pkt', 'ul_lost', 'ul_excl'])
    Duration = namedtuple('Duration', ['total', 'stable', 'unstable'])

    stages = ('before', 'during', 'after')
    failure_causes = {
        'MCGF': ('reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'),
        'NASR': ('reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'),
        'SCGF': ('t310-Expiry (0)', 'randomAccessProblem (1)', 'rlc-MaxNumRetx (2)', 'synchReconfigFailureSCG (3)',
                 'scg-ReconfigFailure (4)', 'srb3-IntegrityFailure (5)', 'other-r16 (6)'),
    }
    tags = ['LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II']
    tags.extend(failure_causes)  # 'MCGF', 'NASR', 'SCGF'
    for failure, causes in failure_causes.items():
        tags.extend(f'{failure}_{cause}' for cause in causes)

    B = {}
    for stage in stages:
        rebuilt = {tag: [] for tag in tags}
        for tag, records in A[stage].items():
            bucket = rebuilt[tag]
            for record in records:
                bucket.append((record[0], Metrics(**record[1])))
        B[stage] = rebuilt

    overview = A['overview']
    B['overview'] = {
        'stable_intv': (overview['stable_intv'][0], Metrics(**overview['stable_intv'][1])),
        'duration': Duration(**overview['duration']),
    }
    return B

In [70]:
# Event taxonomy: handovers (HO) grouped by radio-access relation, and radio link
# failures (RLF) mapped to their possible cause strings.
evt_category = dict(
    HO={
        'intra-RAT 4G': ['LTEH', 'ENBH', 'MCGH', 'MNBH'],
        'intra-RAT 5G': ['SCGM'],
        'inter-RAT': ['SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II'],
    },
    RLF={
        'MCGF': ['reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'],
        'NASR': ['reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'],
        'SCGF': ['t310-Expiry (0)', 'randomAccessProblem (1)', 'rlc-MaxNumRetx (2)', 'synchReconfigFailureSCG (3)',
                 'scg-ReconfigFailure (4)', 'srb3-IntegrityFailure (5)', 'other-r16 (6)'],
    },
)

# Flat list of every event type, in the order: HO, RLF, others
evt_types = [
    *(ho_type for group in evt_category['HO'].values() for ho_type in group),  # HO
    *evt_category['RLF'],  # RLF
    'CXNS', 'CXNR', 'SCLA',  # Others
]

# Dual Testing

## Single Preliminary

In [71]:
# Raw inputs of the first radio (sm01): RRC log, downlink and uplink loss/latency traces
rrc_file1, dl_file1, ul_file1 = (
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm01/#02/data/diag_log_sm01_2023-05-26_13-36-22_rrc.csv",
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm01/#02/data/udp_dnlk_loss_latency.csv",
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm01/#02/data/udp_uplk_loss_latency.csv",
)

# Raw inputs of the second radio (sm00), same trip
rrc_file2, dl_file2, ul_file2 = (
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm00/#02/data/diag_log_sm00_2023-05-26_13-36-22_rrc.csv",
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm00/#02/data/udp_dnlk_loss_latency.csv",
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm00/#02/data/udp_uplk_loss_latency.csv",
)

In [73]:
# Sub-type keys of the radio-link-failure events, spelled '<event>_<cause>'
mcgf = ['MCGF_' + cause for cause in ('reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)')]
nasr = ['NASR_' + cause for cause in ('reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)')]
scgf = ['SCGF_' + cause for cause in ('t310-Expiry (0)', 'randomAccessProblem (1)', 'rlc-MaxNumRetx (2)', 'synchReconfigFailureSCG (3)', 'scg-ReconfigFailure (4)', 'srb3-IntegrityFailure (5)', 'other-r16 (6)')]

evt_types = ['LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II']
evt_types += ['MCGF', *mcgf]
evt_types += ['NASR', *nasr]
evt_types += ['SCGF', *scgf]

# D[tag] collects one Stage(before, during, after) per trace; the two extra keys
# collect each trace's stable-period metrics and its duration summary.
D = {}
for tag in evt_types:
    D[tag] = []
D.update(stable_mets=[], duration=[])

Metrics = namedtuple('Metrics', ['dl_pkt', 'dl_lost', 'dl_excl', 'ul_pkt', 'ul_lost', 'ul_excl'])
Stats = namedtuple('Stats', ['count', 'duration', 'metrics'])
Stage = namedtuple('Stage', ['before', 'during', 'after'])

for rrc_file, dl_file, ul_file in [(rrc_file1, dl_file1, ul_file1), (rrc_file2, dl_file2, ul_file2)]:
    # Parse the events and label every packet with the event window it falls into
    df_ho, _ = mi_parse_handover(pd.read_csv(rrc_file), radical=True)
    df_dl = set_data(pd.read_csv(dl_file))
    df_ul = set_data(pd.read_csv(ul_file))
    E, df_dl, df_ul = handover_classify_labelling(df_ho, df_dl, df_ul)

    # Cache the labelled traces and the statistics next to their inputs,
    # so later cells do not have to parse again
    df_dl.to_pickle(os.path.join(os.path.dirname(dl_file), 'udp_dnlk_loss_latency_ho.pkl'))
    df_ul.to_pickle(os.path.join(os.path.dirname(ul_file), 'udp_uplk_loss_latency_ho.pkl'))
    with open(os.path.join(os.path.dirname(rrc_file), 'single_radio_ho_stats.pkl'), 'wb') as f:
        pickle.dump(metrics_duration_into_dict(E), f)

    for tag in evt_types:
        infos = []
        for stage in ('before', 'during', 'after'):
            intv_list = [record[0] for record in E[stage][tag]]
            mets_list = [record[1] for record in E[stage][tag]]

            # number of events of this type
            count = len(E[stage][tag])

            # total length (seconds) of the non-empty windows
            duration = 0
            for intv in intv_list:
                if not intv.empty:
                    duration += (intv.upper - intv.lower).total_seconds()

            # packet counters summed field by field over all events
            metrics = Metrics(*[sum([getattr(mets, field) for mets in mets_list]) for field in Metrics._fields])

            infos.append(Stats(count, duration, metrics))
        D[tag].append(Stage(*infos))

    D['stable_mets'].append(E['overview']['stable_intv'][1])
    D['duration'].append(E['overview']['duration'])

pprint(D, sort_dicts=False)

{1750: 3.0}
{3050: 7.0, 1750: 3.0}
{'LTEH': [Stage(before=Stats(count=0, duration=0, metrics=Metrics(dl_pkt=0, dl_lost=0, dl_excl=0, ul_pkt=0, ul_lost=0, ul_excl=0)), during=Stats(count=0, duration=0, metrics=Metrics(dl_pkt=0, dl_lost=0, dl_excl=0, ul_pkt=0, ul_lost=0, ul_excl=0)), after=Stats(count=0, duration=0, metrics=Metrics(dl_pkt=0, dl_lost=0, dl_excl=0, ul_pkt=0, ul_lost=0, ul_excl=0))),
          Stage(before=Stats(count=0, duration=0, metrics=Metrics(dl_pkt=0, dl_lost=0, dl_excl=0, ul_pkt=0, ul_lost=0, ul_excl=0)), during=Stats(count=0, duration=0, metrics=Metrics(dl_pkt=0, dl_lost=0, dl_excl=0, ul_pkt=0, ul_lost=0, ul_excl=0)), after=Stats(count=0, duration=0, metrics=Metrics(dl_pkt=0, dl_lost=0, dl_excl=0, ul_pkt=0, ul_lost=0, ul_excl=0)))],
 'ENBH': [Stage(before=Stats(count=1, duration=0, metrics=Metrics(dl_pkt=0, dl_lost=0, dl_excl=0, ul_pkt=0, ul_lost=0, ul_excl=0)), during=Stats(count=1, duration=0.018717, metrics=Metrics(dl_pkt=8, dl_lost=8, dl_excl=0, ul_pkt=0, ul_lo

## Dual Start

### Combinations

- 當我只是要看兩兩事件組合的話，data 和 event 都要用組合 (combinations)
- 當我要依主/從 radio 去分析時，data 和 event 都要用 product

In [306]:
# First radio (sm01): RRC log plus the handover-labelled downlink/uplink traces
rrc_file1, dl_file1, ul_file1 = (
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm01/#02/data/diag_log_sm01_2023-05-26_13-36-22_rrc.csv",
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm01/#02/data/udp_dnlk_loss_latency_ho.pkl",
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm01/#02/data/udp_uplk_loss_latency_ho.pkl",
)

# Second radio (sm00), same trip
rrc_file2, dl_file2, ul_file2 = (
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm00/#02/data/diag_log_sm00_2023-05-26_13-36-22_rrc.csv",
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm00/#02/data/udp_dnlk_loss_latency_ho.pkl",
    "/Users/jackbedford/Desktop/MOXA/Code/data/2023-05-26/Bandlock_UDP_8_Phone/sm00/#02/data/udp_uplk_loss_latency_ho.pkl",
)

# Both orderings of the pair: each radio takes the first (x) position once
filenames = [
    (dl_file1, dl_file2, rrc_file1, rrc_file2),
    (dl_file2, dl_file1, rrc_file2, rrc_file1),
]

In [307]:
# Columns kept from each labelled downlink trace: packet fields + handover labels
selected_cols = ['seq', 'Timestamp', 'lost', 'excl', 'latency', 'xmit_time', 'arr_time',
                 'stage', 'category', 'type', 'cause', 'index', 'inter-RAT', 'inter-eNB', 'inter-Freq', 'band_cng', 'inter-gNB']

# Event types that form the categories of the (type_x, type_y) pair table
ho_types = [
    'NASR', 'MCGF', 'SCGF',
    'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II',
    'LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 
]

# Alternatives for enumerating event pairs:
# list(it.product(ho_types, repeat=2))
# list(it.combinations_with_replacement(ho_types, 2))

# One count table per file pair, indexed by (type_x, type_y)
stats_table_list = []
for dl_file1, dl_file2, rrc_file1, rrc_file2 in filenames:
    # NOTE(review): read_pickle executes arbitrary code from the file; only load pickles
    # from a trusted source (presumably the *_ho.pkl caches written by this notebook).
    df = pd.read_pickle(dl_file1)[selected_cols]
    df1 = pd.read_pickle(dl_file2)[selected_cols]

    # Inner join on the packet sequence number; the left frame's columns get the
    # suffix '_x', the right frame's '_y'.
    df = df.merge(df1, on='seq')
    del df1

    # Values that are not in `ho_types` become NaN here, so those rows drop out of the
    # groupby below.
    df['type_x'] = df['type_x'].astype('category').cat.set_categories(ho_types)
    df['type_y'] = df['type_y'].astype('category').cat.set_categories(ho_types)

    # loss on both / exactly one / at least one of the two sides
    df['lost_x&y'] = df['lost_x'] & df['lost_y']
    df['lost_x^y'] = df['lost_x'] ^ df['lost_y']
    df['lost_x|y'] = df['lost_x'] | df['lost_y']

    # 'loex' keeps the raw `excl` flag; 'excl' is narrowed to flagged-but-not-lost packets
    df['loex_x'] = df['excl_x'].copy()
    df['loex_y'] = df['excl_y'].copy()
    df['excl_x'] = ~df['lost_x'] & df['loex_x']
    df['excl_y'] = ~df['lost_y'] & df['loex_y']

    df['loex_x&y'] = df['loex_x'] & df['loex_y']
    df['loex_x^y'] = df['loex_x'] ^ df['loex_y']
    df['loex_x|y'] = df['loex_x'] | df['loex_y']

    df['excl_x&y'] = df['lost_x&y'] ^ df['loex_x&y']
    df['excl_x^y'] = ~(df['lost_x&y'] | df['lost_x^y']) & df['loex_x^y']
    df['excl_x|y'] = df['excl_x&y'] ^ df['excl_x^y']

    # summing a constant True column yields the packet count per group
    df['total'] = True

    # observed=False is spelled out on purpose: the aggregation that follows looks up
    # every (type_x, type_y) combination, so unobserved category pairs must be present
    # (as all-zero rows) regardless of the pandas default for categorical groupers.
    table = df.groupby(['type_x', 'type_y'], observed=False) \
        [['lost_x', 'lost_y', 'lost_x&y', 'lost_x^y', 'lost_x|y', 
        'excl_x', 'excl_y', 'excl_x&y', 'excl_x^y', 'excl_x|y', 
        'loex_x', 'loex_y', 'loex_x&y', 'loex_x^y', 'loex_x|y', 'total']].sum()

    # Inspection helpers (uncomment as needed):
    # display(table.reset_index())                                   # complete table
    # display(table[(table != 0).any(axis=1)].reset_index())         # without all-zero rows
    # pprint(table.to_dict(orient='index'), sort_dicts=False)        # as a dictionary
    # display(pd.DataFrame(table.to_dict(orient='index')).T.rename_axis(['type_x', 'type_y']).reset_index())  # back from the dictionary

    stats_table_list.append(table)
    del table

In [309]:
# Add the per-file count tables element-wise; cells missing on one side count as 0
cnt_table = reduce(lambda acc, tbl: acc.add(tbl, fill_value=0), stats_table_list)
cnt_dict = cnt_table.to_dict(orient='index')

# Fold every mirrored pair (b, a) into (a, b): the per-side counters swap x <-> y,
# the joint counters and the total are symmetric and are added as they are.
swapped_cols = [('lost_x', 'lost_y'), ('excl_x', 'excl_y'), ('loex_x', 'loex_y')]
symmetric_cols = ['lost_x&y', 'lost_x^y', 'lost_x|y',
                  'excl_x&y', 'excl_x^y', 'excl_x|y',
                  'loex_x&y', 'loex_x^y', 'loex_x|y',
                  'total']

ho_combos = list(it.combinations_with_replacement(ho_types, 2))
for combo in ho_combos:
    if combo[0] == combo[1]:
        continue
    combo1 = (combo[1], combo[0])

    kept, mirrored = cnt_dict[combo], cnt_dict[combo1]
    for x_col, y_col in swapped_cols:
        kept[x_col] += mirrored[y_col]
        kept[y_col] += mirrored[x_col]
    for joint_col in symmetric_cols:
        kept[joint_col] += mirrored[joint_col]

    del cnt_dict[combo1]

cnt_table = pd.DataFrame(cnt_dict).T.rename_axis(['type_x', 'type_y'])

# Print the result
# display(cnt_table[(cnt_table != 0).any(axis=1)])

# Build the rate table
rate_table = pd.DataFrame()

# Every counter column (all but the trailing 'total') as a percentage of 'total'
molecule_cols = cnt_table.columns[:-1]
df_percentage = cnt_table[molecule_cols].div(cnt_table['total'], axis=0).mul(100).round(3)
df_percentage = df_percentage.add_suffix('_r')

# save rate (only one side affected) / blown-save rate (both sides affected),
# both relative to "at least one side affected"
for svr_col, bsr_col, xor_col, and_col, or_col in [
    ('lost_xy_svr', 'lost_xy_bsr', 'lost_x^y', 'lost_x&y', 'lost_x|y'),
    ('excl_xy_svr', 'excl_xy_bsr', 'excl_x^y', 'excl_x&y', 'excl_x|y'),
    ('loex_xy_svr', 'loex_xy_bsr', 'loex_x^y', 'loex_x&y', 'loex_x|y'),
]:
    rate_table[svr_col] = cnt_table[xor_col].div(cnt_table[or_col], axis=0).mul(100).round(3)  # save
    rate_table[bsr_col] = cnt_table[and_col].div(cnt_table[or_col], axis=0).mul(100).round(3)  # blown save

# Merge the tables and put the columns in display order
rate_table = pd.concat([rate_table, cnt_table['total'], df_percentage], axis=1)
rate_table = rate_table[['lost_xy_svr', 'lost_xy_bsr', 'lost_x_r', 'lost_y_r', 'lost_x&y_r', 'lost_x^y_r', 'lost_x|y_r',
                         'excl_xy_svr', 'excl_xy_bsr', 'excl_x_r', 'excl_y_r', 'excl_x&y_r', 'excl_x^y_r', 'excl_x|y_r',
                         'loex_xy_svr', 'loex_xy_bsr', 'loex_x_r', 'loex_y_r', 'loex_x&y_r', 'loex_x^y_r', 'loex_x|y_r', 'total']]

# Print the result (pairs that actually occurred)
display(cnt_table[cnt_table['total'] != 0].reset_index())
display(rate_table[rate_table['total'] != 0].reset_index())

Unnamed: 0,type_x,type_y,lost_x,lost_y,lost_x&y,lost_x^y,lost_x|y,excl_x,excl_y,excl_x&y,excl_x^y,excl_x|y,loex_x,loex_y,loex_x&y,loex_x^y,loex_x|y,total
0,MCGF,MCGF,230,230,228,4,232,1,1,2,0,2,231,231,230,2,232,4096
1,MCGF,ENBH,80,80,80,0,80,0,0,0,0,0,80,80,80,0,80,80
2,MCGF,MCGH,0,0,0,0,0,0,36,0,36,36,0,36,0,36,36,1440
3,MCGF,MNBH,118,0,0,118,118,514,56,38,494,532,632,56,38,612,650,3902
4,MCGF,SCGM,338,0,0,338,338,18,2,0,20,20,356,2,0,358,358,7450
5,SCGA,MNBH,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,562
6,SCGA,SCGM,0,0,0,0,0,4,0,0,4,4,4,0,0,4,4,2024
7,SCGR-I,SCGM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3426
8,SCGC-II,MNBH,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,276
9,SCGC-II,SCGM,0,0,0,0,0,16,24,0,40,40,16,24,0,40,40,6538


Unnamed: 0,type_x,type_y,lost_xy_svr,lost_xy_bsr,lost_x_r,lost_y_r,lost_x&y_r,lost_x^y_r,lost_x|y_r,excl_xy_svr,excl_xy_bsr,excl_x_r,excl_y_r,excl_x&y_r,excl_x^y_r,excl_x|y_r,loex_xy_svr,loex_xy_bsr,loex_x_r,loex_y_r,loex_x&y_r,loex_x^y_r,loex_x|y_r,total
0,MCGF,MCGF,1.724,98.276,5.615,5.615,5.566,0.098,5.664,0.0,100.0,0.024,0.024,0.049,0.0,0.049,0.862,99.138,5.64,5.64,5.615,0.049,5.664,4096
1,MCGF,ENBH,0.0,100.0,100.0,100.0,100.0,0.0,100.0,,,0.0,0.0,0.0,0.0,0.0,0.0,100.0,100.0,100.0,100.0,0.0,100.0,80
2,MCGF,MCGH,,,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,2.5,0.0,2.5,2.5,100.0,0.0,0.0,2.5,0.0,2.5,2.5,1440
3,MCGF,MNBH,100.0,0.0,3.024,0.0,0.0,3.024,3.024,92.857,7.143,13.173,1.435,0.974,12.66,13.634,94.154,5.846,16.197,1.435,0.974,15.684,16.658,3902
4,MCGF,SCGM,100.0,0.0,4.537,0.0,0.0,4.537,4.537,100.0,0.0,0.242,0.027,0.0,0.268,0.268,100.0,0.0,4.779,0.027,0.0,4.805,4.805,7450
5,SCGA,MNBH,,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,562
6,SCGA,SCGM,,,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.198,0.0,0.0,0.198,0.198,100.0,0.0,0.198,0.0,0.0,0.198,0.198,2024
7,SCGR-I,SCGM,,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,3426
8,SCGC-II,MNBH,,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,276
9,SCGC-II,SCGM,,,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.245,0.367,0.0,0.612,0.612,100.0,0.0,0.245,0.367,0.0,0.612,0.612,6538


# Airport Line

In [328]:
filenames = []

root_dir = "/Users/jackbedford/Desktop/MOXA/Code/data"
date_dirs = ['2023-10-26']
date_dirs = [os.path.join(root_dir, s) for s in sorted(date_dirs)]


def _list_subdirs(parent):
    """Return the sorted names of the directories directly under `parent`."""
    return sorted(s for s in os.listdir(parent)
                  if os.path.isdir(os.path.join(parent, s)) and s != '.DS_Store')


def _trip_files(data_dir):
    """Return `(rrc_file, dl_file, ul_file)` of one trip, or None if any of them is missing.

    The RRC log is the alphabetically first file in `data_dir` ending in 'rrc.csv'.
    """
    if not os.path.isdir(data_dir):
        return None
    rrc_names = sorted(s for s in os.listdir(data_dir) if s.endswith('rrc.csv'))
    if not rrc_names:
        return None
    files = (
        os.path.join(data_dir, rrc_names[0]),
        os.path.join(data_dir, 'udp_dnlk_loss_latency_ho.pkl'),
        os.path.join(data_dir, 'udp_uplk_loss_latency_ho.pkl'),
    )
    return files if all(os.path.isfile(path) for path in files) else None


# Layout walked here: <root>/<date>/<experiment>/<device>/<trip>/data/
for date_dir in date_dirs:
    for exp_name in _list_subdirs(date_dir):
        exp_dir = os.path.join(date_dir, exp_name)
        dev_dirs = [os.path.join(exp_dir, s) for s in _list_subdirs(exp_dir)]
        for dev_dir1, dev_dir2 in it.combinations(dev_dirs, 2):
            # Pair trips by NAME, not by position: if one device lacks a trip, pairing by
            # sorted position would silently match different trips with each other.
            trips_of_dev2 = set(_list_subdirs(dev_dir2))
            common_trips = [trip for trip in _list_subdirs(dev_dir1) if trip in trips_of_dev2]

            for trip in common_trips:
                files1 = _trip_files(os.path.join(dev_dir1, trip, 'data'))
                files2 = _trip_files(os.path.join(dev_dir2, trip, 'data'))
                if files1 is None or files2 is None:
                    # skip trips whose inputs are incomplete on either device
                    continue
                rrc_file1, dl_file1, ul_file1 = files1
                rrc_file2, dl_file2, ul_file2 = files2
                filenames.append((dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2))

pprint(filenames, sort_dicts=False)

[('/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-26/Bandlock_UDP_9_Phone/sm00/#01/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-26/Bandlock_UDP_9_Phone/sm01/#01/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-26/Bandlock_UDP_9_Phone/sm00/#01/data/udp_uplk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-26/Bandlock_UDP_9_Phone/sm01/#01/data/udp_uplk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-26/Bandlock_UDP_9_Phone/sm00/#01/data/diag_log_sm00_2023-10-26_13-30-26_rrc.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-26/Bandlock_UDP_9_Phone/sm01/#01/data/diag_log_sm01_2023-10-26_13-30-25_rrc.csv'),
 ('/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-26/Bandlock_UDP_9_Phone/sm00/#02/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-26/Bandlock_UDP_9_Phone/sm01/#02/data/udp_dnlk_loss_latency_ho.pkl

In [329]:
# Columns kept from each labelled downlink trace: packet fields + handover labels
selected_cols = ['seq', 'Timestamp', 'lost', 'excl', 'latency', 'xmit_time', 'arr_time',
                 'stage', 'category', 'type', 'cause', 'index', 'inter-RAT', 'inter-eNB', 'inter-Freq', 'band_cng', 'inter-gNB']

# Event types that form the categories of the (type_x, type_y) pair table
ho_types = [
    'NASR', 'MCGF', 'SCGF',
    'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II',
    'LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 
]

# Alternatives for enumerating event pairs:
# list(it.product(ho_types, repeat=2))
# list(it.combinations_with_replacement(ho_types, 2))

# One count table per file pair, indexed by (type_x, type_y)
stats_table_list = []
for dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2 in filenames:
    # NOTE(review): read_pickle executes arbitrary code from the file; only load pickles
    # from a trusted source (presumably the *_ho.pkl caches written by this notebook).
    df = pd.read_pickle(dl_file1)[selected_cols]
    df1 = pd.read_pickle(dl_file2)[selected_cols]

    # Inner join on the packet sequence number; the left frame's columns get the
    # suffix '_x', the right frame's '_y'.
    df = df.merge(df1, on='seq')
    del df1

    # Values that are not in `ho_types` become NaN here, so those rows drop out of the
    # groupby below.
    df['type_x'] = df['type_x'].astype('category').cat.set_categories(ho_types)
    df['type_y'] = df['type_y'].astype('category').cat.set_categories(ho_types)

    # loss on both / exactly one / at least one of the two sides
    df['lost_x&y'] = df['lost_x'] & df['lost_y']
    df['lost_x^y'] = df['lost_x'] ^ df['lost_y']
    df['lost_x|y'] = df['lost_x'] | df['lost_y']

    # 'loex' keeps the raw `excl` flag; 'excl' is narrowed to flagged-but-not-lost packets
    df['loex_x'] = df['excl_x'].copy()
    df['loex_y'] = df['excl_y'].copy()
    df['excl_x'] = ~df['lost_x'] & df['loex_x']
    df['excl_y'] = ~df['lost_y'] & df['loex_y']

    df['loex_x&y'] = df['loex_x'] & df['loex_y']
    df['loex_x^y'] = df['loex_x'] ^ df['loex_y']
    df['loex_x|y'] = df['loex_x'] | df['loex_y']

    df['excl_x&y'] = df['lost_x&y'] ^ df['loex_x&y']
    df['excl_x^y'] = ~(df['lost_x&y'] | df['lost_x^y']) & df['loex_x^y']
    df['excl_x|y'] = df['excl_x&y'] ^ df['excl_x^y']

    # summing a constant True column yields the packet count per group
    df['total'] = True

    # observed=False is spelled out on purpose: the aggregation that follows looks up
    # every (type_x, type_y) combination, so unobserved category pairs must be present
    # (as all-zero rows) regardless of the pandas default for categorical groupers.
    table = df.groupby(['type_x', 'type_y'], observed=False) \
        [['lost_x', 'lost_y', 'lost_x&y', 'lost_x^y', 'lost_x|y', 
        'excl_x', 'excl_y', 'excl_x&y', 'excl_x^y', 'excl_x|y', 
        'loex_x', 'loex_y', 'loex_x&y', 'loex_x^y', 'loex_x|y', 'total']].sum()

    # Inspection helpers (uncomment as needed):
    # display(table.reset_index())                                   # complete table
    # display(table[(table != 0).any(axis=1)].reset_index())         # without all-zero rows
    # pprint(table.to_dict(orient='index'), sort_dicts=False)        # as a dictionary
    # display(pd.DataFrame(table.to_dict(orient='index')).T.rename_axis(['type_x', 'type_y']).reset_index())  # back from the dictionary

    stats_table_list.append(table)
    del table

# 执行逐元素相加操作并得到总和
# cnt_table = stats_table_list[0]
# for df in stats_table_list[1:]:
#     cnt_table = cnt_table.add(df)
cnt_table = reduce(lambda x, y: x.add(y, fill_value=0), stats_table_list)
cnt_dict = cnt_table.to_dict(orient='index')

ho_combos = list(it.combinations_with_replacement(ho_types, 2))
for combo in ho_combos:
    if combo[0] == combo[1]:
        continue
    combo1 = (combo[1], combo[0])
    # print(combo, combo1)
    
    cnt_dict[combo]['lost_x'] += cnt_dict[combo1]['lost_y']
    cnt_dict[combo]['lost_y'] += cnt_dict[combo1]['lost_x']
    cnt_dict[combo]['excl_x'] += cnt_dict[combo1]['excl_y']
    cnt_dict[combo]['excl_y'] += cnt_dict[combo1]['excl_x']
    cnt_dict[combo]['loex_x'] += cnt_dict[combo1]['loex_y']
    cnt_dict[combo]['loex_y'] += cnt_dict[combo1]['loex_x']
    
    cnt_dict[combo]['lost_x&y'] += cnt_dict[combo1]['lost_x&y']
    cnt_dict[combo]['lost_x^y'] += cnt_dict[combo1]['lost_x^y']
    cnt_dict[combo]['lost_x|y'] += cnt_dict[combo1]['lost_x|y']
    cnt_dict[combo]['excl_x&y'] += cnt_dict[combo1]['excl_x&y']
    cnt_dict[combo]['excl_x^y'] += cnt_dict[combo1]['excl_x^y']
    cnt_dict[combo]['excl_x|y'] += cnt_dict[combo1]['excl_x|y']
    cnt_dict[combo]['loex_x&y'] += cnt_dict[combo1]['loex_x&y']
    cnt_dict[combo]['loex_x^y'] += cnt_dict[combo1]['loex_x^y']
    cnt_dict[combo]['loex_x|y'] += cnt_dict[combo1]['loex_x|y']
    
    cnt_dict[combo]['total'] += cnt_dict[combo1]['total']
    
    del cnt_dict[combo1]

cnt_table = pd.DataFrame(cnt_dict).T.rename_axis(['type_x', 'type_y'])

# 打印结果
# display(cnt_table[(cnt_table != 0).any(axis=1)])

# 創建新表格
rate_table = pd.DataFrame()
# 选择所有分子列
molecule_cols = cnt_table.columns[:-1]
# 计算百分比
df_percentage = (cnt_table[molecule_cols].div(cnt_table['total'], axis=0) * 100).round(3)
df_percentage.columns = [f'{col}_r' for col in molecule_cols]
# save / blown save
rate_table['lost_xy_svr'] = (cnt_table['lost_x^y'].div(cnt_table['lost_x|y'], axis=0) * 100).round(3) # save
rate_table['lost_xy_bsr'] = (cnt_table['lost_x&y'].div(cnt_table['lost_x|y'], axis=0) * 100).round(3) # blown save
rate_table['excl_xy_svr'] = (cnt_table['excl_x^y'].div(cnt_table['excl_x|y'], axis=0) * 100).round(3) # save
rate_table['excl_xy_bsr'] = (cnt_table['excl_x&y'].div(cnt_table['excl_x|y'], axis=0) * 100).round(3) # blown save
rate_table['loex_xy_svr'] = (cnt_table['loex_x^y'].div(cnt_table['loex_x|y'], axis=0) * 100).round(3) # save
rate_table['loex_xy_bsr'] = (cnt_table['loex_x&y'].div(cnt_table['loex_x|y'], axis=0) * 100).round(3) # blown save
# 合併表格
rate_table = pd.concat([rate_table, cnt_table['total'], df_percentage], axis=1)
rate_table = rate_table[['lost_xy_svr', 'lost_xy_bsr', 'lost_x_r', 'lost_y_r', 'lost_x&y_r', 'lost_x^y_r', 'lost_x|y_r',
                         'excl_xy_svr', 'excl_xy_bsr', 'excl_x_r', 'excl_y_r', 'excl_x&y_r', 'excl_x^y_r', 'excl_x|y_r',
                         'loex_xy_svr', 'loex_xy_bsr', 'loex_x_r', 'loex_y_r', 'loex_x&y_r', 'loex_x^y_r', 'loex_x|y_r', 'total']]

In [330]:
# Show the results: number of per-trip tables that were summed, then the
# count and rate tables restricted to type pairs that actually occurred.
print('Airport Line:', len(stats_table_list), 'combos')
display(cnt_table[cnt_table['total'] != 0].reset_index())
display(rate_table[rate_table['total'] != 0].reset_index())

save_dir = "/Users/jackbedford/Desktop/MOXA/Code/results/2023-11-08 results"
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(save_dir, exist_ok=True)
# Both files keep the positional index produced by reset_index() as an
# unnamed first column (to_csv default index=True).
cnt_table.reset_index().to_csv(os.path.join(save_dir, "Downlink_Airport_Count_Stats.csv"))
# The rate file also carries the raw counters; 'total' is dropped from the
# count side because rate_table already contains it.
pd.concat([rate_table, cnt_table[cnt_table.columns[:-1]]], axis=1).reset_index().to_csv(os.path.join(save_dir, "Downlink_Airport_Rate_Stats.csv"))

Airport Line: 64 combos


Unnamed: 0,type_x,type_y,lost_x,lost_y,lost_x&y,lost_x^y,lost_x|y,excl_x,excl_y,excl_x&y,excl_x^y,excl_x|y,loex_x,loex_y,loex_x&y,loex_x^y,loex_x|y,total
0,NASR,NASR,399,1078,263,951,1214,1117,98,711,490,1201,1516,1176,974,744,1718,2464
1,NASR,MCGF,28183,13931,8553,25008,33561,9578,8145,9601,6151,15752,37761,22076,18154,23529,41683,64784
2,NASR,SCGF,1097,417,362,790,1152,69,148,154,53,207,1166,565,516,699,1215,2613
3,NASR,SCGA,160,115,114,47,161,0,99,46,53,99,160,214,160,54,214,214
4,NASR,SCGR-I,4838,94,0,4932,4932,1639,158,171,1626,1797,6477,252,171,6387,6558,12033
5,NASR,SCGC-I,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,145
6,NASR,SCGC-II,472,0,0,472,472,0,0,0,0,0,472,0,0,472,472,482
7,NASR,ENBH,1097,296,199,995,1194,69,276,207,109,316,1166,572,406,926,1332,2773
8,NASR,MCGH,597,487,487,110,597,0,110,110,0,110,597,597,597,0,597,768
9,NASR,MNBH,946,485,321,789,1110,113,242,189,157,346,1059,727,510,766,1276,2825


Unnamed: 0,type_x,type_y,lost_xy_svr,lost_xy_bsr,lost_x_r,lost_y_r,lost_x&y_r,lost_x^y_r,lost_x|y_r,excl_xy_svr,excl_xy_bsr,excl_x_r,excl_y_r,excl_x&y_r,excl_x^y_r,excl_x|y_r,loex_xy_svr,loex_xy_bsr,loex_x_r,loex_y_r,loex_x&y_r,loex_x^y_r,loex_x|y_r,total
0,NASR,NASR,78.336,21.664,16.193,43.75,10.674,38.596,49.269,40.799,59.201,45.333,3.977,28.856,19.886,48.742,43.306,56.694,61.526,47.727,39.529,30.195,69.724,2464
1,NASR,MCGF,74.515,25.485,43.503,21.504,13.202,38.602,51.804,39.049,60.951,14.785,12.573,14.82,9.495,24.315,56.447,43.553,58.288,34.076,28.022,36.319,64.342,64784
2,NASR,SCGF,68.576,31.424,41.982,15.959,13.854,30.233,44.087,25.604,74.396,2.641,5.664,5.894,2.028,7.922,57.531,42.469,44.623,21.623,19.747,26.751,46.498,2613
3,NASR,SCGA,29.193,70.807,74.766,53.738,53.271,21.963,75.234,53.535,46.465,0.0,46.262,21.495,24.766,46.262,25.234,74.766,74.766,100.0,74.766,25.234,100.0,214
4,NASR,SCGR-I,100.0,0.0,40.206,0.781,0.0,40.987,40.987,90.484,9.516,13.621,1.313,1.421,13.513,14.934,97.392,2.608,53.827,2.094,1.421,53.079,54.5,12033
5,NASR,SCGC-I,,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,145
6,NASR,SCGC-II,100.0,0.0,97.925,0.0,0.0,97.925,97.925,,,0.0,0.0,0.0,0.0,0.0,100.0,0.0,97.925,0.0,0.0,97.925,97.925,482
7,NASR,ENBH,83.333,16.667,39.56,10.674,7.176,35.882,43.058,34.494,65.506,2.488,9.953,7.465,3.931,11.396,69.52,30.48,42.048,20.627,14.641,33.393,48.035,2773
8,NASR,MCGH,18.425,81.575,77.734,63.411,63.411,14.323,77.734,0.0,100.0,0.0,14.323,14.323,0.0,14.323,0.0,100.0,77.734,77.734,77.734,0.0,77.734,768
9,NASR,MNBH,71.081,28.919,33.487,17.168,11.363,27.929,39.292,45.376,54.624,4.0,8.566,6.69,5.558,12.248,60.031,39.969,37.487,25.735,18.053,27.115,45.168,2825


# Brown Line

In [331]:
# Collect, for every pair of devices in every experiment of the selected
# dates, the downlink / uplink / RRC files of each trip.
# Result: `filenames`, a list of
#   (dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2) tuples.
filenames = []

root_dir = "/Users/jackbedford/Desktop/MOXA/Code/data"
date_dirs = ['2023-09-12', '2023-09-21']
date_dirs = [os.path.join(root_dir, s) for s in sorted(date_dirs)]

for date_dir in date_dirs:
    # isdir() already filters out plain files such as .DS_Store.
    exp_dirs = [os.path.join(date_dir, s) for s in sorted(os.listdir(date_dir))
                if os.path.isdir(os.path.join(date_dir, s))]
    for exp_dir in exp_dirs:
        dev_dirs = [os.path.join(exp_dir, s) for s in sorted(os.listdir(exp_dir))
                    if os.path.isdir(os.path.join(exp_dir, s))]
        dev_combo_dirs = list(it.combinations(dev_dirs, 2))
        for dev_dir1, dev_dir2 in dev_combo_dirs:
            trip_dirs1 = [os.path.join(dev_dir1, s, 'data') for s in sorted(os.listdir(dev_dir1))
                          if os.path.isdir(os.path.join(dev_dir1, s))]
            trip_dirs2 = [os.path.join(dev_dir2, s, 'data') for s in sorted(os.listdir(dev_dir2))
                          if os.path.isdir(os.path.join(dev_dir2, s))]

            # NOTE(review): trips are paired by sorted position, not by name;
            # this assumes both devices recorded the same set of trips.
            for trip_dir1, trip_dir2 in zip(trip_dirs1, trip_dirs2):
                # A trip without a 'data' folder is incomplete: skip it instead
                # of letting os.listdir raise.
                if not (os.path.isdir(trip_dir1) and os.path.isdir(trip_dir2)):
                    continue

                # First entry whose name ends with 'rrc.csv', or None when the
                # trip has no RRC log (previously an IndexError).
                rrc_file1 = next((os.path.join(trip_dir1, s) for s in os.listdir(trip_dir1)
                                  if s.endswith('rrc.csv')), None)
                rrc_file2 = next((os.path.join(trip_dir2, s) for s in os.listdir(trip_dir2)
                                  if s.endswith('rrc.csv')), None)
                if rrc_file1 is None or rrc_file2 is None:
                    continue

                dl_file1 = os.path.join(trip_dir1, 'udp_dnlk_loss_latency_ho.pkl')
                ul_file1 = os.path.join(trip_dir1, 'udp_uplk_loss_latency_ho.pkl')
                dl_file2 = os.path.join(trip_dir2, 'udp_dnlk_loss_latency_ho.pkl')
                ul_file2 = os.path.join(trip_dir2, 'udp_uplk_loss_latency_ho.pkl')

                # Keep the trip only when all six files exist.
                trip_files = (dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2)
                if all(os.path.isfile(path) for path in trip_files):
                    filenames.append(trip_files)

pprint(filenames, sort_dicts=False)

[('/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12/Bandlock_UDP_9_Phone/sm00/#01/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12/Bandlock_UDP_9_Phone/sm01/#01/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12/Bandlock_UDP_9_Phone/sm00/#01/data/udp_uplk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12/Bandlock_UDP_9_Phone/sm01/#01/data/udp_uplk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12/Bandlock_UDP_9_Phone/sm00/#01/data/diag_log_sm00_2023-09-12_13-34-15_rrc.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12/Bandlock_UDP_9_Phone/sm01/#01/data/diag_log_sm01_2023-09-12_13-34-15_rrc.csv'),
 ('/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12/Bandlock_UDP_9_Phone/sm00/#02/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12/Bandlock_UDP_9_Phone/sm01/#02/data/udp_dnlk_loss_latency_ho.pkl

In [332]:
# Build per-(type_x, type_y) packet counters for every device pair / trip in
# `filenames`, sum them, fold mirrored pairs together, and derive rate tables.
# Inputs : `filenames` (tuples of dl/ul/rrc paths built by the discovery cell).
# Outputs: `stats_table_list`, `cnt_dict`, `cnt_table`, `rate_table`.

# Columns read from each device's downlink pickle.
selected_cols = ['seq', 'Timestamp', 'lost', 'excl', 'latency', 'xmit_time', 'arr_time',
                 'stage', 'category', 'type', 'cause', 'index', 'inter-RAT', 'inter-eNB', 'inter-Freq', 'band_cng', 'inter-gNB']

# Labels accepted in the `type` column; they become the categories of type_x / type_y.
ho_types = [
    'NASR', 'MCGF', 'SCGF',
    'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II',
    'LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM',
]

# Flag families and the counters kept for each of them. 'total' must stay last:
# the rate code below treats every column except the last one as a numerator.
flag_kinds = ['lost', 'excl', 'loex']
count_cols = [f'{kind}_{suffix}' for kind in flag_kinds
              for suffix in ('x', 'y', 'x&y', 'x^y', 'x|y')] + ['total']

stats_table_list = []
for dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2 in filenames:
    # Only the two downlink pickles are used here; the ul/rrc paths are unpacked but unused.
    # NOTE: pd.read_pickle can execute arbitrary code - only load trusted files.
    df = pd.read_pickle(dl_file1)[selected_cols]
    df1 = pd.read_pickle(dl_file2)[selected_cols]

    # Inner join on the sequence number; the shared columns get the default
    # suffixes _x (first device) and _y (second device).
    df = df.merge(df1, on='seq')
    del df1

    # Restrict both type columns to the known labels (anything else becomes NaN).
    for col in ('type_x', 'type_y'):
        df[col] = df[col].astype('category').cat.set_categories(ho_types)

    # Loss on both / exactly one / at least one device.
    df['lost_x&y'] = df['lost_x'] & df['lost_y']
    df['lost_x^y'] = df['lost_x'] ^ df['lost_y']
    df['lost_x|y'] = df['lost_x'] | df['lost_y']

    # The incoming `excl` flag is preserved as `loex`; `excl` is then redefined
    # as "flagged by loex but not lost".
    # NOTE(review): `loex` presumably means lost-or-excessive-latency - confirm.
    df['loex_x'] = df['excl_x'].copy()
    df['loex_y'] = df['excl_y'].copy()
    df['excl_x'] = ~df['lost_x'] & df['loex_x']
    df['excl_y'] = ~df['lost_y'] & df['loex_y']

    df['loex_x&y'] = df['loex_x'] & df['loex_y']
    df['loex_x^y'] = df['loex_x'] ^ df['loex_y']
    df['loex_x|y'] = df['loex_x'] | df['loex_y']

    # Pairwise `excl` counters are derived from the lost/loex combinations
    # rather than from excl_x / excl_y directly.
    df['excl_x&y'] = df['lost_x&y'] ^ df['loex_x&y']
    df['excl_x^y'] = ~(df['lost_x&y'] | df['lost_x^y']) & df['loex_x^y']
    df['excl_x|y'] = df['excl_x&y'] ^ df['excl_x^y']

    # Summing an all-True column yields the number of packets per group.
    df['total'] = True

    # observed=False is passed explicitly: the symmetrisation below looks up
    # every (type_x, type_y) pair, so unobserved category pairs must be kept
    # as zero rows regardless of the pandas default for `observed`.
    table = df.groupby(['type_x', 'type_y'], observed=False)[count_cols].sum()

    # To inspect a single trip: display(table[(table != 0).any(axis=1)].reset_index())
    stats_table_list.append(table)
    del table

# Element-wise sum of all per-trip tables.
cnt_table = reduce(lambda x, y: x.add(y, fill_value=0), stats_table_list)
cnt_dict = cnt_table.to_dict(orient='index')

# When a mirrored pair (B, A) is folded into (A, B) the two devices trade
# places, so the per-device counters swap x <-> y; every other counter is
# symmetric and is added as-is.
swap_xy = {}
for kind in flag_kinds:
    swap_xy[f'{kind}_x'] = f'{kind}_y'
    swap_xy[f'{kind}_y'] = f'{kind}_x'

ho_combos = list(it.combinations_with_replacement(ho_types, 2))
for combo in ho_combos:
    if combo[0] == combo[1]:
        continue  # diagonal pairs have no mirror
    combo1 = (combo[1], combo[0])
    mirrored = cnt_dict.pop(combo1)
    for col in count_cols:
        cnt_dict[combo][col] += mirrored[swap_xy.get(col, col)]

cnt_table = pd.DataFrame(cnt_dict).T.rename_axis(['type_x', 'type_y'])

# To inspect the result: display(cnt_table[(cnt_table != 0).any(axis=1)])

# Every counter as a percentage of the group's packet total.
molecule_cols = cnt_table.columns[:-1]  # numerator columns (all but 'total')
df_percentage = (cnt_table[molecule_cols].div(cnt_table['total'], axis=0) * 100).round(3)
df_percentage.columns = [f'{col}_r' for col in molecule_cols]

# "Save" rate (svr): exactly one device affected, out of at least one affected.
# "Blown save" rate (bsr): both devices affected, out of at least one affected.
# Groups with a zero denominator come out as NaN.
rate_table = pd.DataFrame()
for kind in flag_kinds:
    either = cnt_table[f'{kind}_x|y']
    rate_table[f'{kind}_xy_svr'] = (cnt_table[f'{kind}_x^y'].div(either, axis=0) * 100).round(3)
    rate_table[f'{kind}_xy_bsr'] = (cnt_table[f'{kind}_x&y'].div(either, axis=0) * 100).round(3)

# Merge everything and order the columns family by family, 'total' last.
rate_table = pd.concat([rate_table, cnt_table['total'], df_percentage], axis=1)
rate_table = rate_table[[f'{kind}_{suffix}' for kind in flag_kinds
                         for suffix in ('xy_svr', 'xy_bsr', 'x_r', 'y_r', 'x&y_r', 'x^y_r', 'x|y_r')] + ['total']]

In [333]:
# Show the results: number of per-trip tables that were summed, then the
# count and rate tables restricted to type pairs that actually occurred.
print('Brown Line:', len(stats_table_list), 'combos')
display(cnt_table[cnt_table['total'] != 0].reset_index())
display(rate_table[rate_table['total'] != 0].reset_index())

save_dir = "/Users/jackbedford/Desktop/MOXA/Code/results/2023-11-08 results"
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(save_dir, exist_ok=True)
# Both files keep the positional index produced by reset_index() as an
# unnamed first column (to_csv default index=True).
cnt_table.reset_index().to_csv(os.path.join(save_dir, "Downlink_Brown_Count_Stats.csv"))
# The rate file also carries the raw counters; 'total' is dropped from the
# count side because rate_table already contains it.
pd.concat([rate_table, cnt_table[cnt_table.columns[:-1]]], axis=1).reset_index().to_csv(os.path.join(save_dir, "Downlink_Brown_Rate_Stats.csv"))

Brown Line: 144 combos


Unnamed: 0,type_x,type_y,lost_x,lost_y,lost_x&y,lost_x^y,lost_x|y,excl_x,excl_y,excl_x&y,excl_x^y,excl_x|y,loex_x,loex_y,loex_x&y,loex_x^y,loex_x|y,total
0,NASR,MCGF,7691,1694,1694,5997,7691,0,0,0,0,0,7691,1694,1694,5997,7691,16168
1,NASR,LTEH,347,0,0,347,347,0,182,171,11,182,347,182,171,187,358,1613
2,NASR,ENBH,9640,5,5,9635,9640,1812,488,467,1796,2263,11452,493,472,11001,11473,14527
3,NASR,MNBH,952,0,0,952,952,906,0,0,906,906,1858,0,0,1858,1858,3783
4,MCGF,MCGF,148760,146961,68867,157987,226854,81308,72854,55272,80870,136142,230068,219815,124139,201605,325744,877207
5,MCGF,SCGF,16815,668,190,17103,17293,2502,8939,1576,9687,11263,19317,9607,1766,25392,27158,103189
6,MCGF,SCGA,6130,97,89,6049,6138,838,2187,1305,1398,2703,6968,2284,1394,6464,7858,25955
7,MCGF,SCGC-I,419,292,127,457,584,118,176,233,13,246,537,468,360,285,645,784
8,MCGF,SCGR-II,864,7,0,871,871,120,170,133,67,200,984,177,133,895,1028,10239
9,MCGF,SCGC-II,9068,1823,373,10145,10518,2890,3092,1327,4377,5704,11958,4915,1700,13473,15173,52281


Unnamed: 0,type_x,type_y,lost_xy_svr,lost_xy_bsr,lost_x_r,lost_y_r,lost_x&y_r,lost_x^y_r,lost_x|y_r,excl_xy_svr,excl_xy_bsr,excl_x_r,excl_y_r,excl_x&y_r,excl_x^y_r,excl_x|y_r,loex_xy_svr,loex_xy_bsr,loex_x_r,loex_y_r,loex_x&y_r,loex_x^y_r,loex_x|y_r,total
0,NASR,MCGF,77.974,22.026,47.569,10.477,10.477,37.092,47.569,,,0.0,0.0,0.0,0.0,0.0,77.974,22.026,47.569,10.477,10.477,37.092,47.569,16168
1,NASR,LTEH,100.0,0.0,21.513,0.0,0.0,21.513,21.513,6.044,93.956,0.0,11.283,10.601,0.682,11.283,52.235,47.765,21.513,11.283,10.601,11.593,22.195,1613
2,NASR,ENBH,99.948,0.052,66.359,0.034,0.034,66.325,66.359,79.364,20.636,12.473,3.359,3.215,12.363,15.578,95.886,4.114,78.833,3.394,3.249,75.728,78.977,14527
3,NASR,MNBH,100.0,0.0,25.165,0.0,0.0,25.165,25.165,100.0,0.0,23.949,0.0,0.0,23.949,23.949,100.0,0.0,49.114,0.0,0.0,49.114,49.114,3783
4,MCGF,MCGF,69.643,30.357,16.958,16.753,7.851,18.01,25.861,59.401,40.599,9.269,8.305,6.301,9.219,15.52,61.891,38.109,26.227,25.059,14.152,22.983,37.134,877207
5,MCGF,SCGF,98.901,1.099,16.295,0.647,0.184,16.574,16.759,86.007,13.993,2.425,8.663,1.527,9.388,10.915,93.497,6.503,18.72,9.31,1.711,24.607,26.319,103189
6,MCGF,SCGA,98.55,1.45,23.618,0.374,0.343,23.306,23.649,51.72,48.28,3.229,8.426,5.028,5.386,10.414,82.26,17.74,26.846,8.8,5.371,24.905,30.275,25955
7,MCGF,SCGC-I,78.253,21.747,53.444,37.245,16.199,58.291,74.49,5.285,94.715,15.051,22.449,29.719,1.658,31.378,44.186,55.814,68.495,59.694,45.918,36.352,82.27,784
8,MCGF,SCGR-II,100.0,0.0,8.438,0.068,0.0,8.507,8.507,33.5,66.5,1.172,1.66,1.299,0.654,1.953,87.062,12.938,9.61,1.729,1.299,8.741,10.04,10239
9,MCGF,SCGC-II,96.454,3.546,17.345,3.487,0.713,19.405,20.118,76.736,23.264,5.528,5.914,2.538,8.372,10.91,88.796,11.204,22.873,9.401,3.252,25.77,29.022,52281


# Green Line

In [334]:
# Collect, for every pair of devices in every experiment of the selected
# dates, the downlink / uplink / RRC files of each trip.
# Result: `filenames`, a list of
#   (dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2) tuples.
filenames = []

root_dir = "/Users/jackbedford/Desktop/MOXA/Code/data"
date_dirs = ['2023-10-05']
date_dirs = [os.path.join(root_dir, s) for s in sorted(date_dirs)]

for date_dir in date_dirs:
    # isdir() already filters out plain files such as .DS_Store.
    exp_dirs = [os.path.join(date_dir, s) for s in sorted(os.listdir(date_dir))
                if os.path.isdir(os.path.join(date_dir, s))]
    for exp_dir in exp_dirs:
        dev_dirs = [os.path.join(exp_dir, s) for s in sorted(os.listdir(exp_dir))
                    if os.path.isdir(os.path.join(exp_dir, s))]
        dev_combo_dirs = list(it.combinations(dev_dirs, 2))
        for dev_dir1, dev_dir2 in dev_combo_dirs:
            trip_dirs1 = [os.path.join(dev_dir1, s, 'data') for s in sorted(os.listdir(dev_dir1))
                          if os.path.isdir(os.path.join(dev_dir1, s))]
            trip_dirs2 = [os.path.join(dev_dir2, s, 'data') for s in sorted(os.listdir(dev_dir2))
                          if os.path.isdir(os.path.join(dev_dir2, s))]

            # NOTE(review): trips are paired by sorted position, not by name;
            # this assumes both devices recorded the same set of trips.
            for trip_dir1, trip_dir2 in zip(trip_dirs1, trip_dirs2):
                # A trip without a 'data' folder is incomplete: skip it instead
                # of letting os.listdir raise.
                if not (os.path.isdir(trip_dir1) and os.path.isdir(trip_dir2)):
                    continue

                # First entry whose name ends with 'rrc.csv', or None when the
                # trip has no RRC log (previously an IndexError).
                rrc_file1 = next((os.path.join(trip_dir1, s) for s in os.listdir(trip_dir1)
                                  if s.endswith('rrc.csv')), None)
                rrc_file2 = next((os.path.join(trip_dir2, s) for s in os.listdir(trip_dir2)
                                  if s.endswith('rrc.csv')), None)
                if rrc_file1 is None or rrc_file2 is None:
                    continue

                dl_file1 = os.path.join(trip_dir1, 'udp_dnlk_loss_latency_ho.pkl')
                ul_file1 = os.path.join(trip_dir1, 'udp_uplk_loss_latency_ho.pkl')
                dl_file2 = os.path.join(trip_dir2, 'udp_dnlk_loss_latency_ho.pkl')
                ul_file2 = os.path.join(trip_dir2, 'udp_uplk_loss_latency_ho.pkl')

                # Keep the trip only when all six files exist.
                trip_files = (dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2)
                if all(os.path.isfile(path) for path in trip_files):
                    filenames.append(trip_files)

pprint(filenames, sort_dicts=False)

[('/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-05/Bandlock_UDP_9_Phone/sm00/#01/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-05/Bandlock_UDP_9_Phone/sm01/#01/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-05/Bandlock_UDP_9_Phone/sm00/#01/data/udp_uplk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-05/Bandlock_UDP_9_Phone/sm01/#01/data/udp_uplk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-05/Bandlock_UDP_9_Phone/sm00/#01/data/diag_log_sm00_2023-10-05_14-24-16_rrc.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-05/Bandlock_UDP_9_Phone/sm01/#01/data/diag_log_sm01_2023-10-05_14-24-16_rrc.csv'),
 ('/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-05/Bandlock_UDP_9_Phone/sm00/#02/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-05/Bandlock_UDP_9_Phone/sm01/#02/data/udp_dnlk_loss_latency_ho.pkl

In [335]:
# Build per-(type_x, type_y) packet counters for every device pair / trip in
# `filenames`, sum them, fold mirrored pairs together, and derive rate tables.
# Inputs : `filenames` (tuples of dl/ul/rrc paths built by the discovery cell).
# Outputs: `stats_table_list`, `cnt_dict`, `cnt_table`, `rate_table`.

# Columns read from each device's downlink pickle.
selected_cols = ['seq', 'Timestamp', 'lost', 'excl', 'latency', 'xmit_time', 'arr_time',
                 'stage', 'category', 'type', 'cause', 'index', 'inter-RAT', 'inter-eNB', 'inter-Freq', 'band_cng', 'inter-gNB']

# Labels accepted in the `type` column; they become the categories of type_x / type_y.
ho_types = [
    'NASR', 'MCGF', 'SCGF',
    'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II',
    'LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM',
]

# Flag families and the counters kept for each of them. 'total' must stay last:
# the rate code below treats every column except the last one as a numerator.
flag_kinds = ['lost', 'excl', 'loex']
count_cols = [f'{kind}_{suffix}' for kind in flag_kinds
              for suffix in ('x', 'y', 'x&y', 'x^y', 'x|y')] + ['total']

stats_table_list = []
for dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2 in filenames:
    # Only the two downlink pickles are used here; the ul/rrc paths are unpacked but unused.
    # NOTE: pd.read_pickle can execute arbitrary code - only load trusted files.
    df = pd.read_pickle(dl_file1)[selected_cols]
    df1 = pd.read_pickle(dl_file2)[selected_cols]

    # Inner join on the sequence number; the shared columns get the default
    # suffixes _x (first device) and _y (second device).
    df = df.merge(df1, on='seq')
    del df1

    # Restrict both type columns to the known labels (anything else becomes NaN).
    for col in ('type_x', 'type_y'):
        df[col] = df[col].astype('category').cat.set_categories(ho_types)

    # Loss on both / exactly one / at least one device.
    df['lost_x&y'] = df['lost_x'] & df['lost_y']
    df['lost_x^y'] = df['lost_x'] ^ df['lost_y']
    df['lost_x|y'] = df['lost_x'] | df['lost_y']

    # The incoming `excl` flag is preserved as `loex`; `excl` is then redefined
    # as "flagged by loex but not lost".
    # NOTE(review): `loex` presumably means lost-or-excessive-latency - confirm.
    df['loex_x'] = df['excl_x'].copy()
    df['loex_y'] = df['excl_y'].copy()
    df['excl_x'] = ~df['lost_x'] & df['loex_x']
    df['excl_y'] = ~df['lost_y'] & df['loex_y']

    df['loex_x&y'] = df['loex_x'] & df['loex_y']
    df['loex_x^y'] = df['loex_x'] ^ df['loex_y']
    df['loex_x|y'] = df['loex_x'] | df['loex_y']

    # Pairwise `excl` counters are derived from the lost/loex combinations
    # rather than from excl_x / excl_y directly.
    df['excl_x&y'] = df['lost_x&y'] ^ df['loex_x&y']
    df['excl_x^y'] = ~(df['lost_x&y'] | df['lost_x^y']) & df['loex_x^y']
    df['excl_x|y'] = df['excl_x&y'] ^ df['excl_x^y']

    # Summing an all-True column yields the number of packets per group.
    df['total'] = True

    # observed=False is passed explicitly: the symmetrisation below looks up
    # every (type_x, type_y) pair, so unobserved category pairs must be kept
    # as zero rows regardless of the pandas default for `observed`.
    table = df.groupby(['type_x', 'type_y'], observed=False)[count_cols].sum()

    # To inspect a single trip: display(table[(table != 0).any(axis=1)].reset_index())
    stats_table_list.append(table)
    del table

# Element-wise sum of all per-trip tables.
cnt_table = reduce(lambda x, y: x.add(y, fill_value=0), stats_table_list)
cnt_dict = cnt_table.to_dict(orient='index')

# When a mirrored pair (B, A) is folded into (A, B) the two devices trade
# places, so the per-device counters swap x <-> y; every other counter is
# symmetric and is added as-is.
swap_xy = {}
for kind in flag_kinds:
    swap_xy[f'{kind}_x'] = f'{kind}_y'
    swap_xy[f'{kind}_y'] = f'{kind}_x'

ho_combos = list(it.combinations_with_replacement(ho_types, 2))
for combo in ho_combos:
    if combo[0] == combo[1]:
        continue  # diagonal pairs have no mirror
    combo1 = (combo[1], combo[0])
    mirrored = cnt_dict.pop(combo1)
    for col in count_cols:
        cnt_dict[combo][col] += mirrored[swap_xy.get(col, col)]

cnt_table = pd.DataFrame(cnt_dict).T.rename_axis(['type_x', 'type_y'])

# To inspect the result: display(cnt_table[(cnt_table != 0).any(axis=1)])

# Every counter as a percentage of the group's packet total.
molecule_cols = cnt_table.columns[:-1]  # numerator columns (all but 'total')
df_percentage = (cnt_table[molecule_cols].div(cnt_table['total'], axis=0) * 100).round(3)
df_percentage.columns = [f'{col}_r' for col in molecule_cols]

# "Save" rate (svr): exactly one device affected, out of at least one affected.
# "Blown save" rate (bsr): both devices affected, out of at least one affected.
# Groups with a zero denominator come out as NaN.
rate_table = pd.DataFrame()
for kind in flag_kinds:
    either = cnt_table[f'{kind}_x|y']
    rate_table[f'{kind}_xy_svr'] = (cnt_table[f'{kind}_x^y'].div(either, axis=0) * 100).round(3)
    rate_table[f'{kind}_xy_bsr'] = (cnt_table[f'{kind}_x&y'].div(either, axis=0) * 100).round(3)

# Merge everything and order the columns family by family, 'total' last.
rate_table = pd.concat([rate_table, cnt_table['total'], df_percentage], axis=1)
rate_table = rate_table[[f'{kind}_{suffix}' for kind in flag_kinds
                         for suffix in ('xy_svr', 'xy_bsr', 'x_r', 'y_r', 'x&y_r', 'x^y_r', 'x|y_r')] + ['total']]

In [336]:
# Show the results: number of per-trip tables that were summed, then the
# count and rate tables restricted to type pairs that actually occurred.
print('Green Line:', len(stats_table_list), 'combos')
display(cnt_table[cnt_table['total'] != 0].reset_index())
display(rate_table[rate_table['total'] != 0].reset_index())

save_dir = "/Users/jackbedford/Desktop/MOXA/Code/results/2023-11-08 results"
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(save_dir, exist_ok=True)
# Both files keep the positional index produced by reset_index() as an
# unnamed first column (to_csv default index=True).
cnt_table.reset_index().to_csv(os.path.join(save_dir, "Downlink_Green_Count_Stats.csv"))
# The rate file also carries the raw counters; 'total' is dropped from the
# count side because rate_table already contains it.
pd.concat([rate_table, cnt_table[cnt_table.columns[:-1]]], axis=1).reset_index().to_csv(os.path.join(save_dir, "Downlink_Green_Rate_Stats.csv"))

Green Line: 72 combos


Unnamed: 0,type_x,type_y,lost_x,lost_y,lost_x&y,lost_x^y,lost_x|y,excl_x,excl_y,excl_x&y,excl_x^y,excl_x|y,loex_x,loex_y,loex_x&y,loex_x^y,loex_x|y,total
0,MCGF,MCGF,436,439,433,9,442,4,7,7,4,11,440,446,440,6,446,3302
1,MCGF,SCGF,1246,0,0,1246,1246,21,551,471,90,561,1267,551,471,876,1347,3562
2,MCGF,SCGC-II,922,0,0,922,922,11,104,78,37,115,933,104,78,881,959,5287
3,MCGF,LTEH,4136,0,0,4136,4136,80,129,88,115,203,4216,129,88,4169,4257,16643
4,MCGF,ENBH,2097,0,0,2097,2097,147,21,5,163,168,2244,21,5,2255,2260,7727
5,MCGF,MCGH,0,0,0,0,0,10,0,0,10,10,10,0,0,10,10,20013
6,MCGF,MNBH,1097,77,77,1020,1097,24,443,421,22,443,1121,520,498,645,1143,8326
7,MCGF,SCGM,13390,1150,1090,12360,13450,967,5044,3965,1798,5763,14357,6194,5055,10441,15496,35499
8,SCGF,SCGC-II,0,0,0,0,0,285,52,14,309,323,285,52,14,309,323,1259
9,SCGF,LTEH,0,0,0,0,0,570,27,17,563,580,570,27,17,563,580,4923


Unnamed: 0,type_x,type_y,lost_xy_svr,lost_xy_bsr,lost_x_r,lost_y_r,lost_x&y_r,lost_x^y_r,lost_x|y_r,excl_xy_svr,excl_xy_bsr,excl_x_r,excl_y_r,excl_x&y_r,excl_x^y_r,excl_x|y_r,loex_xy_svr,loex_xy_bsr,loex_x_r,loex_y_r,loex_x&y_r,loex_x^y_r,loex_x|y_r,total
0,MCGF,MCGF,2.036,97.964,13.204,13.295,13.113,0.273,13.386,36.364,63.636,0.121,0.212,0.212,0.121,0.333,1.345,98.655,13.325,13.507,13.325,0.182,13.507,3302
1,MCGF,SCGF,100.0,0.0,34.98,0.0,0.0,34.98,34.98,16.043,83.957,0.59,15.469,13.223,2.527,15.75,65.033,34.967,35.57,15.469,13.223,24.593,37.816,3562
2,MCGF,SCGC-II,100.0,0.0,17.439,0.0,0.0,17.439,17.439,32.174,67.826,0.208,1.967,1.475,0.7,2.175,91.867,8.133,17.647,1.967,1.475,16.664,18.139,5287
3,MCGF,LTEH,100.0,0.0,24.851,0.0,0.0,24.851,24.851,56.65,43.35,0.481,0.775,0.529,0.691,1.22,97.933,2.067,25.332,0.775,0.529,25.05,25.578,16643
4,MCGF,ENBH,100.0,0.0,27.139,0.0,0.0,27.139,27.139,97.024,2.976,1.902,0.272,0.065,2.109,2.174,99.779,0.221,29.041,0.272,0.065,29.183,29.248,7727
5,MCGF,MCGH,,,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.05,0.0,0.0,0.05,0.05,100.0,0.0,0.05,0.0,0.0,0.05,0.05,20013
6,MCGF,MNBH,92.981,7.019,13.176,0.925,0.925,12.251,13.176,4.966,95.034,0.288,5.321,5.056,0.264,5.321,56.43,43.57,13.464,6.245,5.981,7.747,13.728,8326
7,MCGF,SCGM,91.896,8.104,37.719,3.24,3.071,34.818,37.888,31.199,68.801,2.724,14.209,11.169,5.065,16.234,67.379,32.621,40.443,17.448,14.24,29.412,43.652,35499
8,SCGF,SCGC-II,,,0.0,0.0,0.0,0.0,0.0,95.666,4.334,22.637,4.13,1.112,24.543,25.655,95.666,4.334,22.637,4.13,1.112,24.543,25.655,1259
9,SCGF,LTEH,,,0.0,0.0,0.0,0.0,0.0,97.069,2.931,11.578,0.548,0.345,11.436,11.781,97.069,2.931,11.578,0.548,0.345,11.436,11.781,4923


# Red Line

In [337]:
# Collect the result files of every trip for every pair of devices.
#
# Directory layout walked here:
#   <root_dir>/<date>/<experiment>/<device>/<trip>/data/
# Each entry appended to `filenames` is the 6-tuple
#   (dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2)
# where suffix 1 / 2 refers to the first / second device of the pair.

def _list_subdirs(parent):
    """Return the sorted names of the immediate sub-directories of `parent`."""
    return sorted(
        s for s in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, s)) and s != '.DS_Store'
    )


def _trip_files(trip_dir):
    """Return (dl_file, ul_file, rrc_file) for one trip's data directory.

    Returns None when the directory or any of the three files is missing,
    so that an incomplete trip is skipped instead of raising.
    """
    if not os.path.isdir(trip_dir):
        return None
    # Sorted so that the choice is deterministic if several files match.
    rrc_names = sorted(s for s in os.listdir(trip_dir) if s.endswith('rrc.csv'))
    if not rrc_names:
        return None
    dl_file = os.path.join(trip_dir, 'udp_dnlk_loss_latency_ho.pkl')
    ul_file = os.path.join(trip_dir, 'udp_uplk_loss_latency_ho.pkl')
    rrc_file = os.path.join(trip_dir, rrc_names[0])
    if not all(os.path.isfile(f) for f in (dl_file, ul_file, rrc_file)):
        return None
    return dl_file, ul_file, rrc_file


filenames = []

root_dir = "/Users/jackbedford/Desktop/MOXA/Code/data"
date_dirs = ['2023-10-19']
date_dirs = [os.path.join(root_dir, s) for s in sorted(date_dirs)]

for date_dir in date_dirs:
    for exp_name in _list_subdirs(date_dir):
        exp_dir = os.path.join(date_dir, exp_name)
        dev_dirs = [os.path.join(exp_dir, s) for s in _list_subdirs(exp_dir)]
        for dev_dir1, dev_dir2 in it.combinations(dev_dirs, 2):
            # Pair trips by directory name rather than by position: if one
            # device is missing a trip, positional pairing would silently
            # match different trips of the two devices with each other.
            trips_dev2 = set(_list_subdirs(dev_dir2))
            for trip in _list_subdirs(dev_dir1):
                if trip not in trips_dev2:
                    continue
                files1 = _trip_files(os.path.join(dev_dir1, trip, 'data'))
                files2 = _trip_files(os.path.join(dev_dir2, trip, 'data'))
                if files1 is None or files2 is None:
                    continue
                dl_file1, ul_file1, rrc_file1 = files1
                dl_file2, ul_file2, rrc_file2 = files2
                filenames.append((dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2))

pprint(filenames, sort_dicts=False)

[('/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-19/Bandlock_UDP_9_Phone/sm00/#01/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-19/Bandlock_UDP_9_Phone/sm01/#01/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-19/Bandlock_UDP_9_Phone/sm00/#01/data/udp_uplk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-19/Bandlock_UDP_9_Phone/sm01/#01/data/udp_uplk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-19/Bandlock_UDP_9_Phone/sm00/#01/data/diag_log_sm00_2023-10-19_14-37-09_rrc.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-19/Bandlock_UDP_9_Phone/sm01/#01/data/diag_log_sm01_2023-10-19_14-37-10_rrc.csv'),
 ('/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-19/Bandlock_UDP_9_Phone/sm00/#02/data/udp_dnlk_loss_latency_ho.pkl',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-10-19/Bandlock_UDP_9_Phone/sm01/#02/data/udp_dnlk_loss_latency_ho.pkl

In [338]:
# Build, for every pair of devices, a table of packet counts grouped by the
# event type seen on device x and on device y, sum these tables over all file
# pairs, fold the (b, a) rows into the (a, b) rows, and derive a rate table.

# Columns loaded from each per-device pickle.
selected_cols = ['seq', 'Timestamp', 'lost', 'excl', 'latency', 'xmit_time', 'arr_time',
                 'stage', 'category', 'type', 'cause', 'index', 'inter-RAT', 'inter-eNB', 'inter-Freq', 'band_cng', 'inter-gNB']

# Event types, in the fixed order used as categories for both grouping keys.
ho_types = [
    'NASR', 'MCGF', 'SCGF',
    'SCGA', 'SCGR-I', 'SCGC-I', 'SCGR-II', 'SCGC-II',
    'LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 
]

# Prefixes of the three flag families counted below.
flag_kinds = ('lost', 'excl', 'loex')

# Count columns of every per-pair table; 'total' must stay last because the
# rate computation treats every column but the last as a numerator.
count_cols = [
    'lost_x', 'lost_y', 'lost_x&y', 'lost_x^y', 'lost_x|y',
    'excl_x', 'excl_y', 'excl_x&y', 'excl_x^y', 'excl_x|y',
    'loex_x', 'loex_y', 'loex_x&y', 'loex_x^y', 'loex_x|y', 'total',
]

stats_table_list = []
for dl_file1, dl_file2, ul_file1, ul_file2, rrc_file1, rrc_file2 in filenames:
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # produced by this project. Only the downlink files are used in this cell.
    df = pd.read_pickle(dl_file1)[selected_cols]
    df1 = pd.read_pickle(dl_file2)[selected_cols]

    # Inner join on the sequence number; device 1 columns get suffix _x,
    # device 2 columns get suffix _y.
    df = df.merge(df1, on='seq')
    del df1

    # Restrict both type columns to the known categories (other values become
    # NaN and are dropped by the groupby below).
    for type_col in ('type_x', 'type_y'):
        df[type_col] = df[type_col].astype('category').cat.set_categories(ho_types)

    df['lost_x&y'] = df['lost_x'] & df['lost_y']
    df['lost_x^y'] = df['lost_x'] ^ df['lost_y']
    df['lost_x|y'] = df['lost_x'] | df['lost_y']

    # 'loex_*' keeps the original 'excl' flag; 'excl_*' is then narrowed to
    # rows that are flagged but not lost.
    df['loex_x'] = df['excl_x'].copy()
    df['loex_y'] = df['excl_y'].copy()
    df['excl_x'] = ~df['lost_x'] & df['loex_x']
    df['excl_y'] = ~df['lost_y'] & df['loex_y']

    df['loex_x&y'] = df['loex_x'] & df['loex_y']
    df['loex_x^y'] = df['loex_x'] ^ df['loex_y']
    df['loex_x|y'] = df['loex_x'] | df['loex_y']

    # NOTE(review): these read as "flagged on both / exactly one device without
    # being lost on both / either" only if every lost row is also flagged in
    # the original 'excl' column -- confirm against the producer of the pickles.
    df['excl_x&y'] = df['lost_x&y'] ^ df['loex_x&y']
    df['excl_x^y'] = ~(df['lost_x&y'] | df['lost_x^y']) & df['loex_x^y']
    df['excl_x|y'] = df['excl_x&y'] ^ df['excl_x^y']

    # Summing this constant column yields the number of rows per group.
    df['total'] = True

    # observed=False is passed explicitly: every (type_x, type_y) combination,
    # including unobserved ones, must be present so that all per-pair tables
    # share one index and the folding step below can look up any pair.
    stats_table_list.append(
        df.groupby(['type_x', 'type_y'], observed=False)[count_cols].sum()
    )

if not stats_table_list:
    raise ValueError("no file pairs to aggregate: `filenames` is empty")

# Element-wise sum of all per-pair tables.
cnt_table = reduce(lambda x, y: x.add(y, fill_value=0), stats_table_list)
cnt_dict = cnt_table.to_dict(orient='index')

# When folding row (b, a) into row (a, b) the roles of x and y are swapped for
# the per-device columns; the symmetric columns and 'total' map to themselves.
mirror_col = {}
for kind in flag_kinds:
    mirror_col[f'{kind}_x'] = f'{kind}_y'
    mirror_col[f'{kind}_y'] = f'{kind}_x'

ho_combos = list(it.combinations_with_replacement(ho_types, 2))
for combo in ho_combos:
    if combo[0] == combo[1]:
        continue
    combo1 = (combo[1], combo[0])
    mirrored = cnt_dict.pop(combo1)
    for col in count_cols:
        cnt_dict[combo][col] += mirrored[mirror_col.get(col, col)]

cnt_table = pd.DataFrame(cnt_dict).T.rename_axis(['type_x', 'type_y'])

# Build the rate table.
rate_table = pd.DataFrame()
# All numerator columns (everything except the trailing 'total').
molecule_cols = cnt_table.columns[:-1]
# Each count as a percentage of the row total.
df_percentage = (cnt_table[molecule_cols].div(cnt_table['total'], axis=0) * 100).round(3)
df_percentage.columns = [f'{col}_r' for col in molecule_cols]
# save / blown save: share of the x|y count that is x^y ("save") or
# x&y ("blown save"), in percent. Rows with a zero x|y count yield NaN.
for kind in flag_kinds:
    either = cnt_table[f'{kind}_x|y']
    rate_table[f'{kind}_xy_svr'] = (cnt_table[f'{kind}_x^y'].div(either, axis=0) * 100).round(3) # save
    rate_table[f'{kind}_xy_bsr'] = (cnt_table[f'{kind}_x&y'].div(either, axis=0) * 100).round(3) # blown save
# Merge the tables and put the columns into their final order.
rate_table = pd.concat([rate_table, cnt_table['total'], df_percentage], axis=1)
rate_cols = []
for kind in flag_kinds:
    rate_cols += [f'{kind}_xy_svr', f'{kind}_xy_bsr', f'{kind}_x_r', f'{kind}_y_r',
                  f'{kind}_x&y_r', f'{kind}_x^y_r', f'{kind}_x|y_r']
rate_cols.append('total')
rate_table = rate_table[rate_cols]

In [339]:
# Print the results: the number of aggregated per-file-pair tables, followed
# by the rows of the count and rate tables whose total is non-zero.
print('Red Line:', len(stats_table_list), 'combos')
display(cnt_table[cnt_table['total'] != 0].reset_index())
display(rate_table[rate_table['total'] != 0].reset_index())

save_dir = "/Users/jackbedford/Desktop/MOXA/Code/results/2023-11-08 results"
# to_csv does not create missing parent directories, so make sure the target
# directory exists before writing.
os.makedirs(save_dir, exist_ok=True)
cnt_table.reset_index().to_csv(os.path.join(save_dir, "Downlink_Red_Count_Stats.csv"))
# The rate file carries the rate columns followed by the raw count columns
# (all count columns except the trailing 'total', which rate_table already has).
pd.concat([rate_table, cnt_table[cnt_table.columns[:-1]]], axis=1).reset_index().to_csv(os.path.join(save_dir, "Downlink_Red_Rate_Stats.csv"))

Red Line: 72 combos


Unnamed: 0,type_x,type_y,lost_x,lost_y,lost_x&y,lost_x^y,lost_x|y,excl_x,excl_y,excl_x&y,excl_x^y,excl_x|y,loex_x,loex_y,loex_x&y,loex_x^y,loex_x|y,total
0,NASR,MCGF,856,212,151,766,917,96,135,86,120,206,952,347,237,825,1062,5853
1,NASR,ENBH,856,747,0,1603,1603,96,1059,492,653,1145,952,1806,492,1774,2266,3227
2,NASR,MNBH,4725,6,2,4727,4729,490,610,180,863,1043,5215,616,182,5467,5649,20256
3,NASR,SCGM,411,1,0,412,412,86,18,1,103,104,497,19,1,514,515,5266
4,MCGF,MCGF,141325,149162,60152,170183,230335,33394,42683,26253,45965,72218,174719,191845,86405,193754,280159,686982
5,MCGF,SCGF,4105,1557,729,4204,4933,1890,2018,1098,2342,3440,5995,3575,1827,5916,7743,15811
6,MCGF,SCGA,22218,694,442,22028,22470,1476,6103,2759,4396,7155,23694,6797,3201,24089,27290,73335
7,MCGF,SCGR-I,39,180,36,147,183,180,633,78,673,751,219,813,114,804,918,1944
8,MCGF,SCGC-I,5323,362,107,5471,5578,978,2228,1327,1572,2899,6301,2590,1434,6023,7457,12544
9,MCGF,SCGR-II,17160,2033,931,17331,18262,4616,6585,4486,6013,10499,21776,8618,5417,19560,24977,62180


Unnamed: 0,type_x,type_y,lost_xy_svr,lost_xy_bsr,lost_x_r,lost_y_r,lost_x&y_r,lost_x^y_r,lost_x|y_r,excl_xy_svr,excl_xy_bsr,excl_x_r,excl_y_r,excl_x&y_r,excl_x^y_r,excl_x|y_r,loex_xy_svr,loex_xy_bsr,loex_x_r,loex_y_r,loex_x&y_r,loex_x^y_r,loex_x|y_r,total
0,NASR,MCGF,83.533,16.467,14.625,3.622,2.58,13.087,15.667,58.252,41.748,1.64,2.307,1.469,2.05,3.52,77.684,22.316,16.265,5.929,4.049,14.095,18.145,5853
1,NASR,ENBH,100.0,0.0,26.526,23.148,0.0,49.675,49.675,57.031,42.969,2.975,32.817,15.246,20.236,35.482,78.288,21.712,29.501,55.965,15.246,54.974,70.22,3227
2,NASR,MNBH,99.958,0.042,23.326,0.03,0.01,23.336,23.346,82.742,17.258,2.419,3.011,0.889,4.26,5.149,96.778,3.222,25.745,3.041,0.898,26.99,27.888,20256
3,NASR,SCGM,100.0,0.0,7.805,0.019,0.0,7.824,7.824,99.038,0.962,1.633,0.342,0.019,1.956,1.975,99.806,0.194,9.438,0.361,0.019,9.761,9.78,5266
4,MCGF,MCGF,73.885,26.115,20.572,21.713,8.756,24.773,33.529,63.648,36.352,4.861,6.213,3.821,6.691,10.512,69.159,30.841,25.433,27.926,12.577,28.204,40.781,686982
5,MCGF,SCGF,85.222,14.778,25.963,9.848,4.611,26.589,31.2,68.081,31.919,11.954,12.763,6.945,14.812,21.757,76.404,23.596,37.917,22.611,11.555,37.417,48.972,15811
6,MCGF,SCGA,98.033,1.967,30.297,0.946,0.603,30.037,30.64,61.44,38.56,2.013,8.322,3.762,5.994,9.757,88.27,11.73,32.309,9.268,4.365,32.848,37.213,73335
7,MCGF,SCGR-I,80.328,19.672,2.006,9.259,1.852,7.562,9.414,89.614,10.386,9.259,32.562,4.012,34.619,38.632,87.582,12.418,11.265,41.821,5.864,41.358,47.222,1944
8,MCGF,SCGC-I,98.082,1.918,42.435,2.886,0.853,43.614,44.467,54.226,45.774,7.797,17.761,10.579,12.532,23.111,80.77,19.23,50.231,20.647,11.432,48.015,59.447,12544
9,MCGF,SCGR-II,94.902,5.098,27.597,3.27,1.497,27.872,29.37,57.272,42.728,7.424,10.59,7.215,9.67,16.885,78.312,21.688,35.021,13.86,8.712,31.457,40.169,62180
