In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import seaborn as sns
import swifter
import json
from collections import namedtuple
import re

import itertools
from pprint import pprint

# Functions

In [2]:
# Find files
# This function input the path of experiment directory and output a list of device directories of the experiment directory.
def find_device_under_exp(exp_dir_path):
    dev_dir_list = sorted([os.path.join(exp_dir_path, d) for d in os.listdir(exp_dir_path) if d.startswith('qc') or d.startswith('sm')])
    return dev_dir_list

def find_trace_under_device(dev_dir_path):
    trace_dir_list = sorted([os.path.join(dev_dir_path, d) for d in os.listdir(dev_dir_path)])
    return trace_dir_list


# Convenience instance
class EXPERIMENT():
    def __init__(self, exp_dir_path, settings):
        self.path = exp_dir_path
        self.settings = json.loads(settings)
    def __repr__(self):
        return f'EXP: {self.path} -> {self.settings}'

In [181]:
def parse_mi_ho(f):

    df = pd.read_csv(f)
    df["Timestamp"] = df["Timestamp"].swifter.apply(lambda x: pd.to_datetime(x) + dt.timedelta(hours=8))
    nr_pci = 'O'
    scells = []

    def NR_OTA():

        if df["type_id"].iloc[i] == "5G_NR_RRC_OTA_Packet": return True
        else: return False
    
    def LTE_SERV_INFO():

        if df["type_id"].iloc[i] == "LTE_RRC_Serv_Cell_Info": return True
        else: return False
    

    def find_1st_after(start_idx, target, look_after=1):
        for j in range(start_idx, len(df)):
            t_ = df["Timestamp"].iloc[j]
            if (t_ - t).total_seconds() > look_after:
                return None, None
            if df[target].iloc[j] not in [0,'0'] and not np.isnan(df[target].iloc[j]):
                return t_, j
        return None, None
    
    def find_1st_before(start_idx, target, look_before=1):
        for j in range(start_idx, -1, -1):
            t_ = df["Timestamp"].iloc[j]
            if (t - t_).total_seconds() > look_before:
                return None, None
            if df[target].iloc[j] not in [0,'0'] and not np.isnan(df[target].iloc[j]):
                return t_, j
        return None, None
    
    def find_in_D_exact(targets):

        l = []
        # In l : (second, ho_type)
        for target in targets:
            for ho in D[target]:
                l.append(((t - ho.start).total_seconds(), target))

        if len(l) != 0:
            for x in l:
                if (x[0]== 0):
                    return x[1]
        
        return None
    
    def find_in_D_first_before(targets, look_before=1):

        l = []
        # In l : (second, ho_type)
        for target in targets:
            for ho in D[target]:
                l.append(((t - ho.end).total_seconds(), target))

        if len(l) != 0:
            closest = min(l, key=lambda x: x[0])
            if 0 <= closest[0] < look_before:
                return closest[1]
        
        return None
    
    HO = namedtuple('HO',['start', 'end', 'others', 'trans'], defaults=[None,None,'',''])
    
    D = {
        'Conn_Rel':[], 
        'Conn_Req':[], # Setup
        'LTE_HO': [], # LTE -> newLTE
        'MN_HO': [], # LTE + NR -> newLTE + NR
        'MN_HO_to_eNB': [], # LTE + NR -> newLTE
        'SN_setup': [], # LTE -> LTE + NR => NR setup
        'SN_Rel': [], # LTE + NR -> LTE
        'SN_HO': [], # LTE + NR -> LTE + newNR  
        'RLF_II': [],
        'RLF_III': [],
        'SCG_RLF': [],
        'Add_SCell': [],
        }

    for i in range(len(df)):

        # Pass NR RRC packet. In NSA mode, LTE RRC packet include NR packet message.
        if NR_OTA() or LTE_SERV_INFO():
            continue

        others = ''
        t = df["Timestamp"].iloc[i]

        if df["rrcConnectionRelease"].iloc[i] == 1:
            D['Conn_Rel'].append(HO(start=t))
            nr_pci = 'O'

        if df["rrcConnectionRequest"].iloc[i] == 1:
            
            # Define end of rrcConnectionRequest to be rrcConnectionReconfigurationComplete or securityModeComplete.
            a = find_1st_after(i, 'rrcConnectionReconfigurationComplete',look_after=2)[0]
            b = find_1st_after(i, 'securityModeComplete',look_after=2)[0]
            if a is None and b is None: end = None
            elif a is None and b is not None: end = b
            elif a is not None and b is None: end = a 
            else: end = a if a > b else b
            
            D['Conn_Req'].append(HO(start=t,end=end))
            nr_pci = 'O'
        
        if df["lte-rrc.t304"].iloc[i] == 1:
            
            end, _ = find_1st_after(i, 'rrcConnectionReconfigurationComplete')
            serv_cell, target_cell = df["PCI"].iloc[i], int(df['lte_targetPhysCellId'].iloc[i])
            serv_freq, target_freq = int(df["Freq"].iloc[i]), int(df['dl-CarrierFreq'].iloc[i])


            if df["SCellToAddMod-r10"].iloc[i] == 1:
                n =len(str(df["SCellIndex-r10.1"].iloc[i]).split('@'))
                others += f' Set up {n} SCell.'
            else:
                scells = []
            
            if serv_freq != target_freq:
                a,b = find_1st_before(i, "rrcConnectionReestablishmentRequest", 1)
                others += " Inter frequency HO."
                if a is not None:
                    others += " Near after RLF."
                
            if df["nr-rrc.t304"].iloc[i] == 1 and df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
                
                if serv_cell == target_cell and serv_freq == target_freq:

                    a, _ = find_1st_before(i, "rrcConnectionReestablishmentRequest", 3)
                    if a is not None:
                        others += ' Near after RLF.'

                    a = find_in_D_first_before(['MN_HO_to_eNB', 'SN_Rel'])
                    if a is not None:
                        others += f' Near after {a}.'

                    ori_serv = nr_pci
                    nr_pci = int(df['nr_physCellId'].iloc[i])
                    trans = f'{ori_serv} -> {nr_pci}'
                    D['SN_setup'].append(HO(start=t, end=end, others=others, trans=trans))

                else:
                    
                    orig_serv = nr_pci
                    nr_pci = int(df['nr_physCellId'].iloc[i])
                    trans = f'({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq})'
                    D['MN_HO'].append(HO(start=t, end=end, others=others, trans=trans))

            else:
                
                if serv_cell == target_cell and serv_freq == target_freq:

                    a, b = find_1st_before(i, "scgFailureInformationNR-r15", 2)
                    if a is not None:
                        others += " Caused by scg-failure."
                    
                    orig_serv = nr_pci
                    nr_pci = 'O'
                    trans = f'{orig_serv} -> {nr_pci}'
                    D['SN_Rel'].append(HO(start=t, end=end, others=others, trans=trans))
                    
                else:

                    a, _ = find_1st_before(i,"rrcConnectionSetup",3)
                    if a is not None:
                        others += ' Near After connection setup.'
                    if nr_pci == 'O':
                        trans = f'({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq})'
                        D['LTE_HO'].append(HO(start=t, end=end, others=others, trans=trans))
                    else:
                        orig_serv = nr_pci
                        nr_pci = 'O'
                        trans = f'{orig_serv} -> {nr_pci}'
                        D['MN_HO_to_eNB'].append(HO(start=t, end=end, others=others, trans=trans))


        if df["nr-rrc.t304"].iloc[i] == 1 and not df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:

            end, _ = find_1st_after(i,'rrcConnectionReconfigurationComplete')
            
            orig_serv = nr_pci
            nr_pci = int(df['nr_physCellId'].iloc[i])
            trans = f'{orig_serv} -> {nr_pci}'
            D['SN_HO'].append(HO(start=t,end=end,trans=trans))


        if df["rrcConnectionReestablishmentRequest"].iloc[i] == 1:

            end1, _ = find_1st_after(i, 'rrcConnectionReestablishmentComplete', look_after=1)
            b, _ = find_1st_after(i, 'rrcConnectionReestablishmentReject', look_after=1)
            end2, _ = find_1st_after(i, 'securityModeComplete',look_after=3)

            others += ' ' + df["reestablishmentCause"].iloc[i] + '.'
            scells = []

            c, _ = find_1st_before(i, 'scgFailureInformationNR-r15', 1)
            if c != None:
                others  += ' caused by scgfailure.'
                
            serv_cell, rlf_cell = df["PCI"].iloc[i], int(df['physCellId.3'].iloc[i])
            serv_freq = int(df['Freq'].iloc[i])
            
            # Type II & Type III
            if end1 is not None: 

                orig_serv = nr_pci
                nr_pci = 'O'
                trans = f'({rlf_cell}) -> ({serv_cell}, {serv_freq}) | {orig_serv} -> {nr_pci}'
                D['RLF_II'].append(HO(start=t,end=end1,others=others,trans=trans))

            elif b is not None and end2 is not None: 
                D['RLF_III'].append(HO(start=t,end=end2,others=others))
            else:
                others+=' No end.'
                D['RLF_II'].append(HO(start=t,others=others))
                print('No end for RLF')

        if df["scgFailureInformationNR-r15"].iloc[i] == 1:

            others += ' ' + df["failureType-r15"].iloc[i] + '.'
            a, idx1 = find_1st_after(i, "rrcConnectionReestablishmentRequest", look_after=1)
            b, idx2 = find_1st_after(i, "lte-rrc.t304", look_after=2)

            if a is not None:

                end1, _ = find_1st_after(idx1, 'rrcConnectionReestablishmentComplete', look_after=1)
                b, _ = find_1st_after(idx1, 'rrcConnectionReestablishmentReject', look_after=1)
                end2 = find_1st_after(idx1, 'securityModeComplete',look_after=3)[0]

                others += ' Result in rrcReestablishment.'
                    
                # Type II & Type III Result
                if end1 is not None: 
                    D['SCG_RLF'].append(HO(start=t,end=end1,others=others))
                elif b is not None and end2 is not None: 
                    D['SCG_RLF'].append(HO(start=t,end=end2,others=others))
                else:
                    others += ' No end.'
                    D['SCG_RLF'].append(HO(start=t,others=others))
                    print('No end for scg failure result in rrcReestablishment.')

            elif b is not None:

                end, _ = find_1st_after(idx2, 'rrcConnectionReconfigurationComplete')
                serv_cell, target_cell = df["PCI"].iloc[idx2], df['lte_targetPhysCellId'].iloc[idx2]
                serv_freq, target_freq = df["Freq"].iloc[idx2], df['dl-CarrierFreq'].iloc[idx2]
                others += ' Result in gNB release.'

                if serv_cell == target_cell and serv_freq == target_freq:
                    D['SCG_RLF'].append(HO(start=t,end=end,others=others))
                else:
                    others += ' Weird gNB release.'
                    print('Weird for scg failure result in gNb Release.')
                    D['SCG_RLF'].append(HO(start=t,end=end,others=others))                  

            else:

                print('No end for scg failure.')
                others += ' No end.'
                D['SCG_RLF'].append(HO(start=t,others=others))
        
        if df['SCellToAddMod-r10'].iloc[i] == 1 and df['physCellId-r10'].iloc[i] != 'nr or cqi report':

            others = ''
            pcis = str(df["physCellId-r10"].iloc[i]).split('@')
            freqs = str(df["dl-CarrierFreq-r10"].iloc[i]).split('@')
            orig_scells = scells
            scells = [(int(float(pci)), int(float(freq))) for pci, freq in zip(pcis, freqs)]

            others += f' Set up {len(scells)} SCell.'
            trans = f'{orig_scells} -> {scells}'

            end, _ = find_1st_after(i,'rrcConnectionReconfigurationComplete')
            
            a, _ = find_1st_before(i, "rrcConnectionReestablishmentRequest", 3)
            if a is not None:
                others += ' Near after RLF.'

            a = find_in_D_exact(['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 'SN_setup', 'SN_Rel'])
            if a is not None:
                others += f' With {a}.'

            D['Add_SCell'].append(HO(start=t,end=end,others=others, trans=trans))
    
    return D


In [327]:
def loss_excl_cause(loss_lat_file_path, rrc_file_path):

    loss_lat_df = pd.read_csv(loss_lat_file_path)

    loss_cond = loss_lat_df['lost'] == True
    loss_packets = loss_lat_df[loss_cond]
    loss_packets = loss_packets.reset_index(drop=True)
    loss_packets['Timestamp'] = pd.to_datetime(loss_packets['Timestamp'])

    exc_lat = 0.1
    excl_cond = (loss_cond==False) & (loss_lat_df['latency'] > exc_lat)
    excl_packets = loss_lat_df[excl_cond]
    excl_packets = excl_packets.reset_index(drop=True)
    excl_packets['Timestamp'] = pd.to_datetime(excl_packets['Timestamp'])

    HO_dict = parse_mi_ho(rrc_file_path)
    events = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 'SN_setup', 
              'SN_Rel', 'SN_HO', 'RLF_II', 'RLF_III', 'SCG_RLF']
    slots = [dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=1),
             dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=2), dt.timedelta(seconds=2), dt.timedelta(seconds=2)]
    
    LOSS_PKT = namedtuple('LOSS_PKT',['timestamp', 'seq', 'cause', 'trans', 'trans_time'], defaults=['', 0, [], [], []])

    LOSS_PKTs = []

    for i in range(len(loss_packets)):

        loss_packet = loss_packets.iloc[i]
        loss_packet_timestamp = loss_packet['Timestamp']
        seq = loss_packet['seq']
        
        cause = []
        trans = []
        trans_time = []

        for HO_type, slot in zip(events, slots):
            
            HOs = HO_dict[HO_type]  

            for h in HOs:
                
                if h.start - slot < loss_packet_timestamp < h.start:
                    cause.append(f'Before {HO_type}') 
                    trans.append(h.trans)
                    trans_time.append(h.start)
                elif h.start < loss_packet_timestamp < h.end:
                    cause.append(f'During {HO_type}') 
                    trans.append(h.trans)
                    trans_time.append((h.start, h.end))
                elif h.end < loss_packet_timestamp < h.end + slot:
                    cause.append(f'After {HO_type}') 
                    trans.append(h.trans)
                    trans_time.append(h.end)

        LOSS_PKTs.append(LOSS_PKT(timestamp=loss_packet_timestamp, seq=seq, cause=cause))
                
    EXCL_PKT = namedtuple('EXCL_PKT',['timestamp', 'seq', 'cause', 'trans', 'trans_time'], defaults=['', 0, [], [], []])

    events = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 'SN_setup', 
              'SN_Rel', 'SN_HO', 'RLF_II', 'RLF_III', 'SCG_RLF']
    slots = [dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=1),
             dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=2), dt.timedelta(seconds=2), dt.timedelta(seconds=1)]
    
    EXCL_PKTs = []

    for i in range(len(excl_packets)):

        excl_packet = excl_packets.iloc[i]
        excl_packet_timestamp = excl_packet['Timestamp']
        seq = excl_packet['seq']

        cause = []
        trans = []
        trans_time = []

        for HO_type, slot in zip(events, slots):
            
            HOs = HO_dict[HO_type]   
            for h in HOs:
                
                if h.start - slot < excl_packet_timestamp < h.start:
                    cause.append(f'Before {HO_type}') 
                    trans.append(h.trans)
                    trans_time.append(h.start)
                elif h.start < excl_packet_timestamp < h.end:
                    cause.append(f'During {HO_type}') 
                    trans.append(h.trans)
                    trans_time.append((h.start, h.end))
                elif h.end < excl_packet_timestamp < h.end + slot:
                    cause.append(f'After {HO_type}') 
                    trans.append(h.trans)
                    trans_time.append(h.end)

        EXCL_PKTs.append(EXCL_PKT(timestamp=excl_packet_timestamp, seq=seq, cause=cause, trans=trans, trans_time=trans_time))
    
    return LOSS_PKTs, EXCL_PKTs


def loss_excl_cause_dual(loss_lat_file_path1, loss_lat_file_path2, rrc_file_path1, rrc_file_path2):

    df1 = pd.read_csv(loss_lat_file_path1)
    df2 = pd.read_csv(loss_lat_file_path2)

    start_seq = df1['seq'].iloc[0] if df1['seq'].iloc[0] >=  df2['seq'].iloc[0] else df2['seq'].iloc[0]
    end_seq = df1['seq'].iloc[-1] if df1['seq'].iloc[-1] <=  df2['seq'].iloc[-1] else df2['seq'].iloc[-1]

    cond1 = (df1['seq'] >= start_seq) & (df1['seq'] <= end_seq)
    df1 = df1[cond1]
    df1 = df1.reset_index(drop=True)
    cond2 = (df2['seq'] >= start_seq) & (df2['seq'] <= end_seq)
    df2 = df2[cond2]
    df2 = df2.reset_index(drop=True)

    # Loss calculate for dual radios redundant packets.
    loss_cond = (df1['lost'] == True) & (df2['lost'] == True)

    loss_packets1 = df1[loss_cond]
    loss_packets1 = loss_packets1.reset_index(drop=True)
    loss_packets1['Timestamp'] = pd.to_datetime(loss_packets1['Timestamp'])

    loss_packets2 = df2[loss_cond]
    loss_packets2 = loss_packets2.reset_index(drop=True)
    loss_packets2['Timestamp'] = pd.to_datetime(loss_packets2['Timestamp'])

    # Excexxive latency calculate for dual radios redundant packets.
    exc_lat = 0.1 
    excl_cond1 = (loss_cond==False) & (df1['latency'] > exc_lat)
    excl_cond2 = (loss_cond==False) & (df2['latency'] > exc_lat)
    excl_cond = (excl_cond1 == True) & (excl_cond2 == True)
    
    excl_packets1 = df1[excl_cond]
    excl_packets1 = excl_packets1.reset_index(drop=True)
    excl_packets1['Timestamp'] = pd.to_datetime(excl_packets1['Timestamp'])

    excl_packets2 = df2[excl_cond]
    excl_packets2 = excl_packets2.reset_index(drop=True)
    excl_packets2['Timestamp'] = pd.to_datetime(excl_packets2['Timestamp'])

    HO_dict1 = parse_mi_ho(rrc_file_path1)
    HO_dict2 = parse_mi_ho(rrc_file_path2)
    
    events = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 'SN_setup', 
              'SN_Rel', 'SN_HO', 'RLF_II', 'RLF_III', 'SCG_RLF']
    slots = [dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=1),
             dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=2), dt.timedelta(seconds=2), dt.timedelta(seconds=2)]
    
    LOSS_PKT_DUAL = namedtuple('LOSS_PKT_DUAL',
                               ['timestamp1', 'timestamp2', 'seq', 'cause1', 'cause2', 'trans1', 'trans2', 'trans1_time', 'trans2_time'], 
                               defaults=['', '', 0, [], [], [], [], [], []])

    LOSS_PKT_DUALs = []

    for i in range(len(loss_packets1)):

        loss_packet1 = loss_packets1.iloc[i]
        loss_packet1_timestamp = loss_packet1['Timestamp']

        loss_packet2 = loss_packets2.iloc[i]
        loss_packet2_timestamp = loss_packet2['Timestamp']

        seq = loss_packet1['seq']
        
        cause1, cause2 = [], []
        trans1, trans2 = [], []
        trans1_time, trans2_time = [], []

        for HO_type, slot in zip(events, slots):
            
            HOs1 = HO_dict1[HO_type]
            HOs2 = HO_dict2[HO_type]   

            for h in HOs1:
                
                if h.start - slot < loss_packet1_timestamp < h.start:
                    cause1.append(f'Before {HO_type}') 
                    trans1.append(h.trans)
                    trans1_time.append(h.start)

                elif (h.end is not None) and (h.start < loss_packet1_timestamp < h.end):
                    cause1.append(f'During {HO_type}')
                    trans1.append(h.trans)
                    trans1_time.append((h.start, h.end))

                elif (h.end is not None) and (h.end < loss_packet1_timestamp < h.end + slot):
                    cause1.append(f'After {HO_type}')
                    trans1.append(h.trans)
                    trans1_time.append(h.end)
            
            for h in HOs2:
                
                if h.start - slot < loss_packet2_timestamp < h.start:
                    cause2.append(f'Before {HO_type}') 
                    trans2.append(h.trans)
                    trans2_time.append(h.start)

                elif (h.end is not None) and (h.start < loss_packet2_timestamp < h.end):
                    cause2.append(f'During {HO_type}')
                    trans2.append(h.trans)
                    trans2_time.append((h.start, h.end))

                elif (h.end is not None) and (h.end < loss_packet2_timestamp < h.end + slot):
                    cause2.append(f'After {HO_type}')
                    trans2.append(h.trans)
                    trans2_time.append(h.end)
    
        LOSS_PKT_DUALs.append(LOSS_PKT_DUAL(timestamp1=loss_packet1_timestamp, timestamp2=loss_packet2_timestamp, seq=seq, 
        cause1=cause1, cause2=cause2, trans1=trans1, trans2=trans2, trans1_time=trans1_time, trans2_time=trans2_time))
                
    slot = dt.timedelta(seconds=2)
    EXCL_PKT_DUAL = namedtuple('EXCL_PKT_DUAL',
                               ['timestamp1', 'timestamp2', 'seq', 'cause1', 'cause2', 'trans1', 'trans2', 'trans1_time', 'trans2_time'], 
                               defaults=['', '', 0, [], [], [], [], [], []])

    events = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 'SN_setup', 
              'SN_Rel', 'SN_HO', 'RLF_II', 'RLF_III', 'SCG_RLF']
    slots = [dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=1),
             dt.timedelta(seconds=1), dt.timedelta(seconds=1), dt.timedelta(seconds=2), dt.timedelta(seconds=2), dt.timedelta(seconds=2)]
    
    
    EXCL_PKT_DUALs = []

    for i in range(len(excl_packets1)):

        excl_packet1 = excl_packets1.iloc[i]
        excl_packet1_timestamp = excl_packet1['Timestamp']
        excl_packet2 = excl_packets2.iloc[i]
        excl_packet2_timestamp = excl_packet2['Timestamp']

        seq = excl_packet1['seq']

        cause1, cause2 = [], []
        trans1, trans2 = [], []
        trans1_time, trans2_time = [], []

        for HO_type, slot in zip(events, slots):
            
            HOs1 = HO_dict1[HO_type]
            HOs2 = HO_dict2[HO_type]

            for h in HOs1:
                
                if h.start - slot < excl_packet1_timestamp < h.start:
                    cause1.append(f'Before {HO_type}') 
                    trans1.append(h.trans)
                    trans1_time.append(h.start)

                elif (h.end is not None) and (h.start < excl_packet1_timestamp < h.end):
                    cause1.append(f'During {HO_type}')
                    trans1.append(h.trans)
                    trans1_time.append((h.start, h.end))

                elif (h.end is not None) and (h.end < excl_packet1_timestamp < h.end + slot):
                    cause1.append(f'After {HO_type}')
                    trans1.append(h.trans)
                    trans1_time.append(h.end)

            for h in HOs2:
                
                if h.start - slot < excl_packet2_timestamp < h.start:
                    cause2.append(f'Before {HO_type}') 
                    trans2.append(h.trans)
                    trans2_time.append(h.start)

                elif (h.end is not None) and (h.start < excl_packet2_timestamp < h.end):
                    cause2.append(f'During {HO_type}')
                    trans2.append(h.trans)
                    trans2_time.append((h.start, h.end))

                elif (h.end is not None) and (h.end < excl_packet2_timestamp < h.end + slot):
                    cause2.append(f'After {HO_type}')
                    trans2.append(h.trans)                   
                    trans2_time.append(h.end)

        EXCL_PKT_DUALs.append(EXCL_PKT_DUAL(timestamp1=excl_packet1_timestamp, timestamp2=excl_packet2_timestamp, seq=seq, 
        cause1=cause1, cause2=cause2, trans1=trans1, trans2=trans2, trans1_time=trans1_time, trans2_time=trans2_time))

    return LOSS_PKT_DUALs, EXCL_PKT_DUALs

# Analyse every case.
EVENTS = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB', 'SN_setup', 
        'SN_Rel', 'SN_HO', 'RLF_II', 'RLF_III', 'SCG_RLF']
EVENTS1 = ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB']
CASES = ['all', 'two_RLF', 'two_scg_failure', 'one_RLF_one_scg', 'one_RLF', 'one_scg_failure'] + \
        ['two_identicle_HO'] + [f'two_identicle_{type}' for type in EVENTS] + \
        ['two_identicle_RLF_SN_setup'] + \
        ['pci_identicle_HO'] + [f'pci_identicle_{type}' for type in EVENTS1]

ANALYSIS = namedtuple('ANALYSIS', CASES, defaults = [0]*len(CASES))

def Analyze(pkgs):
    
    # case functions.
    # Case source and target cause type, trans are exactly the same.
    # A is cause list and B is trans list.
    def find_identicle(A1, B1, A2, B2):

        L = []
        # Take out before, during, and after.
        A1 = [a1.split(' ')[-1] for a1 in A1]
        A2 = [a2.split(' ')[-1] for a2 in A2]

        for i, (a1, b1) in enumerate(zip(A1, B1)):    
            for j, (a2, b2) in enumerate(zip(A2, B2)):
                if a1 == a2 and b1 == b2:
                    L.append((i, j))
        return L
    
    # Case source and target cause type, trans pci are exactly the same.
    # A is cause list and B is trans list.
    # This case only deal with ['LTE_HO', 'MN_HO', 'MN_HO_to_eNB'].
    def find_pci_identicle(A1, B1, A2, B2):

        L = []

        def extract_coordinates(input_string):
            pattern = r'\((\d+), (\d+)\) -> \((\d+), (\d+)\)'
            match = re.match(pattern, input_string)
            
            if match:
                return f'{match.group(1)} -> {match.group(3)}'
            else:
                return None
        
        # Take out before, during, and after.
        A1 = [a1.split(' ')[-1] for a1 in A1]
        A2 = [a2.split(' ')[-1] for a2 in A2]
        B1 = [extract_coordinates(b1) for b1 in B1]
        B2 = [extract_coordinates(b2) for b2 in B2]

        for i, (a1, b1) in enumerate(zip(A1, B1)):    
            for j, (a2, b2) in enumerate(zip(A2, B2)):
                
                if a1 not in EVENTS1 or a2 not in EVENTS1:
                    continue

                if a1 == a2 and b1 == b2:
                    L.append((i, j))
        return L

    # Count the number of every case. 
    nums = {k: 0 for k in CASES}
    nums['all'] = len(pkgs)

    for pkg in pkgs:

        cause1_string = "".join(pkg.cause1)
        cause2_string = "".join(pkg.cause2)

        if ('RLF_' in cause1_string) and ('RLF_' in cause2_string):   
            nums['two_RLF'] += 1

        elif ('SCG_RLF' in cause1_string) and ('SCG_RLF' in cause2_string): 
            nums['two_scg_failure'] += 1
        
        elif (('RLF_' in cause1_string) and ('SCG_RLF' in cause2_string) ) or (('SCG_RLF' in cause1_string) and ('RLF_' in cause2_string) ):
            nums['one_RLF_one_scg'] += 1

        elif ('SCG_RLF' in cause1_string) or ('SCG_RLF' in cause2_string): 
            nums['one_scg_failure'] += 1

        elif ('RLF_' in cause1_string) or ('RLF_' in cause2_string):  
            nums['one_RLF'] += 1

        L1 = find_identicle(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2)
        if len(L1) != 0: nums['two_identicle_HO'] += 1
        
        identicle_types = []
        for (i, _) in L1:
            _, ho_type = pkg.cause1[i].split(' ')
            identicle_types.append(ho_type)
            nums[f'two_identicle_{ho_type}'] += 1

        if ('RLF_II' in identicle_types or 'RLF_III' in identicle_types) and ('SN_setup' in identicle_types):
            nums[f'two_identicle_RLF_SN_setup'] += 1


        L2 = find_pci_identicle(pkg.cause1, pkg.trans1, pkg.cause2, pkg.trans2)
        L2 = [element for element in L2 if element not in L1] # Take out repeated element in L1. 

        if len(L2) != 0: nums['pci_identicle_HO'] += 1

        for (i, _) in L2:
            _, ho_type = pkg.cause1[i].split(' ')
            nums[f'pci_identicle_{ho_type}'] += 1        

    return ANALYSIS(*nums.values())

# Main

In [320]:
rrc_file_path1 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/diag_log_sm01_2023-06-12_16-30-21_rrc.csv'
ml1_file_path1 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/diag_log_sm01_2023-06-12_16-30-21_ml1.csv'
nr_ml1_file_path1 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/diag_log_sm01_2023-06-12_16-30-21_nr_ml1.csv'
dl_file_path1 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/udp_dnlk_loss_latency.csv'
ul_file_path1 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/udp_uplk_loss_latency.csv'

rrc_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm03/#01/data/diag_log_sm03_2023-06-12_16-30-21_rrc.csv'
ml1_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm03/#01/data/diag_log_sm03_2023-06-12_16-30-21_ml1.csv'
nr_ml1_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm03/#01/data/diag_log_sm03_2023-06-12_16-30-21_nr_ml1.csv'
dl_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm03/#01/data/udp_dnlk_loss_latency.csv'
ul_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm03/#01/data/udp_uplk_loss_latency.csv'

In [321]:
rrc_file_path1 = "/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm00/#01/data/diag_log_sm00_2023-06-12_16-30-21_rrc.csv"
ml1_file_path1 = "/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm00/#01/data/diag_log_sm00_2023-06-12_16-30-21_ml1.csv"
nr_ml1_file_path1 = "/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm00/#01/data/diag_log_sm00_2023-06-12_16-30-21_nr_ml1.csv"
dl_file_path1 = "/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm00/#01/data/udp_dnlk_loss_latency.csv"
ul_file_path1 = "/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm00/#01/data/udp_uplk_loss_latency.csv"

rrc_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/diag_log_sm01_2023-06-12_16-30-21_rrc.csv'
ml1_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/diag_log_sm01_2023-06-12_16-30-21_ml1.csv'
nr_ml1_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/diag_log_sm01_2023-06-12_16-30-21_nr_ml1.csv'
dl_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/udp_dnlk_loss_latency.csv'
ul_file_path2 = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#01/data/udp_uplk_loss_latency.csv'

In [322]:
a, b = loss_excl_cause_dual(ul_file_path1, ul_file_path2, rrc_file_path1, rrc_file_path2)

In [323]:
pkg = b[20]
pkg

EXCL_PKT_DUAL(timestamp1=Timestamp('2023-06-12 16:33:12.853898256'), timestamp2=Timestamp('2023-06-12 16:33:12.852297528'), seq=93517, cause1=['Before MN_HO', 'After SN_HO'], cause2=['Before MN_HO'], trans1=['(73, 1750) -> (294, 1750)', '16 -> 73'], trans2=['(73, 1750) -> (294, 1750)'], trans1_time=[Timestamp('2023-06-12 16:33:13.243937'), Timestamp('2023-06-12 16:33:12.340381')], trans2_time=[Timestamp('2023-06-12 16:33:13.824280')])

In [324]:
# This place give a XXXX-XX-XX.md file and find the experiment directory path
# and the corresponding band settings. It will be presented by a list of special
# instance EXPERIMENTs.

md_file_path = '/home/wmnlab/D/database/2023-06-12/2023-06-12.md'
date_dir_path = os.path.dirname(md_file_path)
EXPs = []

with open(md_file_path) as f:

    exp = f.readline()[:-1]
    settings = f.readline()[:-1]

    while exp != '#endif' and settings:
        E = EXPERIMENT(os.path.join(date_dir_path, exp), settings)
        EXPs.append(E)
        exp = f.readline()[:-1]
        settings = f.readline()[:-1]

EXPs

[EXP: /home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone -> {'sm00': 'B3B7B8', 'sm01': 'B3', 'sm02': 'B7', 'sm03': 'B8', 'sm04': 'B3B7', 'sm05': 'B3B8', 'sm06': 'B7B8', 'sm07': 'LTE'}]

In [328]:
# "This code counts the number of occurrences for each case
#  

# Still need to revise here.
EXP = EXPs[0]
exp_dir_path = EXP.path
settings = EXP.settings 

dev_dir_list = find_device_under_exp(exp_dir_path)
comb = itertools.combinations(dev_dir_list, 2)

# Record
keys = ['dl_loss', 'dl_excl', 'ul_loss', 'ul_excl']
cases = ANALYSIS._fields
analysis_dict_all = {}
for k in keys:
    analysis_dict_all[k] = {case: 0 for case in cases}

analysis_dicts = []
corresponding_list = []

for dev_dir_path1, dev_dir_path2 in comb:
    
    dev1 = dev_dir_path1.split('/')[-1]
    dev2 = dev_dir_path2.split('/')[-1]
    print(dev1, dev2)

    trace_dir_list1 = find_trace_under_device(dev_dir_path1)
    trace_dir_list2 = find_trace_under_device(dev_dir_path2)

    for trace_dir_path1, trace_dir_path2 in zip(trace_dir_list1, trace_dir_list2):

        trace = trace_dir_path1.split('/')[-1]

        data_dir_path1 = os.path.join(trace_dir_path1, 'data')
        rrc_file1 = [p for p in os.listdir(data_dir_path1) if p.endswith('_rrc.csv')][0]
        rrc_file_path1 = os.path.join(data_dir_path1, rrc_file1)
        dl_file_path1 = os.path.join(data_dir_path1, 'udp_dnlk_loss_latency.csv')
        ul_file_path1 = os.path.join(data_dir_path1, 'udp_uplk_loss_latency.csv')

        data_dir_path2 = os.path.join(trace_dir_path2, 'data')
        rrc_file2 = [p for p in os.listdir(data_dir_path2) if p.endswith('_rrc.csv')][0]
        rrc_file_path2 = os.path.join(data_dir_path2, rrc_file2)
        dl_file_path2 = os.path.join(data_dir_path2, 'udp_dnlk_loss_latency.csv')
        ul_file_path2 = os.path.join(data_dir_path2, 'udp_uplk_loss_latency.csv')
    
        dl_loss_pkgs, dl_excl_pkgs  = loss_excl_cause_dual(dl_file_path1, dl_file_path2, rrc_file_path1, rrc_file_path2)
        ul_loss_pkgs, ul_excl_pkgs = loss_excl_cause_dual(ul_file_path1, ul_file_path2, rrc_file_path1, rrc_file_path2)

        keys = ['dl_loss', 'dl_excl', 'ul_loss', 'ul_excl']
        values = [Analyze(dl_loss_pkgs), Analyze(dl_excl_pkgs), Analyze(ul_loss_pkgs), Analyze(ul_excl_pkgs)]
        analysis_dict = {k: v for k, v in zip(keys, values)}
        
        for k in keys:
            for i, case in enumerate(cases):
                analysis_dict_all[k][case] += analysis_dict[k][i]
        
        analysis_dicts.append(analysis_dict)
        corresponding_list.append((dev1, dev2, trace))


sm00 sm01
sm00 sm02
sm00 sm03
sm00 sm04
sm00 sm05
sm00 sm06
sm00 sm07
sm01 sm02
sm01 sm03
sm01 sm04
sm01 sm05
sm01 sm06
sm01 sm07
sm02 sm03
sm02 sm04
sm02 sm05
sm02 sm06
sm02 sm07
sm03 sm04
sm03 sm05
sm03 sm06
sm03 sm07
sm04 sm05
sm04 sm06
sm04 sm07
sm05 sm06
sm05 sm07
sm06 sm07


In [329]:
analysis_dict_all

{'dl_loss': {'all': 63886,
  'two_RLF': 34816,
  'two_scg_failure': 0,
  'one_RLF_one_scg': 549,
  'one_RLF': 21440,
  'one_scg_failure': 851,
  'two_identicle_HO': 25970,
  'two_identicle_LTE_HO': 652,
  'two_identicle_MN_HO': 605,
  'two_identicle_MN_HO_to_eNB': 0,
  'two_identicle_SN_setup': 19037,
  'two_identicle_SN_Rel': 0,
  'two_identicle_SN_HO': 827,
  'two_identicle_RLF_II': 10696,
  'two_identicle_RLF_III': 0,
  'two_identicle_SCG_RLF': 0,
  'two_identicle_RLF_SN_setup': 5175,
  'pci_identicle_HO': 1011,
  'pci_identicle_LTE_HO': 905,
  'pci_identicle_MN_HO': 59,
  'pci_identicle_MN_HO_to_eNB': 47},
 'dl_excl': {'all': 44934,
  'two_RLF': 3406,
  'two_scg_failure': 97,
  'one_RLF_one_scg': 434,
  'one_RLF': 9651,
  'one_scg_failure': 1076,
  'two_identicle_HO': 8137,
  'two_identicle_LTE_HO': 0,
  'two_identicle_MN_HO': 342,
  'two_identicle_MN_HO_to_eNB': 27,
  'two_identicle_SN_setup': 1874,
  'two_identicle_SN_Rel': 0,
  'two_identicle_SN_HO': 5819,
  'two_identicle_RLF_I

In [141]:
analysis_dict_all = {}
for k in keys:
    analysis_dict_all[k] = {case: 0 for case in cases}

for analysis_dict in analysis_dicts:
    for k in keys:        
        for i, case in enumerate(cases):
            analysis_dict_all[k][case] += analysis_dict[k][i]


In [142]:
analysis_dict_all

{'dl_loss': {'all': 63886,
  'one_RLF': 35365,
  'two_RLF': 22291,
  'two_identicle_HO': 15569},
 'dl_excl': {'all': 44934,
  'one_RLF': 3937,
  'two_RLF': 10727,
  'two_identicle_HO': 5785},
 'ul_loss': {'all': 3710,
  'one_RLF': 1088,
  'two_RLF': 221,
  'two_identicle_HO': 209},
 'ul_excl': {'all': 1031406,
  'one_RLF': 21637,
  'two_RLF': 49872,
  'two_identicle_HO': 36238}}

In [55]:
# file = '/home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#06/data/diag_log_sm01_2023-06-12_17-16-04_rrc.csv'
# L = parse_mi_ho(file)
# L

In [None]:
# /home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm00/#06/data/diag_log_sm00_2023-06-12_17-16-04_rrc.csv
# /home/wmnlab/D/database/2023-06-12/Bandlock_8_Schemes_Phone/sm01/#06/data/diag_log_sm01_2023-06-12_17-16-04_rrc.csv

In [144]:
35365+22291

73225