In [52]:
import os

import pandas as pd
import numpy as np

from functools import reduce
from datetime import datetime, timedelta

In [53]:
# Show every column when a DataFrame is displayed (None removes the cap).
pd.options.display.max_columns = None

In [54]:
# Base directory for every CSV this notebook reads or writes. The path is
# relative, so it resolves against the notebook's working directory.
data_folder = "../Data/Data v5"

## Data Parsing ENB Counters

In [55]:
# Single switch selecting which eNB export this section parses: "classic" or "mini".
# Both the input path below and the output file name written after parsing are
# derived from `enb_name`, so the two can no longer get out of sync (previously
# two separate lines had to be commented/uncommented together).
# The "Data Unification" section reads both the classic and the mini tabular
# files, so this section has to be run once for each value.
enb_name = "classic"
enb_counters_path = os.path.join(data_folder, f"nwdaf-{enb_name}/enb_counters.csv")

In [56]:
%%time

enb_counters_data_df = pd.read_csv(enb_counters_path, skiprows = 3)

enb_counters_data_df.drop(['Unnamed: 0', 'result'], axis = 1, inplace = True)
enb_counters_data_df.dropna(how = 'all', inplace = True)
enb_counters_data_df.drop(
    enb_counters_data_df[
        ~(enb_counters_data_df['table'].astype(str).str.isnumeric())
    ].index, 
    inplace = True
)

enb_counters_data_df['_time'] = pd.to_datetime(enb_counters_data_df['_time'], format = 'mixed')

enb_counters_ds = enb_counters_data_df.pivot(
            index = ['_time'], 
            columns = '_field', 
            values = '_value'
        ).reset_index()




CPU times: user 26.5 s, sys: 1.64 s, total: 28.1 s
Wall time: 28.1 s


In [57]:
# Persist the wide table for the selected variant; the row index is not written.
enb_counters_ds.to_csv(
    path_or_buf = os.path.join(data_folder, f'./enb_counters_data_{enb_name}_tabular.csv'),
    index = False,
)

## Data Unification

In [58]:
# Reload the per-variant wide tables written by the parsing section.
classic_ds = pd.read_csv(
    filepath_or_buffer = os.path.join(data_folder, 'enb_counters_data_classic_tabular.csv')
)
mini_ds = pd.read_csv(
    filepath_or_buffer = os.path.join(data_folder, 'enb_counters_data_mini_tabular.csv')
)

In [60]:
# Columns whose name contains 'cell_' are treated as the per-cell counters.
cell_1_cols = [col_name for col_name in classic_ds.columns if 'cell_' in col_name]
cell_2_cols = [col_name for col_name in mini_ds.columns if 'cell_' in col_name]

In [61]:
# Reduce each per-cell column name to its bare metric name: split on '_' and
# keep only the tokens that follow the first cell-number token ('1' for the
# classic frame, '2' for the mini frame), re-joined with '_'.
cell_1_metrics = [
    '_'.join(tokens[tokens.index('1') + 1:])
    for tokens in (col_name.split('_') for col_name in cell_1_cols)
]
cell_2_metrics = [
    '_'.join(tokens[tokens.index('2') + 1:])
    for tokens in (col_name.split('_') for col_name in cell_2_cols)
]

# Sanity check: metrics present for one cell but not the other (both empty
# sets when the two frames expose the same metrics).
print(set(cell_1_metrics).difference(cell_2_metrics))
print(set(cell_2_metrics).difference(cell_1_metrics))

set()
set()


In [62]:
# Tag every row with the cell it came from so the two frames stay
# distinguishable once their metric columns share the same names.
classic_ds = classic_ds.assign(cell_id = 'cell_1')
mini_ds = mini_ds.assign(cell_id = 'cell_2')

In [64]:
# Replace the cell-specific column names with the shared metric names so both
# frames end up with the same schema.
classic_ds = classic_ds.rename(columns = dict(zip(cell_1_cols, cell_1_metrics)))
mini_ds = mini_ds.rename(columns = dict(zip(cell_2_cols, cell_2_metrics)))

### Labeling

In [73]:
# Default attack windows (inclusive on both ends) used for labeling.
attack_1_start = "2024-03-23 21:26:00"
attack_1_end = "2024-03-23 22:23:00"

attack_2_start = "2024-03-23 22:56:00"
attack_2_end = "2024-03-23 23:56:00"


def label_df(df, attack_windows = None):
    """Return a copy of ``df`` with a binary ``label`` column added.

    A row is labeled 1 when its ``_time`` value falls inside any attack
    window (both bounds inclusive) and 0 otherwise.

    The input frame is left untouched: the copy is taken *before* the
    ``label`` column is written, so the caller's frame is never mutated.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``_time`` column comparable with the window bounds.
    attack_windows : iterable of (start, end) pairs, optional
        Windows to flag. Defaults to the two module-level windows
        ``(attack_1_start, attack_1_end)`` and ``(attack_2_start, attack_2_end)``.
        An empty iterable labels every row 0.

    Returns
    -------
    pandas.DataFrame
        A new frame equal to ``df`` plus the integer ``label`` column.
    """
    if attack_windows is None:
        attack_windows = [
            (attack_1_start, attack_1_end),
            (attack_2_start, attack_2_end),
        ]

    # Copy first so the label column is only ever written to the returned frame.
    labeled = df.copy()

    # OR together the membership mask of every window.
    in_attack = pd.Series(False, index = labeled.index)
    for window_start, window_end in attack_windows:
        in_attack = in_attack | labeled['_time'].between(window_start, window_end)

    labeled['label'] = np.where(in_attack, 1, 0)

    return labeled

In [75]:
# The timestamps were read back from CSV, so parse them into datetimes again
# before attaching the attack labels.
classic_ds = label_df(
    classic_ds.assign(_time = pd.to_datetime(classic_ds['_time'], format = 'mixed'))
)
mini_ds = label_df(
    mini_ds.assign(_time = pd.to_datetime(mini_ds['_time'], format = 'mixed'))
)

In [76]:
# Write the labeled frames next to the other data files; the row index is not written.
classic_ds.to_csv(
    path_or_buf = os.path.join(data_folder, "enb_counters_data_classic_labeled.csv"),
    index = False,
)
mini_ds.to_csv(
    path_or_buf = os.path.join(data_folder, "enb_counters_data_mini_labeled.csv"),
    index = False,
)