#### Import Packages

In [1]:
from pathlib import Path

import pandas as pd

#### Read Dataset

In [2]:
# Load the dataset from its CSV file into a DataFrame
ue_data = pd.read_csv(filepath_or_buffer='ue_dataset_01.csv')

In [3]:
# Parse the `_time` column into datetimes (overwrites the column in ue_data)
ue_data['_time'] = pd.to_datetime(ue_data['_time'])

# Earliest and latest timestamps; their difference is the covered time span
first_timestamp = ue_data['_time'].min()
last_timestamp = ue_data['_time'].max()
duration = last_timestamp - first_timestamp

# Break the span into whole days plus the hours/minutes of the partial day
# (`duration.seconds` is the sub-day part only, it excludes whole days)
days = duration.days
partial_day_seconds = duration.seconds
hours = partial_day_seconds // 3600
minutes = (partial_day_seconds % 3600) // 60

print(f"Duration of the dataset: {days} days, {hours} hours, {minutes} minutes")
print(first_timestamp)
print(last_timestamp)

Duration of the dataset: 4 days, 18 hours, 59 minutes
2024-08-17 12:00:01.700000
2024-08-22 06:59:58.267000


#### Create a dataframe per UE

In [4]:
# Distinct values of the 'imeisv' column, one per UE present in the dataset
unique_imeisvs = ue_data.loc[:, 'imeisv'].unique()
print(unique_imeisvs)

[3557821101183501 8642840401624200 8642840401594200 8609960480859058
 8609960468879057 8677660403123800 8628490433231158 8642840401612300
 8609960480666910]


In [5]:
# Map every unique imeisv to the subset of ue_data rows carrying that imeisv,
# so each UE can be looked up and processed on its own
user_dataframes = {
    imeisv: ue_data.loc[ue_data['imeisv'] == imeisv]
    for imeisv in unique_imeisvs
}

#### UE: 8642840401612300 (1) Malicious

In [6]:
# 1st UE (imeisv 8642840401612300): take its rows and renumber them 0..n-1,
# discarding the index inherited from the full dataset
ue_1 = user_dataframes[8642840401612300].reset_index(drop=True)
ue_1

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:02.985,8642840401612300,4739106,658920,10204444.0,8.045907e+09,4156.0,131253.0,46.0,0.0,...,20.0,15.0,-126.1,-22.0,22.7,2.659,479.0,0,1,0
1,2024-08-17 12:00:08.114,8642840401612300,4739106,658920,10204864.0,8.045907e+09,4159.0,131637.0,45.0,0.0,...,20.0,15.0,-126.9,-22.0,21.4,2.653,482.0,0,1,0
2,2024-08-17 12:00:13.250,8642840401612300,4739106,658920,10205284.0,8.045908e+09,4127.0,134530.0,44.0,0.0,...,20.0,14.0,-126.7,-22.0,19.2,2.677,488.0,0,1,0
3,2024-08-17 12:00:18.387,8642840401612300,4739106,658920,10205704.0,8.045908e+09,4137.0,133194.0,45.0,0.0,...,20.0,15.0,-125.7,-22.0,21.1,2.725,483.0,0,1,0
4,2024-08-17 12:00:23.520,8642840401612300,4739106,658920,10206208.0,8.045909e+09,4563.0,134536.0,43.0,0.0,...,22.0,14.0,-128.2,-22.0,18.6,2.741,483.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76023,2024-08-22 06:59:37.734,8642840401612300,6984662,1046126,8803956.0,3.443605e+08,4338.0,78791.0,30.0,0.0,...,21.0,15.0,-101.8,-22.0,22.4,2.787,280.0,0,1,0
76024,2024-08-22 06:59:42.871,8642840401612300,6984662,1046126,8804460.0,3.443610e+08,4140.0,77280.0,22.0,0.0,...,20.0,14.0,-101.6,-22.0,22.7,2.676,277.0,0,1,0
76025,2024-08-22 06:59:48.006,8642840401612300,6984662,1046126,8804880.0,3.443614e+08,3932.0,80437.0,25.0,0.0,...,19.0,15.0,-102.6,-22.0,22.5,2.711,283.0,0,1,0
76026,2024-08-22 06:59:53.136,8642840401612300,6984662,1046126,8805300.0,3.443618e+08,4342.0,83717.0,28.0,0.0,...,21.0,15.0,-101.7,-22.0,22.0,2.868,282.0,0,1,0


In [7]:
# Per-column tally of missing entries (NaN / NaT) for this UE
nan_counts = ue_1.isna().sum(axis=0)

# Heading and counts are emitted on separate lines
print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                      90
imeisv                      0
bearer_0_dl_total_bytes     0
bearer_0_ul_total_bytes     0
bearer_1_dl_total_bytes     0
bearer_1_ul_total_bytes     0
dl_bitrate                  0
ul_bitrate                  0
ul_retx                     0
ul_err                      0
ul_mcs                      2
ul_path_loss                0
ul_phr                      0
dl_err                      0
dl_mcs                     18
dl_retx                     0
dl_tx                       0
cqi                         0
epre                        0
p_ue                        0
pusch_snr                   0
turbo_decoder_avg           3
ul_tx                       0
attack                      0
malicious                   0
attack_number               0
dtype: int64


In [8]:
# Make sure `_time` holds datetimes. Note: the rows are NOT re-sorted here;
# the fill below works on the rows in their existing order.
ue_1['_time'] = pd.to_datetime(ue_1['_time'])

# Timestamps of the previous and next row. shift() leaves NaT at the first
# and last row, so boundary rows can never qualify for the midpoint fill.
prev_time = ue_1['_time'].shift(1)
next_time = ue_1['_time'].shift(-1)

# Midpoint between the two neighbours; NaT wherever either neighbour is missing
midpoint_time = prev_time + (next_time - prev_time) / 2

# An isolated gap is a NaT whose previous and next timestamps are both valid.
# Runs of consecutive NaT are intentionally left untouched by this cell.
isolated_gap = ue_1['_time'].isna() & midpoint_time.notna()
ue_1.loc[isolated_gap, '_time'] = midpoint_time[isolated_gap]

In [9]:
# Fill isolated NaN values in the numeric columns with the mean of the
# previous and next rows. Non-numeric columns (e.g. datetime `_time`) are
# skipped because they cannot be averaged with `+` and `/`.
for column in ue_1.select_dtypes(include='number').columns:
    values = ue_1[column]
    if not values.isna().any():
        continue  # nothing to repair in this column

    # shift() yields NaN at the first/last row and NaN propagates through the
    # sum, so the mean is only defined when both neighbours are valid.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    isolated_gap = values.isna() & neighbour_mean.notna()
    ue_1.loc[isolated_gap, column] = neighbour_mean[isolated_gap]

# Forward-fill whatever is still missing (e.g. runs of consecutive gaps) from
# the closest previous row. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill'). Gaps in the very first rows have no previous value
# and therefore remain missing.
ue_1 = ue_1.ffill()

In [10]:
# Write this UE's records to CSV without the row index. The output folder is
# created first so the export does not fail when it does not exist yet.
output_path = Path('Per_UE_Datasets') / 'ue_8642840401612300_malicious.csv'
output_path.parent.mkdir(parents=True, exist_ok=True)
ue_1.to_csv(output_path, index=False)

#### UE: 8642840401624200 (2) Malicious

In [11]:
# 2nd UE (imeisv 8642840401624200): take its rows and renumber them 0..n-1,
# discarding the index inherited from the full dataset
ue_2 = user_dataframes[8642840401624200].reset_index(drop=True)
ue_2

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:01.700,8642840401624200,4821610,670248,5930564.0,4.496936e+09,2251.0,2786.0,4.0,0.0,...,17.0,7.0,-113.2,-17.0,23.1,2.571,10.0,0,1,0
1,2024-08-17 12:00:06.845,8642840401624200,4821610,670248,5930984.0,4.496936e+09,2340.0,2693.0,1.0,0.0,...,18.0,7.0,-116.0,-17.0,19.1,2.364,10.0,0,1,0
2,2024-08-17 12:00:11.997,8642840401624200,4821610,670248,5931488.0,4.496937e+09,2792.0,3047.0,0.0,0.0,...,23.0,7.0,-116.7,-17.0,22.0,1.833,12.0,0,1,0
3,2024-08-17 12:00:17.156,8642840401624200,4821610,670248,5931908.0,4.496937e+09,2346.0,2332.0,0.0,0.0,...,19.0,8.0,-117.2,-16.0,14.6,1.600,10.0,0,1,0
4,2024-08-17 12:00:22.299,8642840401624200,4821610,670248,5932328.0,4.496938e+09,2169.0,2418.0,0.0,0.0,...,15.0,7.0,-117.0,-17.0,16.3,1.600,10.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75299,2024-08-22 06:59:34.786,8642840401624200,6968078,1045044,13393152.0,2.402714e+08,3502.0,4111.0,3.0,0.0,...,20.0,10.0,-114.0,-16.0,9.6,3.143,18.0,0,1,0
75300,2024-08-22 06:59:39.931,8642840401624200,6968078,1045044,13393572.0,2.402718e+08,3879.0,4386.0,1.0,0.0,...,21.0,9.0,-111.8,-16.0,13.0,2.800,19.0,0,1,0
75301,2024-08-22 06:59:45.077,8642840401624200,6968078,1045044,13394076.0,2.402723e+08,4070.0,4341.0,2.0,0.0,...,22.0,10.0,-111.1,-16.0,13.5,2.950,18.0,0,1,0
75302,2024-08-22 06:59:50.242,8642840401624200,6968078,1045044,13394496.0,2.402727e+08,3549.0,4261.0,2.0,0.0,...,20.0,10.0,-111.9,-16.0,13.0,2.950,18.0,0,1,0


In [12]:
# Per-column tally of missing entries (NaN / NaT) for this UE
nan_counts = ue_2.isna().sum(axis=0)

# Heading and counts are emitted on separate lines
print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                      79
imeisv                      0
bearer_0_dl_total_bytes     0
bearer_0_ul_total_bytes     0
bearer_1_dl_total_bytes     0
bearer_1_ul_total_bytes     0
dl_bitrate                  0
ul_bitrate                  0
ul_retx                     0
ul_err                      0
ul_mcs                     59
ul_path_loss                0
ul_phr                      0
dl_err                      0
dl_mcs                      1
dl_retx                     0
dl_tx                       0
cqi                         0
epre                        0
p_ue                        0
pusch_snr                   0
turbo_decoder_avg          58
ul_tx                       0
attack                      0
malicious                   0
attack_number               0
dtype: int64


In [13]:
# Make sure `_time` holds datetimes. Note: the rows are NOT re-sorted here;
# the fill below works on the rows in their existing order.
ue_2['_time'] = pd.to_datetime(ue_2['_time'])

# Timestamps of the previous and next row. shift() leaves NaT at the first
# and last row, so boundary rows can never qualify for the midpoint fill.
prev_time = ue_2['_time'].shift(1)
next_time = ue_2['_time'].shift(-1)

# Midpoint between the two neighbours; NaT wherever either neighbour is missing
midpoint_time = prev_time + (next_time - prev_time) / 2

# An isolated gap is a NaT whose previous and next timestamps are both valid.
# Runs of consecutive NaT are intentionally left untouched by this cell.
isolated_gap = ue_2['_time'].isna() & midpoint_time.notna()
ue_2.loc[isolated_gap, '_time'] = midpoint_time[isolated_gap]

In [14]:
# Fill isolated NaN values in the numeric columns with the mean of the
# previous and next rows. Non-numeric columns (e.g. datetime `_time`) are
# skipped because they cannot be averaged with `+` and `/`.
for column in ue_2.select_dtypes(include='number').columns:
    values = ue_2[column]
    if not values.isna().any():
        continue  # nothing to repair in this column

    # shift() yields NaN at the first/last row and NaN propagates through the
    # sum, so the mean is only defined when both neighbours are valid.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    isolated_gap = values.isna() & neighbour_mean.notna()
    ue_2.loc[isolated_gap, column] = neighbour_mean[isolated_gap]

# Forward-fill whatever is still missing (e.g. runs of consecutive gaps) from
# the closest previous row. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill'). Gaps in the very first rows have no previous value
# and therefore remain missing.
ue_2 = ue_2.ffill()

In [15]:
# Write this UE's records to CSV without the row index. The output folder is
# created first so the export does not fail when it does not exist yet.
output_path = Path('Per_UE_Datasets') / 'ue_8642840401624200_malicious.csv'
output_path.parent.mkdir(parents=True, exist_ok=True)
ue_2.to_csv(output_path, index=False)

#### UE: 8642840401594200 (3) Malicious

In [16]:
# 3rd UE (imeisv 8642840401594200): take its rows and renumber them 0..n-1,
# discarding the index inherited from the full dataset
ue_3 = user_dataframes[8642840401594200].reset_index(drop=True)
ue_3

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:01.700,8642840401594200,4775173,662867,415392.0,433845.0,3413.0,2182.0,4.0,0.0,...,19.0,10.0,-106.6,-15.0,8.2,2.929,10.0,0,1,0
1,2024-08-17 12:00:06.845,8642840401594200,4775173,662867,415812.0,434265.0,3631.0,2090.0,0.0,0.0,...,20.0,10.0,-107.9,-13.0,5.4,1.700,10.0,0,1,0
2,2024-08-17 12:00:11.997,8642840401594200,4775173,662867,416232.0,434685.0,3528.0,2117.0,1.0,0.0,...,20.0,10.0,-107.1,-13.0,21.1,2.091,10.0,0,1,0
3,2024-08-17 12:00:17.156,8642840401594200,4775173,662867,416820.0,435357.0,4533.0,3076.0,6.0,0.0,...,25.0,10.0,-105.7,-15.0,22.0,3.050,14.0,0,1,0
4,2024-08-17 12:00:22.299,8642840401594200,4775173,662867,417324.0,435861.0,3070.0,2519.0,6.0,0.0,...,19.0,10.0,-106.5,-15.0,11.2,3.059,11.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76200,2024-08-22 06:59:34.786,8642840401594200,6936722,1042786,15259346.0,21897344.0,2788.0,3475.0,0.0,0.0,...,23.0,6.0,-105.1,-17.0,19.4,2.692,13.0,0,1,0
76201,2024-08-22 06:59:39.931,8642840401594200,6936722,1042786,15259766.0,21897764.0,2385.0,2598.0,1.0,0.0,...,21.0,6.0,-102.8,-17.0,19.5,2.545,10.0,0,1,0
76202,2024-08-22 06:59:45.077,8642840401594200,6936722,1042786,15260270.0,21898268.0,2524.0,3418.0,1.0,0.0,...,22.0,6.0,-103.1,-17.0,20.2,3.000,12.0,0,1,0
76203,2024-08-22 06:59:50.242,8642840401594200,6936722,1042786,15260690.0,21898688.0,2191.0,2960.0,4.0,0.0,...,20.0,6.0,-103.0,-17.0,21.4,3.286,10.0,0,1,0


In [17]:
# Per-column tally of missing entries (NaN / NaT) for this UE
nan_counts = ue_3.isna().sum(axis=0)

# Heading and counts are emitted on separate lines
print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                      79
imeisv                      0
bearer_0_dl_total_bytes     0
bearer_0_ul_total_bytes     0
bearer_1_dl_total_bytes     3
bearer_1_ul_total_bytes     3
dl_bitrate                  0
ul_bitrate                  0
ul_retx                     0
ul_err                      0
ul_mcs                     21
ul_path_loss                0
ul_phr                      0
dl_err                      0
dl_mcs                      0
dl_retx                     0
dl_tx                       0
cqi                         0
epre                        0
p_ue                        0
pusch_snr                   0
turbo_decoder_avg          21
ul_tx                       0
attack                      0
malicious                   0
attack_number               0
dtype: int64


In [18]:
# Make sure `_time` holds datetimes. Note: the rows are NOT re-sorted here;
# the fill below works on the rows in their existing order.
ue_3['_time'] = pd.to_datetime(ue_3['_time'])

# Timestamps of the previous and next row. shift() leaves NaT at the first
# and last row, so boundary rows can never qualify for the midpoint fill.
prev_time = ue_3['_time'].shift(1)
next_time = ue_3['_time'].shift(-1)

# Midpoint between the two neighbours; NaT wherever either neighbour is missing
midpoint_time = prev_time + (next_time - prev_time) / 2

# An isolated gap is a NaT whose previous and next timestamps are both valid.
# Runs of consecutive NaT are intentionally left untouched by this cell.
isolated_gap = ue_3['_time'].isna() & midpoint_time.notna()
ue_3.loc[isolated_gap, '_time'] = midpoint_time[isolated_gap]

In [19]:
# Fill isolated NaN values in the numeric columns with the mean of the
# previous and next rows. Non-numeric columns (e.g. datetime `_time`) are
# skipped because they cannot be averaged with `+` and `/`.
for column in ue_3.select_dtypes(include='number').columns:
    values = ue_3[column]
    if not values.isna().any():
        continue  # nothing to repair in this column

    # shift() yields NaN at the first/last row and NaN propagates through the
    # sum, so the mean is only defined when both neighbours are valid.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    isolated_gap = values.isna() & neighbour_mean.notna()
    ue_3.loc[isolated_gap, column] = neighbour_mean[isolated_gap]

# Forward-fill whatever is still missing (e.g. runs of consecutive gaps) from
# the closest previous row. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill'). Gaps in the very first rows have no previous value
# and therefore remain missing.
ue_3 = ue_3.ffill()

In [20]:
# Write this UE's records to CSV without the row index. The output folder is
# created first so the export does not fail when it does not exist yet.
output_path = Path('Per_UE_Datasets') / 'ue_8642840401594200_malicious.csv'
output_path.parent.mkdir(parents=True, exist_ok=True)
ue_3.to_csv(output_path, index=False)

#### UE: 8677660403123800 (4) Malicious

In [21]:
# 4th UE (imeisv 8677660403123800): take its rows and renumber them 0..n-1,
# discarding the index inherited from the full dataset
ue_4 = user_dataframes[8677660403123800].reset_index(drop=True)
ue_4

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:01.700,8677660403123800,4759761,664969,4628913.0,4652661.0,6391.0,3041.0,0.0,0.0,...,20.0,15.0,-115.9,-16.0,19.9,3.000,10.0,0,1,0
1,2024-08-17 12:00:06.845,8677660403123800,4759761,664969,4629333.0,4653081.0,6881.0,3022.0,2.0,0.0,...,21.0,15.0,-116.0,-16.0,23.6,3.000,10.0,0,1,0
2,2024-08-17 12:00:11.997,8677660403123800,4759761,664969,4629753.0,4653501.0,6506.0,2963.0,2.0,0.0,...,19.0,15.0,-114.3,-19.0,24.7,2.750,10.0,0,1,0
3,2024-08-17 12:00:17.156,8677660403123800,4759761,664969,4630173.0,4653921.0,6446.0,2964.0,1.0,0.0,...,20.0,15.0,-114.8,-16.0,21.8,2.818,10.0,0,1,0
4,2024-08-17 12:00:22.299,8677660403123800,4759761,664969,4630593.0,4654341.0,6389.0,2928.0,0.0,0.0,...,20.0,15.0,-115.2,-16.0,19.3,2.900,10.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76397,2024-08-22 06:59:37.734,8677660403123800,6940396,1040186,13528681.0,19143851.0,1521.0,90381.0,22.0,0.0,...,21.0,6.0,-99.5,7.0,21.2,3.512,269.0,0,1,0
76398,2024-08-22 06:59:42.871,8677660403123800,6940396,1040186,13529101.0,19144271.0,1358.0,93136.0,22.0,0.0,...,20.0,6.0,-99.4,7.0,23.2,3.406,276.0,0,1,0
76399,2024-08-22 06:59:48.006,8677660403123800,6940396,1040186,13529521.0,19144691.0,1363.0,93871.0,28.0,0.0,...,20.0,5.0,-98.9,7.0,25.8,3.461,278.0,0,1,0
76400,2024-08-22 06:59:53.136,8677660403123800,6940396,1040186,13529941.0,19145111.0,1317.0,91564.0,26.0,0.0,...,18.0,6.0,-99.4,7.0,23.8,3.411,271.0,0,1,0


In [22]:
# Per-column tally of missing entries (NaN / NaT) for this UE
nan_counts = ue_4.isna().sum(axis=0)

# Heading and counts are emitted on separate lines
print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                      75
imeisv                      0
bearer_0_dl_total_bytes     0
bearer_0_ul_total_bytes     0
bearer_1_dl_total_bytes     1
bearer_1_ul_total_bytes     1
dl_bitrate                  0
ul_bitrate                  0
ul_retx                     0
ul_err                      0
ul_mcs                      1
ul_path_loss                0
ul_phr                      0
dl_err                      0
dl_mcs                      9
dl_retx                     0
dl_tx                       0
cqi                         0
epre                        0
p_ue                        0
pusch_snr                   0
turbo_decoder_avg           1
ul_tx                       0
attack                      0
malicious                   0
attack_number               0
dtype: int64


In [23]:
# Make sure `_time` holds datetimes. Note: the rows are NOT re-sorted here;
# the fill below works on the rows in their existing order.
ue_4['_time'] = pd.to_datetime(ue_4['_time'])

# Timestamps of the previous and next row. shift() leaves NaT at the first
# and last row, so boundary rows can never qualify for the midpoint fill.
prev_time = ue_4['_time'].shift(1)
next_time = ue_4['_time'].shift(-1)

# Midpoint between the two neighbours; NaT wherever either neighbour is missing
midpoint_time = prev_time + (next_time - prev_time) / 2

# An isolated gap is a NaT whose previous and next timestamps are both valid.
# Runs of consecutive NaT are intentionally left untouched by this cell.
isolated_gap = ue_4['_time'].isna() & midpoint_time.notna()
ue_4.loc[isolated_gap, '_time'] = midpoint_time[isolated_gap]

In [24]:
# Fill isolated NaN values in the numeric columns with the mean of the
# previous and next rows. Non-numeric columns (e.g. datetime `_time`) are
# skipped because they cannot be averaged with `+` and `/`.
for column in ue_4.select_dtypes(include='number').columns:
    values = ue_4[column]
    if not values.isna().any():
        continue  # nothing to repair in this column

    # shift() yields NaN at the first/last row and NaN propagates through the
    # sum, so the mean is only defined when both neighbours are valid.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    isolated_gap = values.isna() & neighbour_mean.notna()
    ue_4.loc[isolated_gap, column] = neighbour_mean[isolated_gap]

# Forward-fill whatever is still missing (e.g. runs of consecutive gaps) from
# the closest previous row. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill'). Gaps in the very first rows have no previous value
# and therefore remain missing.
ue_4 = ue_4.ffill()

In [25]:
# Write this UE's records to CSV without the row index. The output folder is
# created first so the export does not fail when it does not exist yet.
output_path = Path('Per_UE_Datasets') / 'ue_8677660403123800_malicious.csv'
output_path.parent.mkdir(parents=True, exist_ok=True)
ue_4.to_csv(output_path, index=False)

#### UE: 3557821101183501 (5) Malicious

In [26]:
# 5th UE (imeisv 3557821101183501): take its rows and renumber them 0..n-1,
# discarding the index inherited from the full dataset
ue_5 = user_dataframes[3557821101183501].reset_index(drop=True)
ue_5

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:01.700,3557821101183501,4832474,681308,4673102.0,4660701.0,5484.0,2257.0,0.0,0.0,...,20.0,13.0,-88.7,-12.0,43.0,2.0,10.0,0,1,0
1,2024-08-17 12:00:06.845,3557821101183501,4832474,681308,4673522.0,4661121.0,5421.0,2252.0,0.0,0.0,...,20.0,13.0,-89.8,-12.0,40.1,2.0,10.0,0,1,0
2,2024-08-17 12:00:11.997,3557821101183501,4832474,681308,4674026.0,4661625.0,6272.0,2705.0,0.0,0.0,...,23.0,13.0,-89.4,-12.0,39.8,2.0,12.0,0,1,0
3,2024-08-17 12:00:17.156,3557821101183501,4832474,681308,4674446.0,4662045.0,5194.0,2245.0,0.0,0.0,...,20.0,13.0,-89.8,-12.0,36.9,2.0,10.0,0,1,0
4,2024-08-17 12:00:22.299,3557821101183501,4832474,681308,4674866.0,4662465.0,5401.0,2256.0,0.0,0.0,...,20.0,13.0,-90.1,-12.0,44.3,2.0,10.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76208,2024-08-22 06:59:34.786,3557821101183501,6910390,1036620,13639322.0,19724263.0,2936.0,2252.0,0.0,0.0,...,20.0,9.0,-79.5,-18.0,40.5,2.0,10.0,0,1,0
76209,2024-08-22 06:59:39.931,3557821101183501,6910390,1036620,13639742.0,19724683.0,3154.0,2256.0,0.0,0.0,...,21.0,9.0,-80.0,-18.0,38.4,2.0,10.0,0,1,0
76210,2024-08-22 06:59:45.077,3557821101183501,6910390,1036620,13640246.0,19725187.0,3393.0,2703.0,0.0,0.0,...,22.0,9.0,-78.2,-16.0,40.7,2.0,12.0,0,1,0
76211,2024-08-22 06:59:50.242,3557821101183501,6910390,1036620,13640666.0,19725607.0,3025.0,2247.0,0.0,0.0,...,20.0,9.0,-79.3,-16.0,41.7,2.0,10.0,0,1,0


In [27]:
# Per-column tally of missing entries (NaN / NaT) for this UE
nan_counts = ue_5.isna().sum(axis=0)

# Heading and counts are emitted on separate lines
print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                      79
imeisv                      0
bearer_0_dl_total_bytes     0
bearer_0_ul_total_bytes     0
bearer_1_dl_total_bytes     0
bearer_1_ul_total_bytes     0
dl_bitrate                  0
ul_bitrate                  0
ul_retx                     0
ul_err                      0
ul_mcs                      8
ul_path_loss                0
ul_phr                      0
dl_err                      0
dl_mcs                      2
dl_retx                     0
dl_tx                       0
cqi                         0
epre                        0
p_ue                        0
pusch_snr                   0
turbo_decoder_avg           8
ul_tx                       0
attack                      0
malicious                   0
attack_number               0
dtype: int64


In [28]:
# Make sure `_time` holds datetimes. Note: the rows are NOT re-sorted here;
# the fill below works on the rows in their existing order.
ue_5['_time'] = pd.to_datetime(ue_5['_time'])

# Timestamps of the previous and next row. shift() leaves NaT at the first
# and last row, so boundary rows can never qualify for the midpoint fill.
prev_time = ue_5['_time'].shift(1)
next_time = ue_5['_time'].shift(-1)

# Midpoint between the two neighbours; NaT wherever either neighbour is missing
midpoint_time = prev_time + (next_time - prev_time) / 2

# An isolated gap is a NaT whose previous and next timestamps are both valid.
# Runs of consecutive NaT are intentionally left untouched by this cell.
isolated_gap = ue_5['_time'].isna() & midpoint_time.notna()
ue_5.loc[isolated_gap, '_time'] = midpoint_time[isolated_gap]

In [29]:
# Fill isolated NaN values in the numeric columns with the mean of the
# previous and next rows. Non-numeric columns (e.g. datetime `_time`) are
# skipped because they cannot be averaged with `+` and `/`.
for column in ue_5.select_dtypes(include='number').columns:
    values = ue_5[column]
    if not values.isna().any():
        continue  # nothing to repair in this column

    # shift() yields NaN at the first/last row and NaN propagates through the
    # sum, so the mean is only defined when both neighbours are valid.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    isolated_gap = values.isna() & neighbour_mean.notna()
    ue_5.loc[isolated_gap, column] = neighbour_mean[isolated_gap]

# Forward-fill whatever is still missing (e.g. runs of consecutive gaps) from
# the closest previous row. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill'). Gaps in the very first rows have no previous value
# and therefore remain missing.
ue_5 = ue_5.ffill()

In [30]:
# Write this UE's records to CSV without the row index. The output folder is
# created first so the export does not fail when it does not exist yet.
output_path = Path('Per_UE_Datasets') / 'ue_3557821101183501_malicious.csv'
output_path.parent.mkdir(parents=True, exist_ok=True)
ue_5.to_csv(output_path, index=False)

#### UE: 8628490433231158 (6) Normal

In [31]:
# 6th UE (imeisv 8628490433231158): take its rows and renumber them 0..n-1,
# discarding the index inherited from the full dataset
ue_6 = user_dataframes[8628490433231158].reset_index(drop=True)
ue_6

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:02.985,8628490433231158,456244603,1580385658,1.314950e+05,1.355300e+04,3193275.0,10211945.0,171.0,4.0,...,1385.0,15.0,-133.3,-26.0,15.4,3.796,1265.0,0,0,0
1,2024-08-17 12:00:08.114,8628490433231158,458161828,1587382476,1.314950e+05,1.355300e+04,3203202.0,11110921.0,146.0,0.0,...,1434.0,15.0,-133.3,-11.0,15.6,3.837,1333.0,0,0,0
2,2024-08-17 12:00:13.250,8628490433231158,460089041,1593745497,1.314950e+05,1.355300e+04,3192006.0,10099368.0,169.0,2.0,...,1377.0,15.0,-132.9,-10.0,15.9,3.773,1252.0,0,0,0
3,2024-08-17 12:00:18.387,8628490433231158,462017347,1600818919,1.314950e+05,1.355300e+04,3193739.0,11083242.0,152.0,0.0,...,1396.0,15.0,-132.6,-22.0,15.6,3.883,1339.0,0,0,0
4,2024-08-17 12:00:23.520,8628490433231158,463945114,1607206124,1.314950e+05,1.355300e+04,3206756.0,10251024.0,163.0,2.0,...,1393.0,15.0,-133.3,-10.0,15.2,3.807,1264.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76682,2024-08-22 06:59:37.734,8628490433231158,6926467,545105,2.799739e+10,9.676339e+10,4965.0,60831.0,25.0,0.0,...,24.0,15.0,-105.9,-20.0,15.3,1.875,278.0,0,0,0
76683,2024-08-22 06:59:42.871,8628490433231158,6926467,545105,2.799739e+10,9.676340e+10,5769.0,62191.0,23.0,0.0,...,27.0,15.0,-105.5,-18.0,17.5,1.918,282.0,0,0,0
76684,2024-08-22 06:59:48.006,8628490433231158,6926467,545105,2.799739e+10,9.676340e+10,4974.0,60502.0,28.0,0.0,...,24.0,15.0,-114.1,-20.0,9.3,1.885,276.0,0,0,0
76685,2024-08-22 06:59:53.136,8628490433231158,6926467,545105,2.799739e+10,9.676340e+10,5592.0,61634.0,23.0,0.0,...,27.0,15.0,-105.7,-18.0,18.2,1.921,279.0,0,0,0


In [32]:
# Per-column tally of missing entries (NaN / NaT) for this UE
nan_counts = ue_6.isna().sum(axis=0)

# Heading and counts are emitted on separate lines
print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                      90
imeisv                      0
bearer_0_dl_total_bytes     0
bearer_0_ul_total_bytes     0
bearer_1_dl_total_bytes     0
bearer_1_ul_total_bytes     0
dl_bitrate                  0
ul_bitrate                  0
ul_retx                     0
ul_err                      0
ul_mcs                      3
ul_path_loss                0
ul_phr                      0
dl_err                      0
dl_mcs                     22
dl_retx                     0
dl_tx                       0
cqi                         0
epre                        0
p_ue                        0
pusch_snr                   0
turbo_decoder_avg           4
ul_tx                       0
attack                      0
malicious                   0
attack_number               0
dtype: int64


In [33]:
# Make sure `_time` holds datetimes. Note: the rows are NOT re-sorted here;
# the fill below works on the rows in their existing order.
ue_6['_time'] = pd.to_datetime(ue_6['_time'])

# Timestamps of the previous and next row. shift() leaves NaT at the first
# and last row, so boundary rows can never qualify for the midpoint fill.
prev_time = ue_6['_time'].shift(1)
next_time = ue_6['_time'].shift(-1)

# Midpoint between the two neighbours; NaT wherever either neighbour is missing
midpoint_time = prev_time + (next_time - prev_time) / 2

# An isolated gap is a NaT whose previous and next timestamps are both valid.
# Runs of consecutive NaT are intentionally left untouched by this cell.
isolated_gap = ue_6['_time'].isna() & midpoint_time.notna()
ue_6.loc[isolated_gap, '_time'] = midpoint_time[isolated_gap]

In [34]:
# Fill isolated NaN values in the numeric columns with the mean of the
# previous and next rows. Non-numeric columns (e.g. datetime `_time`) are
# skipped because they cannot be averaged with `+` and `/`.
for column in ue_6.select_dtypes(include='number').columns:
    values = ue_6[column]
    if not values.isna().any():
        continue  # nothing to repair in this column

    # shift() yields NaN at the first/last row and NaN propagates through the
    # sum, so the mean is only defined when both neighbours are valid.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    isolated_gap = values.isna() & neighbour_mean.notna()
    ue_6.loc[isolated_gap, column] = neighbour_mean[isolated_gap]

# Forward-fill whatever is still missing (e.g. runs of consecutive gaps) from
# the closest previous row. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill'). Gaps in the very first rows have no previous value
# and therefore remain missing.
ue_6 = ue_6.ffill()

In [35]:
# Write this UE's records to CSV without the row index. The output folder is
# created first so the export does not fail when it does not exist yet.
output_path = Path('Per_UE_Datasets') / 'ue_8628490433231158_normal.csv'
output_path.parent.mkdir(parents=True, exist_ok=True)
ue_6.to_csv(output_path, index=False)

#### UE: 8609960480859058 (7) Normal

In [36]:
# 7th UE (imeisv 8609960480859058): take its rows and renumber them 0..n-1,
# discarding the index inherited from the full dataset
ue_7 = user_dataframes[8609960480859058].reset_index(drop=True)
ue_7

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:01.700,8609960480859058,4767199,324765,808949091.0,3.640957e+09,3318552.0,10415466.0,155.0,0.0,...,1826.0,12.0,-91.6,16.0,24.6,2.640,1719.0,0,0,0
1,2024-08-17 12:00:06.845,8609960480859058,4767199,324765,810896954.0,3.647772e+09,3320509.0,10715746.0,164.0,0.0,...,1866.0,12.0,-91.5,17.0,24.8,2.657,1761.0,0,0,0
2,2024-08-17 12:00:11.997,8609960480859058,4767199,324765,812842929.0,3.654344e+09,3330485.0,10321520.0,153.0,0.0,...,1842.0,12.0,-92.2,17.0,19.5,2.651,1714.0,0,0,0
3,2024-08-17 12:00:17.156,8609960480859058,4767199,324765,814796135.0,3.661232e+09,3343812.0,10784693.0,162.0,0.0,...,1883.0,12.0,-92.2,17.0,33.3,2.677,1765.0,0,0,0
4,2024-08-17 12:00:22.299,8609960480859058,4767199,324765,816756780.0,3.667772e+09,3331048.0,10332011.0,155.0,0.0,...,1832.0,12.0,-91.7,17.0,24.7,2.615,1718.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76270,2024-08-22 06:59:34.786,8609960480859058,27582997553,35673982108,6909519.0,5.472010e+05,6637.0,5181.0,0.0,0.0,...,39.0,13.0,-79.4,11.0,41.2,2.217,23.0,0,0,0
76271,2024-08-22 06:59:39.931,8609960480859058,27582998657,35673983128,6909519.0,5.472010e+05,6121.0,5640.0,0.0,0.0,...,37.0,13.0,-76.4,10.0,36.3,2.240,25.0,0,0,0
76272,2024-08-22 06:59:45.077,8609960480859058,27582999589,35673984004,6909519.0,5.472010e+05,6952.0,3829.0,0.0,0.0,...,37.0,13.0,-80.1,10.0,38.2,2.235,17.0,0,0,0
76273,2024-08-22 06:59:50.242,8609960480859058,27583000894,35673985080,6909519.0,5.472010e+05,7624.0,5393.0,0.0,0.0,...,42.0,13.0,-78.3,10.0,39.4,2.208,24.0,0,0,0


In [37]:
# Per-column tally of missing entries (NaN / NaT) for this UE
nan_counts = ue_7.isna().sum(axis=0)

# Heading and counts are emitted on separate lines
print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                      79
imeisv                      0
bearer_0_dl_total_bytes     0
bearer_0_ul_total_bytes     0
bearer_1_dl_total_bytes     0
bearer_1_ul_total_bytes     0
dl_bitrate                  0
ul_bitrate                  0
ul_retx                     0
ul_err                      0
ul_mcs                      7
ul_path_loss                0
ul_phr                      0
dl_err                      0
dl_mcs                      1
dl_retx                     0
dl_tx                       0
cqi                         0
epre                        0
p_ue                        0
pusch_snr                   0
turbo_decoder_avg           8
ul_tx                       0
attack                      0
malicious                   0
attack_number               0
dtype: int64


In [38]:
# Coerce '_time' to datetime values (this cell does not reorder the rows)
ue_7['_time'] = pd.to_datetime(ue_7['_time'])

# Repair isolated missing timestamps with the midpoint of the neighbouring rows.
# Relies on consecutive integer row labels starting at 0, so row - 1 / row + 1
# address the previous / next record.
final_row = len(ue_7) - 1
for row in ue_7.index[ue_7['_time'].isna()]:
    # The first and last rows lack one of the two neighbours
    if row <= 0 or row >= final_row:
        continue

    before = ue_7.loc[row - 1, '_time']
    after = ue_7.loc[row + 1, '_time']
    # A missing neighbour (NaT) means no midpoint exists; this cell leaves the gap
    if pd.isna(before) or pd.isna(after):
        continue

    # Midpoint between the two surrounding timestamps
    ue_7.at[row, '_time'] = before + (after - before) / 2

In [39]:
# Fill isolated gaps in the numeric columns with the mean of the two adjacent rows.
# Only numeric columns are averaged: the computation needs `+` and `/`, which
# timestamps (e.g. '_time') do not support.
for column in ue_7.select_dtypes(include='number').columns:
    values = ue_7[column]
    if not values.isna().any():
        continue  # nothing missing; integer columns are left exactly as they are

    # shift(1) / shift(-1) line each row up with its previous / next record. The mean
    # is NaN whenever a neighbour is missing or the row is the first/last one, so
    # those gaps are not filled by this step.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    ue_7[column] = values.fillna(neighbour_mean)

# Forward-fill whatever is still missing (in any column, '_time' included) from the
# closest previous row. DataFrame.ffill() replaces fillna(method='ffill'), which is
# deprecated since pandas 2.1. A gap in the very first row has no previous value
# and therefore stays missing.
ue_7 = ue_7.ffill()

In [40]:
# Write the cleaned ue_7 records to CSV without the row index
ue_7.to_csv(
    path_or_buf='Per_UE_Datasets/ue_8609960480859058_normal.csv',
    index=False,
)

#### UE: 8609960480666910 (8) Normal

In [41]:
# 8th UE: 8609960480666910
# Take this UE's rows from the per-UE dictionary and renumber them from 0
ue_8 = user_dataframes[8609960480666910].reset_index(drop=True)
ue_8

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:02.985,8609960480666910,4684563,312941,9.106507e+08,2.758636e+09,3184776.0,10096962.0,125.0,0.0,...,1462.0,15.0,-132.9,-11.0,15.9,3.587,1254.0,0,0,0
1,2024-08-17 12:00:08.114,8609960480666910,4684563,312941,9.125696e+08,2.765539e+09,3203236.0,10961304.0,121.0,0.0,...,1486.0,15.0,-133.2,-11.0,15.5,3.648,1306.0,0,0,0
2,2024-08-17 12:00:13.250,8609960480666910,4684563,312941,9.144955e+08,2.772017e+09,3182166.0,10235584.0,118.0,0.0,...,1473.0,15.0,-132.9,-11.0,15.3,3.640,1278.0,0,0,0
3,2024-08-17 12:00:18.387,8609960480666910,4684563,312941,9.164223e+08,2.778750e+09,3188127.0,10478021.0,128.0,0.0,...,1579.0,14.0,-135.1,-11.0,12.4,3.665,1272.0,0,0,0
4,2024-08-17 12:00:23.520,8609960480666910,4684563,312941,9.183535e+08,2.785454e+09,3203766.0,10716141.0,114.0,0.0,...,1517.0,15.0,-132.8,-11.0,15.5,3.559,1314.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76641,2024-08-22 06:59:37.734,8609960480666910,6936419,538981,2.898059e+10,2.418389e+11,4263.0,63515.0,27.0,0.0,...,21.0,15.0,-89.9,-11.0,37.4,2.404,285.0,0,0,0
76642,2024-08-22 06:59:42.871,8609960480666910,6936419,538981,2.898059e+10,2.418389e+11,4491.0,65170.0,25.0,0.0,...,22.0,15.0,-94.2,-11.0,27.7,2.397,292.0,0,0,0
76643,2024-08-22 06:59:48.006,8609960480666910,6936419,538981,2.898059e+10,2.418389e+11,4070.0,63947.0,24.0,0.0,...,20.0,15.0,-90.0,-11.0,35.5,2.455,286.0,0,0,0
76644,2024-08-22 06:59:53.136,8609960480666910,6936419,538981,2.898059e+10,2.418389e+11,4035.0,64396.0,20.0,0.0,...,20.0,15.0,-94.3,-11.0,31.9,2.654,286.0,0,0,0


In [42]:
# Tally the missing (NaN/NaT) entries of every ue_8 column before cleaning
nan_counts = ue_8.isna().sum(axis=0)

print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                      90
imeisv                      0
bearer_0_dl_total_bytes     0
bearer_0_ul_total_bytes     0
bearer_1_dl_total_bytes     0
bearer_1_ul_total_bytes     0
dl_bitrate                  0
ul_bitrate                  0
ul_retx                     0
ul_err                      0
ul_mcs                      4
ul_path_loss                1
ul_phr                      1
dl_err                      0
dl_mcs                     15
dl_retx                     0
dl_tx                       0
cqi                         0
epre                        0
p_ue                        1
pusch_snr                   0
turbo_decoder_avg           3
ul_tx                       0
attack                      0
malicious                   0
attack_number               0
dtype: int64


In [43]:
# Coerce '_time' to datetime values (this cell does not reorder the rows)
ue_8['_time'] = pd.to_datetime(ue_8['_time'])

# Repair isolated missing timestamps with the midpoint of the neighbouring rows.
# Relies on the 0..n-1 row labels produced by reset_index, so row - 1 / row + 1
# address the previous / next record.
final_row = len(ue_8) - 1
for row in ue_8.index[ue_8['_time'].isna()]:
    # The first and last rows lack one of the two neighbours
    if row <= 0 or row >= final_row:
        continue

    before = ue_8.loc[row - 1, '_time']
    after = ue_8.loc[row + 1, '_time']
    # A missing neighbour (NaT) means no midpoint exists; this cell leaves the gap
    if pd.isna(before) or pd.isna(after):
        continue

    # Midpoint between the two surrounding timestamps
    ue_8.at[row, '_time'] = before + (after - before) / 2

In [44]:
# Fill isolated gaps in the numeric columns with the mean of the two adjacent rows.
# Only numeric columns are averaged: the computation needs `+` and `/`, which
# timestamps (e.g. '_time') do not support.
for column in ue_8.select_dtypes(include='number').columns:
    values = ue_8[column]
    if not values.isna().any():
        continue  # nothing missing; integer columns are left exactly as they are

    # shift(1) / shift(-1) line each row up with its previous / next record. The mean
    # is NaN whenever a neighbour is missing or the row is the first/last one, so
    # those gaps are not filled by this step.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    ue_8[column] = values.fillna(neighbour_mean)

# Forward-fill whatever is still missing (in any column, '_time' included) from the
# closest previous row. DataFrame.ffill() replaces fillna(method='ffill'), which is
# deprecated since pandas 2.1. A gap in the very first row has no previous value
# and therefore stays missing.
ue_8 = ue_8.ffill()

In [45]:
# Write the cleaned ue_8 records to CSV without the row index
ue_8.to_csv(
    path_or_buf='Per_UE_Datasets/ue_8609960480666910_normal.csv',
    index=False,
)

#### UE: 8609960468879057 (9) Normal

In [46]:
# 9th UE: 8609960468879057
# Take this UE's rows from the per-UE dictionary and renumber them from 0
ue_9 = user_dataframes[8609960468879057].reset_index(drop=True)
ue_9

Unnamed: 0,_time,imeisv,bearer_0_dl_total_bytes,bearer_0_ul_total_bytes,bearer_1_dl_total_bytes,bearer_1_ul_total_bytes,dl_bitrate,ul_bitrate,ul_retx,ul_err,...,dl_tx,cqi,epre,p_ue,pusch_snr,turbo_decoder_avg,ul_tx,attack,malicious,attack_number
0,2024-08-17 12:00:01.700,8609960468879057,469463419,2988597133,,,3357942.0,19980011.0,3.0,0.0,...,2361.0,15.0,-103.1,-1.0,31.0,2.093,2037.0,0,0,0
1,2024-08-17 12:00:06.845,8609960468879057,471449341,3001302195,,,3361623.0,19940144.0,2.0,0.0,...,2350.0,15.0,-103.2,-1.0,31.7,2.089,2038.0,0,0,0
2,2024-08-17 12:00:11.997,8609960468879057,473438409,3014003437,,,3364216.0,19932744.0,1.0,0.0,...,2350.0,15.0,-103.0,-1.0,31.0,2.099,2036.0,0,0,0
3,2024-08-17 12:00:17.156,8609960468879057,475426690,3026691088,,,3372044.0,19929040.0,2.0,0.0,...,2351.0,15.0,-103.2,-3.0,31.7,2.092,2043.0,0,0,0
4,2024-08-17 12:00:22.299,8609960468879057,477426866,3039456817,,,3371045.0,19992180.0,0.0,0.0,...,2361.0,15.0,-103.0,-1.0,30.7,2.096,2040.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76261,2024-08-22 06:59:34.786,8609960468879057,28496076654,160059150836,,,7314.0,4956.0,0.0,0.0,...,33.0,10.0,-88.3,-19.0,34.5,2.045,22.0,0,0,0
76262,2024-08-22 06:59:39.931,8609960468879057,28496077578,160059151820,,,7106.0,5640.0,0.0,0.0,...,33.0,10.0,-89.7,-19.0,33.5,2.120,25.0,0,0,0
76263,2024-08-22 06:59:45.077,8609960468879057,28496078418,160059152660,,,6466.0,6082.0,0.0,0.0,...,31.0,10.0,-88.4,-19.0,33.7,2.000,27.0,0,0,0
76264,2024-08-22 06:59:50.242,8609960468879057,28496079375,160059153532,,,7311.0,4044.0,0.0,0.0,...,32.0,10.0,-89.4,-19.0,31.7,2.111,18.0,0,0,0


In [47]:
# Tally the missing (NaN/NaT) entries of every ue_9 column before cleaning
nan_counts = ue_9.isna().sum(axis=0)

print("NaN values in each column:", nan_counts, sep="\n")

NaN values in each column:
_time                         79
imeisv                         0
bearer_0_dl_total_bytes        0
bearer_0_ul_total_bytes        0
bearer_1_dl_total_bytes    76264
bearer_1_ul_total_bytes    76264
dl_bitrate                     0
ul_bitrate                     0
ul_retx                        0
ul_err                         0
ul_mcs                         9
ul_path_loss                   0
ul_phr                         0
dl_err                         0
dl_mcs                         4
dl_retx                        0
dl_tx                          0
cqi                            0
epre                           0
p_ue                           0
pusch_snr                      0
turbo_decoder_avg              9
ul_tx                          0
attack                         0
malicious                      0
attack_number                  0
dtype: int64


In [48]:
# The NaN count for this UE lists the two bearer_1 byte counters as missing in
# 76264 rows, so both bearer_1 columns are overwritten with a constant 0.
# Note: this replaces every row of these columns, not only the NaN entries.
for bearer_col in ('bearer_1_dl_total_bytes', 'bearer_1_ul_total_bytes'):
    ue_9[bearer_col] = 0

In [49]:
# Coerce '_time' to datetime values (this cell does not reorder the rows)
ue_9['_time'] = pd.to_datetime(ue_9['_time'])

# Repair isolated missing timestamps with the midpoint of the neighbouring rows.
# Relies on the 0..n-1 row labels produced by reset_index, so row - 1 / row + 1
# address the previous / next record.
final_row = len(ue_9) - 1
for row in ue_9.index[ue_9['_time'].isna()]:
    # The first and last rows lack one of the two neighbours
    if row <= 0 or row >= final_row:
        continue

    before = ue_9.loc[row - 1, '_time']
    after = ue_9.loc[row + 1, '_time']
    # A missing neighbour (NaT) means no midpoint exists; this cell leaves the gap
    if pd.isna(before) or pd.isna(after):
        continue

    # Midpoint between the two surrounding timestamps
    ue_9.at[row, '_time'] = before + (after - before) / 2

In [50]:
# Fill isolated gaps in the numeric columns with the mean of the two adjacent rows.
# Only numeric columns are averaged: the computation needs `+` and `/`, which
# timestamps (e.g. '_time') do not support.
for column in ue_9.select_dtypes(include='number').columns:
    values = ue_9[column]
    if not values.isna().any():
        continue  # nothing missing; integer columns are left exactly as they are

    # shift(1) / shift(-1) line each row up with its previous / next record. The mean
    # is NaN whenever a neighbour is missing or the row is the first/last one, so
    # those gaps are not filled by this step.
    neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
    ue_9[column] = values.fillna(neighbour_mean)

# Forward-fill whatever is still missing (in any column, '_time' included) from the
# closest previous row. DataFrame.ffill() replaces fillna(method='ffill'), which is
# deprecated since pandas 2.1. A gap in the very first row has no previous value
# and therefore stays missing.
ue_9 = ue_9.ffill()

In [51]:
# Write the cleaned ue_9 records to CSV without the row index
ue_9.to_csv(
    path_or_buf='Per_UE_Datasets/ue_8609960468879057_normal.csv',
    index=False,
)