In [14]:
import pandas as pd

In [15]:
# Load the full dataset
path = 'Data/ui_data_2.csv'
data = pd.read_csv(path)
data.shape

(6596, 10)

In [16]:
# Fill missing RSSI values
data['rssi_ap_digilab'] = data['rssi_ap_digilab'].fillna(-200)
data['rssi_ap_dosen'] = data['rssi_ap_dosen'].fillna(-200)

In [17]:
data = data[~data['reference_point'].isin(['RP74'])]

# Ensure the relative_position column exists with a default numeric value (-1 for unassigned)
data['relative_position'] = -1  # Using -1 to indicate unassigned values initially

# Define reference points that should be assigned a relative position of 0
zero_rps = (
    list(range(1, 21)) +   # RP1-20
    list(range(53, 57)) +  # RP53-56
    list(range(58, 62)) +  # RP58-61
    list(range(63, 65)) +  # RP63-64
    list(range(66, 73)) +  # RP66-72
    list(range(75, 78)) +  # RP75-77
    list(range(80, 89)) +  # RP80-88
    list(range(113, 116)) +  # RP113-115
    list(range(118, 121)) +  # RP118-120
    list(range(93, 96)) +
    list(range(98, 101)) +
    list(range(103, 106)) +
    list(range(108, 111)) 
)

# Define reference points that should be assigned a relative position of 1
one_rps = list(range(28, 43)) + [89, 90]  # RP28-42 and RP89-90


# Convert lists to sets for faster lookup
zero_rp_set = {f'RP{i}' for i in zero_rps}
one_rp_set = {f'RP{i}' for i in one_rps}

# Apply conditions to assign relative_position
data['relative_position'] = data['reference_point'].apply(
    lambda rp: 0 if rp in zero_rp_set else (1 if rp in one_rp_set else -1)
)

# Convert to integer type to ensure it's numeric
data['relative_position'] = data['relative_position'].astype(int)

# Check unique values to ensure no -1 remains
print("Unique values in relative_position:", data['relative_position'].unique())

# Check the first few rows to confirm the relative_position column was added correctly
print(data[['reference_point', 'relative_position']].head())

Unique values in relative_position: [ 0  1 -1]
  reference_point  relative_position
0             RP1                  0
1             RP1                  0
2             RP1                  0
3             RP1                  0
4             RP1                  0


In [18]:
# Define the mapping dictionary
bssid_mapping = {
    '70:a7:41:dc:8e:55': 1,
    '72:a7:41:9c:8e:55': 2,
    '70:a7:41:dc:77:cd': 3,
    '72:a7:41:9c:77:cd': 4
}

In [19]:
data['bssid'] = data['bssid'].map(bssid_mapping)
print(data[['bssid', 'bssid']].head())

   bssid  bssid
0      4      4
1      3      3
2      4      4
3      4      4
4      3      3


In [20]:
# Convert the 'time' column to datetime format
data['time'] = pd.to_datetime(data['time'])
# Convert to seconds since the epoch
data['time_numeric'] = data['time'].apply(lambda x: x.time())

In [21]:
# Assuming 'reference_point' is your column name in the DataFrame
data['reference_point'] = data['reference_point'].str.replace('RP', '').astype(int)

# Check the result
print(data['reference_point'])

0         1
1         1
2         1
3         1
4         1
       ... 
6591    116
6592    116
6593    116
6594    116
6595    116
Name: reference_point, Length: 6564, dtype: int32


In [22]:
data['reference_point'].unique()

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  29,  30,  37,
        11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  38,  36,  42,
        41,  40,  39,  35,  33,  34,  32,  31, 113, 114, 115, 118, 119,
       120,  53,  54,  55,  58,  59,  60,  77,  76,  75,  81,  82,  66,
        67,  70,  71,  72,  56,  61,  63,  64,  68,  69,  80,  89,  85,
        88,  90,  28, 110, 105, 100,  95, 109, 108, 103, 104,  99,  98,
        93,  45,  44,  43,  52,  51,  46,  47,  27,  25,  23,  21,  22,
        24,  26,  50,  49,  48, 107, 102,  94,  97,  92, 111, 112, 117,
       116])

In [23]:
cases = {
    "case1": [1,2,3,6,7,8,9,11,12,13,14,16,17,18,19,43,44,45,48,49,85,88],
    "case2": [26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,63,68,76,77,80,81,82,85,88,89,90,105,110,115,120],
    "case3": [8,12,25,30,34,37,39,50,54,64,81,88,104,110,119],
    "case4": [1,2,3,4,5,10,15,20,21,22,47,52,92,93,94,95,97,102,107,111,116,117,56,61,67,72,89,90,75,80]
}

In [24]:
# Create filtered DataFrames
df_case1 = data[data['reference_point'].isin(cases['case1'])]
df_case2 = data[data['reference_point'].isin(cases['case2'])]
df_case3 = data[data['reference_point'].isin(cases['case3'])]
df_case4 = data[data['reference_point'].isin(cases['case4'])]

In [25]:
df_case4.sample(5)

Unnamed: 0,time,reference_point,iteration,ssid,bssid,channel,xr,yr,rssi_ap_digilab,rssi_ap_dosen,relative_position,time_numeric
6355,2025-03-12 05:03:00.032132100+00:00,111,17,DTE Student,4,6,1290,134,-61.0,-200.0,-1,05:03:00.032132
6359,2025-03-12 05:03:25.728986200+00:00,111,18,DTE Staff,3,6,1290,134,-61.0,-200.0,-1,05:03:25.728986
36,2024-10-17 08:46:44.826613400+00:00,1,18,DTE Staff,3,11,300,1368,-63.0,-200.0,0,08:46:44.826613
2992,2025-03-01 01:44:43.900927200+00:00,75,12,DTE Staff,3,1,1720,469,-67.0,-200.0,0,01:44:43.900927
5996,2025-03-12 03:34:01.856391+00:00,107,9,DTE Student,4,6,1120,299,-59.0,-200.0,-1,03:34:01.856391


In [26]:
# Save each filtered dataset to a separate CSV file
df_case1.to_csv("Data/case1_dataset.csv", index=False)
df_case2.to_csv("Data/case2_dataset.csv", index=False)
df_case3.to_csv("Data/case3_dataset.csv", index=False)
df_case4.to_csv("Data/case4_dataset.csv", index=False)

# Return confirmation of file names
["case1_dataset.csv", "case2_dataset.csv", "case3_dataset.csv", "case4_dataset.csv"]

['case1_dataset.csv',
 'case2_dataset.csv',
 'case3_dataset.csv',
 'case4_dataset.csv']