In [1]:
# Load the data
import pandas as pd
train_df = pd.read_csv("../../cleaned_dataset/split_data/model_selection/train.csv")
test_df = pd.read_csv("../../cleaned_dataset/split_data/model_selection/test.csv")

In [2]:
# Get all of the unique labels for train / test sets 
train_labels = list(train_df['room'].unique())
test_labels = list(test_df['room'].unique())

# Labels that appear in BOTH train and test
common_labels = list(set(train_labels) & set(test_labels))

# Now filter the train_df and test_df so that
# remove all record in train_df whose labels not in test labels 
# and remove all record in test_df whose labels not in train labels 
# therefore we can have a proper train and test df such that the labels appear 
# in train and test should be the same and vice versa
train_df = train_df[train_df['room'].isin(common_labels)].reset_index(drop=True)
test_df  = test_df[test_df['room'].isin(common_labels)].reset_index(drop=True)

In [3]:
# sanity check for label matching between train and test set
assert set(train_df['room'].unique()) == set(test_df['room'].unique())
print("Train/Test labels are now perfectly aligned")

Train/Test labels are now perfectly aligned


In [4]:
# check the distribution of classes in training set
train_df['room'].value_counts()

room
nurse station    488303
cafeteria        171492
kitchen          132819
hallway           63848
523               22169
511                8505
522                8079
cleaning           6232
506                4995
512                3773
520                3695
501                3415
517                1397
513                1152
502                 801
518                 252
Name: count, dtype: int64

In [11]:
# so now we could determine the minority classes would be room 502 and 518
minority_classes = ['502', '518']

# define the target number of records for these minority classes after we done 
# relabeling + concatenating with original training set
TARGET_NUM_502 = 1000 
TARGET_NUM_518 = 1200

In [6]:
# Function to add 25 beacon_1, beacon_2, ... , beacon_25
def add_beacon_features(df, num_beacons=25):
    df = df.copy()

    for i in range(1, num_beacons + 1):
        df[f'beacon_{i}'] = df['RSSI'].where(df['mac address'] == i, 0)

    return df

# Apply with our current train and test dataframe
train_df = add_beacon_features(train_df)
test_df = add_beacon_features(test_df)

In [9]:
train_df.head(1)

Unnamed: 0,timestamp,mac address,RSSI,room,beacon_1,beacon_2,beacon_3,beacon_4,beacon_5,beacon_6,...,beacon_16,beacon_17,beacon_18,beacon_19,beacon_20,beacon_21,beacon_22,beacon_23,beacon_24,beacon_25
0,2023-04-10 14:21:46+09:00,6,-93,kitchen,0,0,0,0,0,-93,...,0,0,0,0,0,0,0,0,0,0


In [10]:
test_df.head(1)

Unnamed: 0,timestamp,mac address,RSSI,room,beacon_1,beacon_2,beacon_3,beacon_4,beacon_5,beacon_6,...,beacon_16,beacon_17,beacon_18,beacon_19,beacon_20,beacon_21,beacon_22,beacon_23,beacon_24,beacon_25
0,2023-04-12 10:00:53+09:00,4,-92,cafeteria,0,0,0,-92,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# Define the 6-beacon surrounding vectors for each room
# Format: [front-left, side-left, source, front, side-right, front-right]
# Based on the 5th floor map
# ONLY including rooms that appear in BOTH train and test sets

beacon_layout = {
    # Rooms from value_counts that are patient rooms
    '501': [16, 2, 1, 15, None, 13],      # Room 1
    '502': [17, 3, 2, 16, 1, 15],         # Room 2 - MINORITY CLASS
    '506': ["to be determined later"],    # Room 6
    '511': [23, 12, 11, 22, 10, 21],      # Room 11
    '512': [None, None, 12, 23, 11, 22],  # Room 12
    '513': [None, None, 13, None, 15, 1],      # Room 13
    '517': [2, 16, 17, 3, None, 5],      # Room 17
    '518': [None, 9, 18, 7, 20, 8],         # Room 18 - MINORITY CLASS
    '520': [7, 18, 20, 8, 21, 10],       # Room 20
    '522': [10, 21, 22, 11, 23, 12],      # Room 22
    '523': [11, 22, 23, 12, None, None],  # Room 23
}

# Classify rooms by matching type
full_matching_rooms = [room for room, beacons in beacon_layout.items() 
                       if None not in beacons]

partial_matching_rooms = list(beacon_layout.keys())

print(f"Total rooms defined: {len(beacon_layout)}")
print(f"Full matching rooms (complete 6 beacons): {full_matching_rooms}")
print(f"Total partial matching rooms: {len(partial_matching_rooms)}")
print(f"\nMinority classes: {minority_classes}")
print(f"Room 502 beacons: {beacon_layout['502']}")
print(f"Room 518 beacons: {beacon_layout['518']}")

Total rooms defined: 11
Full matching rooms (complete 6 beacons): ['502', '506', '511', '520', '522']
Total partial matching rooms: 11

Minority classes: ['502', '518']
Room 502 beacons: [17, 3, 2, 16, 1, 15]
Room 518 beacons: [None, 9, 18, 7, 20, 8]
