In [6]:
# Load the labelled BLE readings (per the file name) into a DataFrame.
# The path is relative to the notebook's working directory.
import pandas as pd
df = pd.read_csv('../cleaned_dataset/labelled_ble_data.csv')

In [7]:
# Preview the first rows of the loaded readings
df.head()

Unnamed: 0,timestamp,mac address,RSSI,room
0,2023-04-10 14:21:46+09:00,6,-93,kitchen
1,2023-04-10 14:21:46+09:00,6,-93,kitchen
2,2023-04-10 14:21:46+09:00,6,-93,kitchen
3,2023-04-10 14:21:46+09:00,6,-93,kitchen
4,2023-04-10 14:21:46+09:00,6,-93,kitchen


In [8]:
# Group the readings by timestamp. The timestamps have one-second
# resolution, so each distinct timestamp is one 1-second window.
# For each window, count how often every mac address occurs and keep
# up to the three most frequent ones as the aggregated feature
# "top_3_beacons".
import pandas as pd

def create_windowed_df(df, top_n=3):
    """
    Collapse raw readings into one row per timestamp window.

    All rows of ``df`` that share the same ``timestamp`` value form one
    window. For each window the ``top_n`` most frequent ``mac address``
    values are kept, most frequent first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``timestamp``, ``mac address`` and ``room``.
    top_n : int, default 3
        Maximum number of beacons kept per window. The output column is
        still called ``top_3_beacons`` so existing consumers keep working.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp``, ``top_3_beacons`` (a list with at most
        ``top_n`` entries) and ``room``. An empty input yields an empty
        frame that still has these three columns.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    windowed_records = []

    for timestamp, group in df.groupby("timestamp"):
        # value_counts() orders by frequency (descending), so head(top_n)
        # yields the most frequent beacons of this window.
        top_beacons = (
            group["mac address"]
            .value_counts()
            .head(top_n)
            .index
            .tolist()
        )

        windowed_records.append({
            "timestamp": timestamp,
            "top_3_beacons": top_beacons,
            # The window is labelled with the room of its first row; other
            # room values inside the same window are not inspected.
            "room": group["room"].iloc[0]
        })

    # Passing the column names explicitly keeps the schema stable even when
    # no windows were produced.
    windowed_df = pd.DataFrame(
        windowed_records, columns=["timestamp", "top_3_beacons", "room"]
    )
    return windowed_df

# Build the per-timestamp window table from the raw readings
windowed_df = create_windowed_df(df)

In [9]:
# Preview the first windows and their top beacons
windowed_df.head()

Unnamed: 0,timestamp,top_3_beacons,room
0,2023-04-10 14:21:46+09:00,"[4, 6]",kitchen
1,2023-04-10 14:21:47+09:00,"[4, 6, 9]",kitchen
2,2023-04-10 14:21:48+09:00,"[4, 9]",kitchen
3,2023-04-10 14:21:49+09:00,"[9, 4, 7]",kitchen
4,2023-04-10 14:21:50+09:00,"[9, 4, 7]",kitchen


In [10]:
# Analysis: Beacon signature consistency per room
from collections import Counter
import numpy as np

def analyze_room_beacon_consistency(windowed_df):
    """
    For each room, find the most common top-3 beacon pattern
    and calculate how consistently it appears.

    A pattern is the sorted tuple of a window's ``top_3_beacons`` list, so
    two windows holding the same beacons in a different rank order count as
    the same pattern.

    Parameters
    ----------
    windowed_df : pandas.DataFrame
        Needs a ``room`` column and a ``top_3_beacons`` column holding one
        list of beacon ids per row.

    Returns
    -------
    pandas.DataFrame
        One row per room with the columns ``room``, ``total_windows``,
        ``most_common_pattern``, ``pattern_frequency``,
        ``consistency_score`` (``pattern_frequency / total_windows``) and
        ``num_unique_patterns``, sorted by ``consistency_score`` descending.
        An empty input yields an empty frame with these columns instead of
        raising a ``KeyError`` while sorting.
    """
    columns = [
        'room',
        'total_windows',
        'most_common_pattern',
        'pattern_frequency',
        'consistency_score',
        'num_unique_patterns',
    ]
    results = []

    # sort=False keeps rooms in order of first appearance and splits the
    # frame in a single pass instead of re-filtering it once per room.
    for room, room_data in windowed_df.groupby('room', sort=False):
        # Lists are not hashable; sorted tuples are, and they ignore rank order.
        pattern_counts = Counter(
            tuple(sorted(beacons)) for beacons in room_data['top_3_beacons']
        )

        # Skip groups without any window (e.g. unobserved categories).
        if not pattern_counts:
            continue

        most_common_pattern, most_common_count = pattern_counts.most_common(1)[0]
        total_windows = len(room_data)

        results.append({
            'room': room,
            'total_windows': total_windows,
            'most_common_pattern': most_common_pattern,
            'pattern_frequency': most_common_count,
            'consistency_score': most_common_count / total_windows,
            'num_unique_patterns': len(pattern_counts)
        })

    # Explicit columns guarantee that 'consistency_score' exists even when
    # there are no rows to sort.
    results_df = pd.DataFrame(results, columns=columns)
    return results_df.sort_values('consistency_score', ascending=False)

# Score every room, then print the result table followed by summary statistics
consistency_results = analyze_room_beacon_consistency(windowed_df)
print(
    "=" * 80,
    "BEACON SIGNATURE CONSISTENCY ANALYSIS",
    "=" * 80,
    consistency_results.to_string(index=False),
    "\n" + "=" * 80,
    f"Average Consistency Score: {consistency_results['consistency_score'].mean():.3f}",
    f"Median Consistency Score: {consistency_results['consistency_score'].median():.3f}",
    "=" * 80,
    sep="\n",
)

BEACON SIGNATURE CONSISTENCY ANALYSIS
         room  total_windows most_common_pattern  pattern_frequency  consistency_score  num_unique_patterns
          512            292               (23,)                163           0.558219                   39
          508             44               (20,)                 18           0.409091                   18
          506            257               (19,)                 95           0.369650                   34
      kitchen           5173               (14,)               1722           0.332882                  126
     cleaning            688               (19,)                188           0.273256                   93
          522            198               (22,)                 48           0.242424                   55
    cafeteria           4862               (14,)               1115           0.229329                  170
          520            311                (8,)                 71           0.228296            

In [11]:
# Analysis 2: Which single beacon appears MOST FREQUENTLY in each room's top-3?
# This is more flexible than exact pattern matching

def analyze_dominant_beacons(windowed_df):
    """For each room, find which beacons most frequently appear in top-3.

    Every beacon is counted at most once per window, so all rates are the
    share of the room's windows that contain the beacon and never exceed 1.

    Parameters
    ----------
    windowed_df : pandas.DataFrame
        Needs a ``room`` column and a ``top_3_beacons`` column holding one
        list of beacon ids per row.

    Returns
    -------
    pandas.DataFrame
        One row per room with the columns ``room``, ``total_windows``,
        ``dominant_beacon``, ``appearances`` (number of windows containing
        the dominant beacon), ``appearance_rate``, ``top_3_beacons`` and
        ``top_3_rates``, sorted by ``appearance_rate`` descending. Rooms
        whose windows contain no beacons are omitted. An empty input yields
        an empty frame with these columns.
    """
    columns = [
        'room',
        'total_windows',
        'dominant_beacon',
        'appearances',
        'appearance_rate',
        'top_3_beacons',
        'top_3_rates',
    ]
    results = []

    # sort=False keeps rooms in order of first appearance.
    for room, room_data in windowed_df.groupby('room', sort=False):
        total_windows = len(room_data)

        beacon_counts = Counter()
        for beacons in room_data['top_3_beacons']:
            # dict.fromkeys drops repeats within one window while keeping
            # first-seen order, so ties in most_common() resolve the same
            # way as when the lists are already duplicate-free.
            beacon_counts.update(list(dict.fromkeys(beacons)))

        # Up to three beacons that occur in the most windows of this room
        top_beacons = beacon_counts.most_common(3)
        if not top_beacons:
            continue

        most_common_beacon, beacon_count = top_beacons[0]

        results.append({
            'room': room,
            'total_windows': total_windows,
            'dominant_beacon': most_common_beacon,
            'appearances': beacon_count,
            # Equal to top_3_rates[0]; no cap is needed because a beacon
            # is counted once per window at most.
            'appearance_rate': beacon_count / total_windows,
            'top_3_beacons': [beacon for beacon, _ in top_beacons],
            'top_3_rates': [count / total_windows for _, count in top_beacons]
        })

    # Explicit columns guarantee that 'appearance_rate' exists even when
    # there are no rows to sort.
    results_df = pd.DataFrame(results, columns=columns)
    return results_df.sort_values('appearance_rate', ascending=False)

# Compute the per-room dominant beacons and print one three-line summary per room
dominant_beacon_results = analyze_dominant_beacons(windowed_df)
print(
    "=" * 100,
    "DOMINANT BEACON ANALYSIS - Which beacons characterize each room?",
    "=" * 100,
    sep="\n",
)
for _, row in dominant_beacon_results.iterrows():
    print(
        f"\n{row['room']:>15s} ({row['total_windows']:>5d} windows)",
        f"   Dominant beacon: {row['dominant_beacon']:>2d} (appears in {row['appearance_rate']:.1%} of windows)",
        f"   Top 3 beacons:   {row['top_3_beacons']} with rates {[f'{r:.2f}' for r in row['top_3_rates']]}",
        sep="\n",
    )

print("\n" + "=" * 100)

DOMINANT BEACON ANALYSIS - Which beacons characterize each room?

            512 (  292 windows)
   Dominant beacon: 23 (appears in 79.5% of windows)
   Top 3 beacons:   [23, 12, 22] with rates ['0.79', '0.23', '0.20']

            506 (  257 windows)
   Dominant beacon: 19 (appears in 75.1% of windows)
   Top 3 beacons:   [19, 6, 5] with rates ['0.75', '0.32', '0.31']

            520 (  311 windows)
   Dominant beacon:  8 (appears in 71.7% of windows)
   Top 3 beacons:   [8, 20, 10] with rates ['0.72', '0.39', '0.37']

        kitchen ( 5173 windows)
   Dominant beacon: 14 (appears in 69.9% of windows)
   Top 3 beacons:   [14, 4, 6] with rates ['0.70', '0.57', '0.11']

  nurse station ( 9363 windows)
   Dominant beacon:  9 (appears in 66.3% of windows)
   Top 3 beacons:   [9, 4, 7] with rates ['0.66', '0.41', '0.33']

            508 (   44 windows)
   Dominant beacon: 20 (appears in 61.4% of windows)
   Top 3 beacons:   [20, 8, 18] with rates ['0.61', '0.36', '0.30']

            5

In [20]:
# Save the windows without the row index. CSV has no list type, so the
# top_3_beacons lists are stored as text and must be parsed again on load.
windowed_df.to_csv('windowed_top_beacon.csv', index=False)

In [22]:
# Variant of the windowing step that records each top beacon's share of the
# window's readings instead of only its rank
def create_windowed_df_with_percentages(df, top_n=3, decimals=2):
    """
    Collapse raw readings into one row per timestamp window, recording how
    large a share of the window's readings each top beacon accounts for.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``timestamp``, ``mac address`` and ``room``.
    top_n : int, default 3
        Maximum number of beacons reported per window.
    decimals : int, default 2
        Number of decimal places the shares are rounded to.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp``, ``beacon_percentages`` and ``room``.
        ``beacon_percentages`` is a dict mapping beacon -> fraction
        (0..1, not 0..100) of all rows in the window. Because only the
        ``top_n`` beacons are kept, the fractions need not sum to 1.
        An empty input yields an empty frame that still has these columns.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    windowed_records = []

    for timestamp, group in df.groupby("timestamp"):
        # Count how many times each beacon appears in this window and keep
        # only the top_n most frequent ones
        beacon_counts = group["mac address"].value_counts().head(top_n)
        # The denominator is every row of the window, not just the rows of
        # the beacons that were kept
        total_readings = len(group)

        # Share of the window's readings per kept beacon
        beacon_percentages = {
            beacon: round(count / total_readings, decimals)
            for beacon, count in beacon_counts.items()
        }

        windowed_records.append({
            "timestamp": timestamp,
            "beacon_percentages": beacon_percentages,  # Dict: top_n beacons only
            # The window is labelled with the room of its first row
            "room": group["room"].iloc[0]
        })

    # Passing the column names explicitly keeps the schema stable even when
    # no windows were produced.
    windowed_df = pd.DataFrame(
        windowed_records, columns=["timestamp", "beacon_percentages", "room"]
    )
    return windowed_df

# Create the windowed dataframe whose beacon_percentages column holds, per
# window, each top beacon's share of the readings
windowed_df_pct = create_windowed_df_with_percentages(df)

In [23]:
# Preview the first ten windows with their beacon shares
windowed_df_pct.head(10)

Unnamed: 0,timestamp,beacon_percentages,room
0,2023-04-10 14:21:46+09:00,"{4: 0.69, 6: 0.31}",kitchen
1,2023-04-10 14:21:47+09:00,"{4: 0.66, 6: 0.17, 9: 0.17}",kitchen
2,2023-04-10 14:21:48+09:00,"{4: 0.75, 9: 0.25}",kitchen
3,2023-04-10 14:21:49+09:00,"{9: 0.32, 4: 0.21, 7: 0.16}",kitchen
4,2023-04-10 14:21:50+09:00,"{9: 0.52, 4: 0.47, 7: 0.01}",kitchen
5,2023-04-10 14:21:57+09:00,{4: 1.0},cafeteria
6,2023-04-10 14:21:58+09:00,{4: 1.0},cafeteria
7,2023-04-10 14:21:59+09:00,{4: 1.0},cafeteria
8,2023-04-10 14:22:06+09:00,{4: 1.0},cafeteria
9,2023-04-10 14:22:07+09:00,{4: 1.0},cafeteria


In [24]:
# Save the percentage-based windows. index=False keeps the meaningless row
# index out of the file (otherwise it comes back as an "Unnamed: 0" column
# on reload) and matches how 'windowed_top_beacon.csv' is written.
# The beacon_percentages dicts are stored as text and must be parsed on load.
windowed_df_pct.to_csv('windowed_top_beacons_pct.csv', index=False)