In [1]:
import pandas as pd
# Load the labelled CSV into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='../cleaned_dataset/labelled_ble_data.csv')

In [2]:
# Preview the first five rows to check which columns were loaded.
df.head(n=5)

Unnamed: 0,timestamp,mac address,RSSI,room
0,2023-04-10 14:21:46+09:00,6,-93,kitchen
1,2023-04-10 14:21:46+09:00,6,-93,kitchen
2,2023-04-10 14:21:46+09:00,6,-93,kitchen
3,2023-04-10 14:21:46+09:00,6,-93,kitchen
4,2023-04-10 14:21:46+09:00,6,-93,kitchen


In [3]:
# Give every run of consecutive rows with the same room its own id.
# A row starts a new run when its room differs from the row directly above it
# (the first row always does, because it is compared against a missing value);
# the cumulative sum of those change flags is the run id, starting at 1.
# NOTE(review): assumes the rows are already in chronological order - confirm.
df['room_group'] = df['room'].ne(df['room'].shift(periods=1)).cumsum()

# Group on the room together with its run id, so separate visits to the same
# room are kept apart.
grouped = df.groupby(by=['room', 'room_group'])

In [4]:
# Preview the first five rows again, now including the new room_group column.
df.head(n=5)

Unnamed: 0,timestamp,mac address,RSSI,room,room_group
0,2023-04-10 14:21:46+09:00,6,-93,kitchen,1
1,2023-04-10 14:21:46+09:00,6,-93,kitchen,1
2,2023-04-10 14:21:46+09:00,6,-93,kitchen,1
3,2023-04-10 14:21:46+09:00,6,-93,kitchen,1
4,2023-04-10 14:21:46+09:00,6,-93,kitchen,1


In [5]:
# Select the rows of the first run (room_group 1) with a boolean mask, then
# count how many rows each value of the 'mac address' column has in that run.
room_group_1 = df.loc[df['room_group'].eq(1)]
room_group_1['mac address'].value_counts()

mac address
4     260
9     119
6      68
7      18
18     17
Name: count, dtype: int64

In [6]:
# Select the rows of run 2, then count rows per 'mac address' value.
room_group_2 = df.loc[df['room_group'].eq(2)]
room_group_2['mac address'].value_counts()

mac address
4     451
9     102
14     68
18     34
7      34
5      17
19     17
Name: count, dtype: int64

In [7]:
# Select the rows of run 3, then count rows per 'mac address' value.
room_group_3 = df.loc[df['room_group'].eq(3)]
room_group_3['mac address'].value_counts()

mac address
14    1292
4     1241
19     189
5      136
6      102
9       51
17      51
3       17
Name: count, dtype: int64

In [8]:
# Parse the timestamp column into datetimes so values can be compared and
# subtracted. Re-running the cell is safe: already-parsed values are unchanged.
df['timestamp'] = pd.to_datetime(df['timestamp'])


def _format_hms(delta):
    """Render a timedelta as a zero-padded ``HH:MM:SS`` string.

    Fractional seconds are truncated. A negative delta (which can only occur
    if rows are not in chronological order) is rendered as its magnitude with
    a leading ``-`` rather than the misleading result floor division would
    give (e.g. -7 s would otherwise come out as ``-1:59:53``).
    """
    total_seconds = int(delta.total_seconds())
    sign = '-' if total_seconds < 0 else ''
    hours, remainder = divmod(abs(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{seconds:02d}"


def _same_day_gaps(frame, group_ids):
    """Measure the time between each room_group and the one that follows it.

    Parameters
    ----------
    frame : DataFrame with 'room_group' and (datetime) 'timestamp' columns.
    group_ids : array of room_group ids in the order they should be walked.

    Returns
    -------
    list of dict, one per consecutive pair whose boundary timestamps fall on
    the same calendar date, with keys 'from_group', 'to_group',
    'last_timestamp', 'first_timestamp' and 'gap' (an HH:MM:SS string).
    Pairs that straddle a date change are skipped.
    """
    # Collect every group's first and last row in one pass each, instead of
    # re-filtering the whole frame with a boolean mask for every group.
    # drop_duplicates keeps the actual first/last row (missing values
    # included), exactly like .iloc[0] / .iloc[-1] on the filtered frame.
    first_seen = (
        frame.drop_duplicates('room_group', keep='first')
        .set_index('room_group')['timestamp']
    )
    last_seen = (
        frame.drop_duplicates('room_group', keep='last')
        .set_index('room_group')['timestamp']
    )

    # Align both series to the walking order and pull out plain Timestamp
    # objects so the loop below does no pandas indexing.
    starts = first_seen.reindex(group_ids).tolist()
    ends = last_seen.reindex(group_ids).tolist()

    records = []
    for pos in range(len(group_ids) - 1):
        last_timestamp = ends[pos]
        first_timestamp = starts[pos + 1]

        # Only gaps within a single calendar date are of interest.
        if last_timestamp.date() != first_timestamp.date():
            continue

        records.append({
            'from_group': group_ids[pos],
            'to_group': group_ids[pos + 1],
            'last_timestamp': last_timestamp,
            'first_timestamp': first_timestamp,
            'gap': _format_hms(first_timestamp - last_timestamp),
        })
    return records


# Get unique room_group values (in order of first appearance)
room_groups = df['room_group'].unique()

# Now run the gap analysis
gaps = _same_day_gaps(df, room_groups)

print(f"Found {len(gaps)} gaps:")
for g in gaps:
    print(g)

# Just the gap times
gap_times = [g['gap'] for g in gaps]
print("\nJust the gaps:", gap_times)

Found 314 gaps:
{'from_group': np.int64(1), 'to_group': np.int64(2), 'last_timestamp': Timestamp('2023-04-10 14:21:50+0900', tz='UTC+09:00'), 'first_timestamp': Timestamp('2023-04-10 14:21:57+0900', tz='UTC+09:00'), 'gap': '00:00:07'}
{'from_group': np.int64(2), 'to_group': np.int64(3), 'last_timestamp': Timestamp('2023-04-10 14:22:16+0900', tz='UTC+09:00'), 'first_timestamp': Timestamp('2023-04-10 14:22:23+0900', tz='UTC+09:00'), 'gap': '00:00:07'}
{'from_group': np.int64(3), 'to_group': np.int64(4), 'last_timestamp': Timestamp('2023-04-10 14:23:25+0900', tz='UTC+09:00'), 'first_timestamp': Timestamp('2023-04-10 14:23:29+0900', tz='UTC+09:00'), 'gap': '00:00:04'}
{'from_group': np.int64(4), 'to_group': np.int64(5), 'last_timestamp': Timestamp('2023-04-10 14:23:41+0900', tz='UTC+09:00'), 'first_timestamp': Timestamp('2023-04-10 14:23:42+0900', tz='UTC+09:00'), 'gap': '00:00:01'}
{'from_group': np.int64(5), 'to_group': np.int64(6), 'last_timestamp': Timestamp('2023-04-10 14:24:05+0900',

In [9]:
# Show each distinct gap once, in ascending order. The strings are zero-padded
# HH:MM:SS, so plain string sorting matches chronological order as long as the
# hour field stays at two digits.
print(sorted({*gap_times}))

['00:00:01', '00:00:02', '00:00:03', '00:00:04', '00:00:05', '00:00:06', '00:00:07', '00:00:08', '00:00:09', '00:00:10', '00:00:11', '00:00:12', '00:00:13', '00:00:14', '00:00:15', '00:00:16', '00:00:17', '00:00:18', '00:00:19', '00:00:20', '00:00:21', '00:00:22', '00:00:23', '00:00:24', '00:00:25', '00:00:27', '00:00:28', '00:00:29', '00:00:31', '00:00:32', '00:00:33', '00:00:34', '00:00:35', '00:00:37', '00:00:38', '00:00:39', '00:00:40', '00:00:41', '00:00:42', '00:00:43', '00:00:44', '00:00:46', '00:00:47', '00:00:48', '00:00:49', '00:00:51', '00:00:53', '00:00:54', '00:00:56', '00:00:58', '00:00:59', '00:01:01', '00:01:02', '00:01:03', '00:01:11', '00:01:20', '00:01:34', '00:01:41', '00:02:12', '00:02:18', '00:02:26', '00:02:33', '00:02:41', '00:02:56', '00:02:57', '00:03:00', '00:03:19', '00:03:34', '00:03:40', '00:03:48', '00:04:01', '00:04:09', '00:05:01', '00:05:20', '00:05:57', '00:06:16', '00:07:11', '00:11:08', '00:11:33', '00:25:13', '01:44:56']
