In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking, Bidirectional, GRU
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, classification_report
import random
import os

print("âœ… Imports loaded")

  if not hasattr(np, "object"):


âœ… Imports loaded


In [2]:
def load_and_filter_fold(i):
    train_dir = f'../../cleaned_dataset/split_data/fold{i}/train.csv'  
    test_dir = f'../../cleaned_dataset/split_data/fold{i}/test.csv'   
    # load the data 
    train_df = pd.read_csv(train_dir)
    test_df = pd.read_csv(test_dir)

    # Get all of the unique labels for train / test sets 
    train_labels = list(train_df['room'].unique())
    test_labels = list(test_df['room'].unique())

    # Labels that appear in BOTH train and test
    common_labels = list(set(train_labels) & set(test_labels))

    # Filter to just keep the records with labels in common labels list
    train_df = train_df[train_df['room'].isin(common_labels)].reset_index(drop=True)
    test_df  = test_df[test_df['room'].isin(common_labels)].reset_index(drop=True)

    return train_df, test_df

# Load fold 1
train_df, test_df = load_and_filter_fold(1)

# Load fold 2
train_df_2, test_df_2 = load_and_filter_fold(2)

# Load fold 3
train_df_3, test_df_3 = load_and_filter_fold(3)

# Load fold 4
train_df_4, test_df_4 = load_and_filter_fold(4)

In [3]:
def create_beacon_count_vectors(df):
    """Aggregates readings into 1s vectors."""
    vectors = []

    for _, group in df.groupby('timestamp'):
        beacon_counts = group['mac address'].value_counts()
        total_readings = len(group)

        vector = [0.0] * 23
        for beacon_id, count in beacon_counts.items():
            if 1 <= beacon_id <= 23:
                vector[int(beacon_id) - 1] = count / total_readings

        entry = {
            'timestamp': group['timestamp'].iloc[0],
            'room': group['room'].iloc[0],
            'beacon_vector': vector
        }

        vectors.append(entry)

    return pd.DataFrame(vectors)

print("âœ… Helper functions defined")

âœ… Helper functions defined


In [4]:
def evaluate_change_point_detection(test_df, penalty=20, min_size=5):
    """
    Evaluate change point detection quality WITHOUT using the model.
    Just check: do detected boundaries match true room changes?
    """
    from ruptures import Pelt
    
    # Prepare data
    test_vectors = create_beacon_count_vectors(test_df)
    test_vectors = test_vectors.sort_values('timestamp').reset_index(drop=True)
    
    # Build beacon sequence
    beacon_sequence = np.array([v for v in test_vectors['beacon_vector']])
    
    # Detect change points
    algo = Pelt(model="rbf", min_size=min_size, jump=1)
    algo.fit(beacon_sequence)
    detected_changes = algo.predict(pen=penalty)
    
    # Get TRUE room change points
    test_vectors['room_changed'] = (test_vectors['room'] != test_vectors['room'].shift()).astype(int)
    true_changes = test_vectors[test_vectors['room_changed'] == 1].index.tolist()
    true_changes.append(len(test_vectors))  # Add end point
    
    print(f"\n{'='*80}")
    print(f"CHANGE POINT DETECTION EVALUATION (penalty={penalty})")
    print(f"{'='*80}")
    print(f"True room changes: {len(true_changes)-1}")
    print(f"Detected changes:  {len(detected_changes)-1}")
    
    # Calculate segment purity
    purities = []
    detected_segments_info = []
    
    for i in range(len(detected_changes) - 1):
        start, end = detected_changes[i], detected_changes[i+1]
        segment = test_vectors.iloc[start:end]
        
        if len(segment) == 0:
            continue
        
        # What's the most common room in this segment?
        most_common_room = segment['room'].mode()[0]
        purity = (segment['room'] == most_common_room).sum() / len(segment)
        
        purities.append(purity)
        detected_segments_info.append({
            'start': start,
            'end': end,
            'length': len(segment),
            'dominant_room': most_common_room,
            'purity': purity,
            'num_rooms': segment['room'].nunique()
        })
    
    # Calculate boundary matching accuracy
    def find_nearby_matches(detected, true, tolerance=5):
        """Count how many detected boundaries are within tolerance of true boundaries"""
        matches = 0
        for d in detected[:-1]:  # Exclude last point
            for t in true[:-1]:
                if abs(d - t) <= tolerance:
                    matches += 1
                    break
        return matches
    
    boundary_precision = find_nearby_matches(detected_changes, true_changes, tolerance=5) / max(len(detected_changes)-1, 1)
    boundary_recall = find_nearby_matches(true_changes, detected_changes, tolerance=5) / max(len(true_changes)-1, 1)
    
    print(f"\nBoundary Detection (Â±5 frame tolerance):")
    print(f"  Precision: {boundary_precision:.3f} (detected boundaries that are correct)")
    print(f"  Recall:    {boundary_recall:.3f} (true boundaries that were detected)")
    
    print(f"\nSegment Purity:")
    print(f"  Mean purity: {np.mean(purities):.3f} (1.0 = perfect, each segment is one room)")
    print(f"  Median purity: {np.median(purities):.3f}")
    print(f"  Min purity: {np.min(purities):.3f}")
    
    # Show worst segments (mixed rooms)
    print(f"\nWorst Segments (most mixed):")
    worst_segments = sorted(detected_segments_info, key=lambda x: x['purity'])[:5]
    for seg in worst_segments:
        print(f"  Frames {seg['start']}-{seg['end']} (len={seg['length']}): "
              f"purity={seg['purity']:.3f}, {seg['num_rooms']} rooms, dominant={seg['dominant_room']}")
    
    return {
        'detected_changes': detected_changes,
        'true_changes': true_changes,
        'boundary_precision': boundary_precision,
        'boundary_recall': boundary_recall,
        'mean_purity': np.mean(purities),
        'segments_info': detected_segments_info
    }

print("âœ… Change point detection evaluation function ready")

âœ… Change point detection evaluation function ready


In [5]:
print("Testing Change Point Detection Quality\n")

results = {}
for penalty in [5, 10, 20, 30, 50]:
    result = evaluate_change_point_detection(test_df, penalty=penalty, min_size=5)
    results[penalty] = result
    print("\n" + "="*80 + "\n")

Testing Change Point Detection Quality


CHANGE POINT DETECTION EVALUATION (penalty=5)
True room changes: 51
Detected changes:  23

Boundary Detection (Â±5 frame tolerance):
  Precision: 0.609 (detected boundaries that are correct)
  Recall:    0.373 (true boundaries that were detected)

Segment Purity:
  Mean purity: 0.822 (1.0 = perfect, each segment is one room)
  Median purity: 0.854
  Min purity: 0.409

Worst Segments (most mixed):
  Frames 1636-1724 (len=88): purity=0.409, 4 rooms, dominant=cleaning
  Frames 1001-1122 (len=121): purity=0.529, 3 rooms, dominant=kitchen
  Frames 1280-1395 (len=115): purity=0.565, 3 rooms, dominant=513
  Frames 2322-2399 (len=77): purity=0.636, 2 rooms, dominant=kitchen
  Frames 1883-2080 (len=197): purity=0.701, 2 rooms, dominant=kitchen



CHANGE POINT DETECTION EVALUATION (penalty=10)
True room changes: 51
Detected changes:  12

Boundary Detection (Â±5 frame tolerance):
  Precision: 0.583 (detected boundaries that are correct)
  Recall:    0.157 

KeyboardInterrupt: 

In [None]:
print("\n" + "="*80)
print("SUMMARY: Which penalty works best?")
print("="*80)

comparison = []
for penalty, result in results.items():
    comparison.append({
        'Penalty': penalty,
        'Precision': f"{result['boundary_precision']:.3f}",
        'Recall': f"{result['boundary_recall']:.3f}",
        'Purity': f"{result['mean_purity']:.3f}",
        'Num Segments': len(result['segments_info'])
    })

comparison_df = pd.DataFrame(comparison)
print(comparison_df.to_string(index=False))

print("\nðŸ’¡ Look for:")
print("   â€¢ Precision > 0.6 (not too many false alarms)")
print("   â€¢ Recall > 0.7 (catches most real transitions)")
print("   â€¢ Purity > 0.8 (segments are mostly one room)")