In [1]:
# Colab-only: mount Google Drive so files under /content/drive are readable by later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Setup and Imports
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.layers import Bidirectional, GRU
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings('ignore')

print("‚úì All imports successful")

‚úì All imports successful


In [3]:
def load_and_filter_fold(i, data_dir='/content/drive/MyDrive/split_data'):
    """Load one cross-validation fold and keep only rooms present in both splits.

    Args:
        i: Fold number; the CSVs are read from ``<data_dir>/fold<i>/train.csv``
            and ``<data_dir>/fold<i>/test.csv``.
        data_dir: Directory that contains the ``fold<i>`` sub-directories.
            Defaults to the Google Drive location used by this notebook, so
            existing calls behave exactly as before.

    Returns:
        (train_df, test_df): both frames restricted to rows whose ``room``
        value occurs in the train *and* the test CSV, each re-indexed 0..n-1.
    """
    fold_dir = os.path.join(data_dir, f'fold{i}')
    train_df = pd.read_csv(os.path.join(fold_dir, 'train.csv'))
    test_df = pd.read_csv(os.path.join(fold_dir, 'test.csv'))

    # A room seen in only one split can be neither learned nor scored, so drop it from both.
    common_labels = list(set(train_df['room'].unique()) & set(test_df['room'].unique()))

    train_df = train_df[train_df['room'].isin(common_labels)].reset_index(drop=True)
    test_df = test_df[test_df['room'].isin(common_labels)].reset_index(drop=True)

    return train_df, test_df

# Load all 4 folds in one pass; each call returns a (train, test) pair that is
# unpacked into the per-fold names used by the experiment cell further down.
(
    (train_df_1, test_df_1),
    (train_df_2, test_df_2),
    (train_df_3, test_df_3),
    (train_df_4, test_df_4),
) = [load_and_filter_fold(fold_id) for fold_id in (1, 2, 3, 4)]

print("‚úì All folds loaded")

‚úì All folds loaded


In [4]:
def set_seeds(seed=42):
    """Seed the Python, NumPy and TensorFlow RNGs and set the determinism env flags.

    Args:
        seed: Integer applied to every generator (default 42).
    """
    # Environment values must be strings; the two TF flags are simply switched on.
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
        'TF_CUDNN_DETERMINISTIC': '1',
    })

    # The three seeding calls are independent of each other.
    for seed_fn in (np.random.seed, tf.random.set_seed, random.seed):
        seed_fn(seed)

def create_room_groups(df):
    """Return a timestamp-sorted copy of ``df`` with a ``room_group`` run id.

    ``room_group`` starts at 1 and increases by one every time the ``room``
    value differs from the previous row, so each uninterrupted stay in a room
    gets its own id. The input frame is not modified.
    """
    ordered = df.sort_values('timestamp').reset_index(drop=True)
    # True on the first row and wherever the room differs from the row before it.
    room_changed = ordered['room'].ne(ordered['room'].shift())
    ordered['room_group'] = room_changed.cumsum()
    return ordered

def create_beacon_count_vectors(df, n_beacons=23):
    """Aggregate raw readings into one beacon-frequency vector per timestamp.

    For every distinct ``timestamp`` the readings are counted per
    ``'mac address'`` value and divided by the number of readings at that
    timestamp. Beacon id ``k`` (1-based) is written to position ``k - 1``;
    ids outside ``1..n_beacons`` are ignored but still count towards the
    denominator, so a vector may sum to less than 1.

    Args:
        df: Readings with ``timestamp``, ``room`` and ``'mac address'`` columns.
            If a ``room_group`` column is present it is carried through.
            NOTE(review): ``'mac address'`` is compared numerically, so it is
            assumed to hold numeric beacon ids - confirm against the CSVs.
        n_beacons: Length of each vector (default 23, the value that was
            previously hard-coded).

    Returns:
        DataFrame with columns ``timestamp``, ``room``, ``beacon_vector`` (a
        list of ``n_beacons`` floats) and, when available, ``room_group``.
        The room (and room_group) of the first reading at each timestamp is
        used. An empty input yields an empty frame that still has these columns.
    """
    has_groups = 'room_group' in df.columns  # present after create_room_groups()
    columns = ['timestamp', 'room', 'beacon_vector']
    if has_groups:
        columns.append('room_group')

    records = []
    for _, readings in df.groupby('timestamp'):
        total_readings = len(readings)

        vector = [0.0] * n_beacons
        for beacon_id, count in readings['mac address'].value_counts().items():
            if 1 <= beacon_id <= n_beacons:
                vector[int(beacon_id) - 1] = count / total_readings

        record = {
            'timestamp': readings['timestamp'].iloc[0],
            'room': readings['room'].iloc[0],
            'beacon_vector': vector,
        }
        if has_groups:
            record['room_group'] = readings['room_group'].iloc[0]
        records.append(record)

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(records, columns=columns)

def create_sequences_from_groups(vector_df, min_length=3, max_length=50):
    """Turn each (room, room_group) run into one training sequence.

    Args:
        vector_df: Frame with ``room``, ``room_group``, ``timestamp`` and
            ``beacon_vector`` columns.
        min_length: Runs with fewer rows than this are skipped.
        max_length: Longer runs are cut down to their last ``max_length`` rows.

    Returns:
        (sequences, labels): ``sequences[k]`` is the list of beacon vectors of
        run ``k`` in timestamp order and ``labels[k]`` is that run's room.
    """
    sequences, labels = [], []

    for (room, _run_id), run in vector_df.groupby(['room', 'room_group']):
        if len(run) < min_length:
            continue

        ordered = run.sort_values('timestamp')
        if len(ordered) > max_length:
            # Keep the most recent part of an over-long stay.
            ordered = ordered.tail(max_length)

        sequences.append(ordered['beacon_vector'].tolist())
        labels.append(room)

    return sequences, labels

def build_bidirectional_gru_model(input_shape, num_classes):
    """
    Build and compile the stacked bidirectional-GRU room classifier.

    Layer stack, in order: Masking -> BiGRU(128, full sequence out) -> Dropout(0.3)
    -> BiGRU(64, last state only) -> Dropout(0.3) -> Dense(32, relu) -> Dropout(0.2)
    -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of one sample handed to the Masking layer; the
            training code in this notebook passes (50, 23).
        num_classes: Width of the softmax output layer.

    Returns:
        A compiled Sequential model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential([
        # Timesteps whose features are all 0.0 are masked, which is how the zero
        # post-padding added by pad_sequences is skipped by the recurrent layers.
        # NOTE(review): a genuine all-zero beacon vector is masked the same way.
        Masking(mask_value=0.0, input_shape=input_shape),

        # First recurrent layer keeps the whole sequence so the second one can consume it.
        Bidirectional(GRU(128, return_sequences=True)),
        Dropout(0.3),

        # Second recurrent layer collapses the sequence to a single vector.
        Bidirectional(GRU(64, return_sequences=False)),
        Dropout(0.3),

        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax')
    ])

    # The sparse loss takes integer class ids, matching the LabelEncoder output used for y.
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

print("‚úÖ Basic functions defined")

‚úÖ Basic functions defined


In [5]:
def create_extended_multidirectional_windows(vector_df):
    """
    EXPERIMENT 2: build 7 kinds of sliding windows around every position of every day.

    Each window is defined by how many steps it reaches back and forward from
    the position ``i`` whose room is being predicted:

        backward_10   [i-9  .. i]      10 steps of history
        centered_10   [i-4  .. i+5]    10 steps centred on i
        forward_10    [i    .. i+9]    10 steps of future
        backward_15   [i-14 .. i]      15 steps of history
        forward_15    [i    .. i+14]   15 steps of future
        asymm_past    [i-11 .. i+3]    mostly past
        asymm_future  [i-3  .. i+11]   mostly future

    A window is only emitted when it fits entirely inside the day, so positions
    near the start or end of a day are covered by fewer directions. Steps are
    rows of ``vector_df``, not seconds.

    Args:
        vector_df: Frame with ``timestamp`` (parseable by ``pd.to_datetime``),
            ``room`` and ``beacon_vector`` columns. It is NOT modified; earlier
            versions added ``dt``/``date`` helper columns to the caller's frame.

    Returns:
        Dict keyed by direction name. Each value is a dict with parallel lists:
        ``'sequences'`` (list of beacon vectors per window), ``'labels'`` (room
        at position i) and ``'indices'`` (``(date, i)`` with ``i`` the row
        position within that day after sorting by timestamp).
    """
    # (steps_back, steps_forward) relative to the predicted position i.
    window_specs = {
        'backward_10': (9, 0),
        'centered_10': (4, 5),
        'forward_10': (0, 9),
        'backward_15': (14, 0),
        'forward_15': (0, 14),
        'asymm_past': (11, 3),
        'asymm_future': (3, 11),
    }
    results = {
        name: {'sequences': [], 'labels': [], 'indices': []}
        for name in window_specs
    }

    # assign() works on a copy, so the caller's frame keeps its original columns.
    dated = vector_df.assign(date=pd.to_datetime(vector_df['timestamp']).dt.date)

    for _, day_group in dated.groupby('date'):
        day_group = day_group.sort_values('timestamp').reset_index(drop=True)
        vectors = list(day_group['beacon_vector'])
        rooms = list(day_group['room'])
        day = day_group['date'].iloc[0]  # constant for the whole group
        n = len(vectors)

        for i in range(n):
            for name, (back, forward) in window_specs.items():
                # Skip windows that would run past either end of the day.
                if i < back or i + forward >= n:
                    continue
                bucket = results[name]
                bucket['sequences'].append(vectors[i - back : i + forward + 1])
                bucket['labels'].append(rooms[i])
                bucket['indices'].append((day, i))

    return results

print("‚úÖ Extended multi-directional window function defined (7 directions)")

‚úÖ Extended multi-directional window function defined (7 directions)


In [6]:
def train_ensemble_models(train_df, n_models=5, base_seed=42, verbose=False):
    """
    Train ``n_models`` identical GRU classifiers that differ only in their seed.

    The training data is prepared once (room runs -> per-timestamp beacon
    vectors -> sequences of at most 50 steps, zero post-padded to 50) and
    reused for every ensemble member. Member ``k`` is seeded with
    ``base_seed + k * 1000``.

    Args:
        train_df: Raw training readings for one fold.
        n_models: Number of ensemble members.
        base_seed: Seed of the first member.
        verbose: Print one progress line per member when True.

    Returns:
        models: List of trained Keras models
        label_encoder: LabelEncoder fitted on the training room labels
    """
    if verbose:
        print(f"  Training ensemble of {n_models} models...")

    # --- shared preprocessing (identical for every member) ---
    vector_df = create_beacon_count_vectors(create_room_groups(train_df))
    raw_sequences, raw_labels = create_sequences_from_groups(vector_df, max_length=50)

    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(raw_labels)

    X_train_padded = pad_sequences(raw_sequences, maxlen=50, padding='post', dtype='float32', value=0.0)

    # 'balanced' weights counteract rooms that contribute few sequences.
    balanced_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weights = dict(enumerate(balanced_weights))

    n_classes = len(label_encoder.classes_)

    # --- one model per seed ---
    models = []
    for member in range(n_models):
        model_seed = base_seed + member * 1000  # 42, 1042, 2042, 3042, 4042 for the defaults
        set_seeds(model_seed)

        if verbose:
            print(f"    Model {member+1}/{n_models} (seed {model_seed})...", end=" ")

        model = build_bidirectional_gru_model(input_shape=(50, 23), num_classes=n_classes)

        # No validation split is used, so both callbacks watch the training loss.
        training_callbacks = [
            EarlyStopping(monitor='loss', patience=5, restore_best_weights=True, verbose=0),
            ReduceLROnPlateau(monitor='loss', factor=0.5, patience=3, verbose=0, min_lr=1e-6),
        ]

        model.fit(
            X_train_padded,
            y_train,
            epochs=30,
            batch_size=32,
            class_weight=class_weights,
            callbacks=training_callbacks,
            verbose=0,
        )
        models.append(model)

        if verbose:
            print("‚úì")

    return models, label_encoder

print("‚úì Ensemble training function defined")

‚úì Ensemble training function defined


In [7]:
def predict_single_direction(models, sequences, max_seq_length=50):
    """
    Average the class probabilities of all ensemble members for one window type.

    Args:
        models: Trained models exposing ``predict(X, verbose=0)``.
        sequences: List of windows (each a list of beacon vectors).
        max_seq_length: Length every window is zero post-padded to.

    Returns:
        ensemble_proba: (n_samples, n_classes) mean of the members' outputs
    """
    padded = pad_sequences(sequences, maxlen=max_seq_length, dtype='float32', padding='post', value=0.0)

    # One probability matrix per member, then the element-wise mean over members.
    member_probas = [member.predict(padded, verbose=0) for member in models]
    return np.mean(member_probas, axis=0)

def combine_directional_predictions(direction_results, method='confidence_weighted'):
    """
    Merge per-direction probability matrices into one matrix per position.

    Every direction contributes, for each position it covers, its probability
    row multiplied by a weight; the sum is then divided by the total weight
    that position received.

    Args:
        direction_results: Dict mapping a direction name to a dict with
            ``'proba'`` (n_windows x n_classes array) and ``'indices'`` (list
            of hashable, sortable position keys such as ``(date, i)``, one per
            row of ``proba``). The seven standard directions are processed in
            their canonical order; any subset of them, or extra directions, is
            accepted as well.
        method: ``'confidence_weighted'`` weights each row by its maximum
            probability; ``'equal'`` gives every row weight 1; ``'softmax'`` is
            accepted for compatibility and currently behaves exactly like
            ``'confidence_weighted'`` (no softmax is applied anywhere).

    Returns:
        combined_proba: (n_positions, n_classes) float array, rows ordered by
            sorted position key.
        position_map: dict mapping each position key to its row index.

    Raises:
        ValueError: if ``method`` is not one of the three names above (this
            used to return an all-zero matrix silently) or if
            ``direction_results`` is empty.
    """
    valid_methods = ('confidence_weighted', 'equal', 'softmax')
    if method not in valid_methods:
        raise ValueError(f"Unknown combination method {method!r}; expected one of {valid_methods}")

    canonical_order = ['backward_10', 'centered_10', 'forward_10',
                       'backward_15', 'forward_15',
                       'asymm_past', 'asymm_future']
    # Canonical directions first (keeps the historical accumulation order), then any extras.
    direction_names = [name for name in canonical_order if name in direction_results]
    direction_names += [name for name in direction_results if name not in canonical_order]
    if not direction_names:
        raise ValueError("direction_results contains no directions to combine")

    # Union of all covered positions, sorted for a deterministic row order.
    all_positions = sorted({
        pos
        for name in direction_names
        for pos in direction_results[name]['indices']
    })
    position_map = {pos: idx for idx, pos in enumerate(all_positions)}

    # Number of classes comes from the first direction that is actually present.
    n_classes = np.asarray(direction_results[direction_names[0]]['proba']).shape[1]
    n_positions = len(all_positions)

    combined_proba = np.zeros((n_positions, n_classes))
    total_weight = np.zeros(n_positions)  # per-position sum of applied weights

    for name in direction_names:
        indices = direction_results[name]['indices']
        if len(indices) == 0:
            continue
        proba = np.asarray(direction_results[name]['proba'])[:len(indices)]
        rows = np.fromiter((position_map[pos] for pos in indices),
                           dtype=np.intp, count=len(indices))

        if method == 'equal':
            weights = np.ones(len(indices))
        else:
            # Confidence = highest class probability of each window.
            weights = proba.max(axis=1)

        # Unbuffered adds, so repeated row indices would still accumulate correctly.
        np.add.at(combined_proba, rows, proba * weights[:, None])
        np.add.at(total_weight, rows, weights)

    # Normalize by total weight; rows that received no weight stay all-zero.
    covered = total_weight > 0
    combined_proba[covered] /= total_weight[covered, None]

    return combined_proba, position_map

print("‚úÖ Multi-directional prediction functions defined (handles 7 directions)")

‚úÖ Multi-directional prediction functions defined (handles 7 directions)


In [8]:
def apply_confidence_weighted_voting(predictions_proba, vote_window=5):
    """
    Smooth predictions over neighbouring rows with confidence-weighted votes.

    For row ``i`` the rows ``i - vote_window // 2 .. i + vote_window // 2``
    (clipped to the array bounds) each cast their full probability vector,
    scaled by their own maximum probability. The class with the largest summed
    vote becomes the prediction for row ``i``.

    Args:
        predictions_proba: (n_samples, n_classes) probability matrix
        vote_window: window size for voting

    Returns:
        voted_predictions: (n_samples,) integer class indices
    """
    n_samples, n_classes = predictions_proba.shape
    reach = vote_window // 2  # rows considered on each side of the centre
    voted_predictions = np.zeros(n_samples, dtype=int)

    for center in range(n_samples):
        lo = max(0, center - reach)
        hi = min(n_samples, center + reach + 1)

        tally = np.zeros(n_classes)
        for row in predictions_proba[lo:hi]:
            # A confident row (high max probability) counts for more.
            tally += row * np.max(row)

        voted_predictions[center] = np.argmax(tally)

    return voted_predictions

print("‚úÖ Temporal voting function defined")

‚úÖ Temporal voting function defined


In [9]:
def run_extended_multidirectional_pipeline(train_df, test_df, seed, n_ensemble=5,
                                           vote_window=5,
                                           combination_method='confidence_weighted',
                                           verbose=False):
    """
    EXPERIMENT 2: train, predict with 7 window directions, combine, vote and score.

    Pipeline:
    1. Train an ensemble of models on ``train_df``
    2. Build the 7 directional windows over ``test_df``
       (backward_10, centered_10, forward_10, backward_15, forward_15,
       asymm_past, asymm_future)
    3. Get ensemble-averaged probabilities for each direction
    4. Combine the directions per position
    5. Apply confidence-weighted temporal voting
    6. Compute macro and per-class F1

    Args:
        train_df: Raw training readings of one fold.
        test_df: Raw test readings of the same fold.
        seed: Global seed; also the base seed of the ensemble.
        n_ensemble: Number of ensemble members.
        vote_window: Window size passed to the temporal voting step.
        combination_method: 'confidence_weighted', 'equal', or 'softmax'
        verbose: Print progress information when True.

    Returns:
        Dict with ``'seed'``, ``'macro_f1'``, ``'per_class_f1'`` (room label ->
        F1, for the classes known to the label encoder) and
        ``'combination_method'``.
    """
    direction_names = ['backward_10', 'centered_10', 'forward_10',
                      'backward_15', 'forward_15',
                      'asymm_past', 'asymm_future']

    # 0. Clear session and set seeds
    tf.keras.backend.clear_session()
    set_seeds(seed)

    if verbose:
        print(f"\n  Seed {seed}: Training ensemble...")

    # 1. Train Ensemble Models (SAME AS BASELINE)
    models, label_encoder = train_ensemble_models(
        train_df,
        n_models=n_ensemble,
        base_seed=seed,
        verbose=verbose
    )

    if verbose:
        print("  Creating extended multi-directional windows (7 directions)...")

    # 2. Prepare Test Data with Extended Multi-Directional Windows (7 DIRECTIONS)
    test_vectors = create_beacon_count_vectors(test_df)
    direction_windows = create_extended_multidirectional_windows(test_vectors)

    if verbose:
        for direction_name in direction_names:
            n_windows = len(direction_windows[direction_name]['sequences'])
            print(f"    {direction_name.capitalize()} windows: {n_windows}")
        print("  Getting directional predictions...")

    # 3. Get Predictions for Each Direction (7 DIRECTIONS)
    direction_results = {}
    for direction_name in direction_names:
        if verbose:
            print(f"    Predicting {direction_name}...", end=" ")

        windows = direction_windows[direction_name]
        proba = predict_single_direction(models, windows['sequences'], max_seq_length=50)

        direction_results[direction_name] = {
            'proba': proba,
            'indices': windows['indices'],
            'labels': windows['labels']
        }

        if verbose:
            avg_conf = np.mean(np.max(proba, axis=1))
            print(f"avg confidence: {avg_conf:.3f}")

    if verbose:
        print(f"  Combining 7 directions using {combination_method}...")

    # 4. Combine Directional Predictions (7 DIRECTIONS)
    combined_proba, position_map = combine_directional_predictions(
        direction_results,
        method=combination_method
    )

    # Ground truth in the same (sorted-position) order as combined_proba.
    # A single pass builds position -> label, keeping the first direction that
    # covers a position; this replaces a per-position list search that was
    # quadratic in the number of test windows.
    label_by_position = {}
    for direction_name in direction_names:
        result = direction_results[direction_name]
        for pos, label in zip(result['indices'], result['labels']):
            label_by_position.setdefault(pos, label)
    y_test = [label_by_position[pos] for pos in sorted(position_map.keys())]

    if verbose:
        print(f"  Applying temporal voting (window={vote_window})...")

    # 5. Apply Confidence-Weighted Temporal Voting (SAME AS BASELINE)
    # NOTE(review): rows of combined_proba from consecutive days are adjacent,
    # so the voting window can span a day boundary - confirm this is intended.
    y_pred_voted_encoded = apply_confidence_weighted_voting(combined_proba, vote_window=vote_window)
    y_pred = label_encoder.inverse_transform(y_pred_voted_encoded)

    # 6. Final Evaluation
    macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
    per_class_f1 = f1_score(y_test, y_pred, average=None, labels=label_encoder.classes_, zero_division=0)

    if verbose:
        print(f"  ‚úì Macro F1: {macro_f1:.4f}")

    return {
        'seed': seed,
        'macro_f1': macro_f1,
        'per_class_f1': {label: f1 for label, f1 in zip(label_encoder.classes_, per_class_f1)},
        'combination_method': combination_method
    }

print("‚úÖ Complete extended multi-directional pipeline defined (7 directions)")

‚úÖ Complete extended multi-directional pipeline defined (7 directions)


In [10]:
# Check GPU availability: query the device list once, then report its size and contents.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
print(gpu_devices)

Num GPUs Available:  1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [11]:
# FULL EXPERIMENT: All 4 folds, 3 seeds each
# Every (fold, seed) pair trains a fresh 5-model ensemble, so this cell is the
# expensive one. Its result, all_fold_results, is read by the summary and
# file-export cells below.
print("="*80)
print("FULL 4-FOLD CROSS-VALIDATION - EXPERIMENT 2")
print("="*80)

seeds = [42, 123, 456]
# fold number -> (train, test) frames loaded earlier in the notebook
folds = {
    1: (train_df_1, test_df_1),
    2: (train_df_2, test_df_2),
    3: (train_df_3, test_df_3),
    4: (train_df_4, test_df_4)
}

# fold number -> list of result dicts (one per seed, in the order of `seeds`)
all_fold_results = {}

for fold_num, (train_df, test_df) in folds.items():
    print(f"\n{'='*80}")
    print(f"PROCESSING FOLD {fold_num}")
    print(f"{'='*80}\n")

    fold_results = []

    for seed in seeds:
        # end=" " keeps the score printed below on the same output line.
        print(f"  Running seed {seed}...", end=" ")
        result = run_extended_multidirectional_pipeline(
            train_df, test_df,
            seed=seed,
            n_ensemble=5,
            vote_window=5,
            combination_method='confidence_weighted',
            verbose=False
        )
        fold_results.append(result)
        print(f"Macro F1: {result['macro_f1']:.4f}")

    all_fold_results[fold_num] = fold_results

    # Calculate fold statistics (np.std is the population standard deviation over the seeds)
    macro_f1_scores = [r['macro_f1'] for r in fold_results]
    print(f"\n  Fold {fold_num} Summary:")
    print(f"    Mean Macro F1: {np.mean(macro_f1_scores):.4f} ¬± {np.std(macro_f1_scores):.4f}")
    print(f"    Min: {np.min(macro_f1_scores):.4f}, Max: {np.max(macro_f1_scores):.4f}")

print("\n" + "="*80)
print("ALL FOLDS COMPLETED!")
print("="*80)

FULL 4-FOLD CROSS-VALIDATION - EXPERIMENT 2

PROCESSING FOLD 1

  Running seed 42... Macro F1: 0.5059
  Running seed 123... Macro F1: 0.4935
  Running seed 456... Macro F1: 0.4695

  Fold 1 Summary:
    Mean Macro F1: 0.4896 ¬± 0.0151
    Min: 0.4695, Max: 0.5059

PROCESSING FOLD 2

  Running seed 42... Macro F1: 0.4187
  Running seed 123... Macro F1: 0.4373
  Running seed 456... Macro F1: 0.4325

  Fold 2 Summary:
    Mean Macro F1: 0.4295 ¬± 0.0079
    Min: 0.4187, Max: 0.4373

PROCESSING FOLD 3

  Running seed 42... Macro F1: 0.4121
  Running seed 123... Macro F1: 0.4321
  Running seed 456... Macro F1: 0.3898

  Fold 3 Summary:
    Mean Macro F1: 0.4113 ¬± 0.0173
    Min: 0.3898, Max: 0.4321

PROCESSING FOLD 4

  Running seed 42... Macro F1: 0.4285
  Running seed 123... Macro F1: 0.4117
  Running seed 456... Macro F1: 0.4287

  Fold 4 Summary:
    Mean Macro F1: 0.4230 ¬± 0.0080
    Min: 0.4117, Max: 0.4287

ALL FOLDS COMPLETED!


In [12]:
# Display summary and comparison
# Reference scores from the earlier experiments and the project target are kept
# in one place so the printed labels and the computed deltas cannot drift apart.
BASELINE_F1, BASELINE_STD = 0.4106, 0.0266   # Approach 24, single direction
EXP1_F1, EXP1_STD = 0.4273, 0.0312           # Experiment 1, 3 directions
TARGET_F1 = 0.45

print("\n" + "="*80)
print("FINAL SUMMARY - EXPERIMENT 2 (7 DIRECTIONS)")
print("="*80 + "\n")

# Per-fold line plus the pooled list of all fold x seed scores.
all_macro_f1 = []
for fold_num in [1, 2, 3, 4]:
    macro_f1_scores = [r['macro_f1'] for r in all_fold_results[fold_num]]
    all_macro_f1.extend(macro_f1_scores)
    print(f"Fold {fold_num}: {np.mean(macro_f1_scores):.4f} ¬± {np.std(macro_f1_scores):.4f}")

# Pooled statistics over every run (population standard deviation).
overall_mean = np.mean(all_macro_f1)
overall_std = np.std(all_macro_f1)

print(f"\n{'='*80}")
print(f"Overall Mean: {overall_mean:.4f} ¬± {overall_std:.4f}")
print(f"{'='*80}")

print("\n" + "="*80)
print("PROGRESSION:")
print("="*80)
print("Baseline (Approach 24 - single direction):")
print(f"  Overall: {BASELINE_F1:.4f} ¬± {BASELINE_STD:.4f}")
print("\nExperiment 1 (3 directions):")
print(f"  Overall: {EXP1_F1:.4f} ¬± {EXP1_STD:.4f}  ({EXP1_F1 - BASELINE_F1:+.4f} vs baseline)")
print("\nExperiment 2 (7 directions):")
print(f"  Overall: {overall_mean:.4f} ¬± {overall_std:.4f}  ({overall_mean - BASELINE_F1:+.4f} vs baseline, {overall_mean - EXP1_F1:+.4f} vs Exp1)")

total_gain = overall_mean - BASELINE_F1
target_gap = TARGET_F1 - overall_mean

print(f"\n{'='*80}")
print(f"Total gain from baseline: {total_gain:+.4f}")
print(f"Gap to target ({TARGET_F1}): {target_gap:.4f}")
print(f"{'='*80}")

# Verdict: compare the pooled mean with the 0.45 target first, then with the
# Experiment 1 mean (0.4273); anything at or below that counts as no improvement.
if np.mean(all_macro_f1) >= 0.45:
    print("\nüéØüéØüéØ TARGET ACHIEVED! 0.45 F1 REACHED! üéØüéØüéØ")
elif np.mean(all_macro_f1) > 0.4273:
    print("\n‚úÖ Extended directions improved over 3 directions!")
    print(f"   Recommendation: Try spatial constraints (Viterbi) to close remaining {target_gap:.4f} gap")
else:
    print("\n‚ö†Ô∏è  7 directions didn't improve over 3 directions")
    print("   Recommendation: Try spatial constraints or hyperparameter tuning instead")


FINAL SUMMARY - EXPERIMENT 2 (7 DIRECTIONS)

Fold 1: 0.4896 ¬± 0.0151
Fold 2: 0.4295 ¬± 0.0079
Fold 3: 0.4113 ¬± 0.0173
Fold 4: 0.4230 ¬± 0.0080

Overall Mean: 0.4384 ¬± 0.0329

PROGRESSION:
Baseline (Approach 24 - single direction):
  Overall: 0.4106 ¬± 0.0266

Experiment 1 (3 directions):
  Overall: 0.4273 ¬± 0.0312  (+0.0167 vs baseline)

Experiment 2 (7 directions):
  Overall: 0.4384 ¬± 0.0329  (+0.0278 vs baseline, +0.0111 vs Exp1)

Total gain from baseline: +0.0278
Gap to target (0.45): 0.0116

‚úÖ Extended directions improved over 3 directions!
   Recommendation: Try spatial constraints (Viterbi) to close remaining 0.0116 gap


In [13]:
# Save results to text file
# The report contains non-ASCII characters, so the encoding is fixed explicitly
# instead of depending on the platform's default.
with open('experiment2_results.txt', 'w', encoding='utf-8') as f:
    f.write("="*80 + "\n")
    f.write("EXPERIMENT 2: EXTENDED MULTI-DIRECTIONAL (7 DIRECTIONS)\n")
    f.write("="*80 + "\n\n")

    # NOTE(review): the configuration lines below are literal text; they are not
    # read from the values actually passed to the pipeline, so keep them in sync.
    f.write("Configuration:\n")
    f.write("-"*80 + "\n")
    f.write("Directions (7 total):\n")
    f.write("  1. backward_10:  [t-9 to t] - 10s history\n")
    f.write("  2. centered_10:  [t-4 to t+5] - 10s centered\n")
    f.write("  3. forward_10:   [t to t+9] - 10s future\n")
    f.write("  4. backward_15:  [t-14 to t] - 15s history (more context)\n")
    f.write("  5. forward_15:   [t to t+14] - 15s future (earlier transition)\n")
    f.write("  6. asymm_past:   [t-11 to t+3] - heavy past bias\n")
    f.write("  7. asymm_future: [t-3 to t+11] - heavy future bias\n")
    f.write("\nCombination method: Confidence-weighted\n")
    f.write("Ensemble size: 5 models\n")
    f.write("Temporal voting window: 5 seconds\n\n")

    # Overall summary: pool every fold x seed score
    all_macro_f1 = []
    for fold_num in [1, 2, 3, 4]:
        fold_scores = [r['macro_f1'] for r in all_fold_results[fold_num]]
        all_macro_f1.extend(fold_scores)

    f.write("OVERALL RESULTS:\n")
    f.write("-"*80 + "\n")
    f.write(f"Mean Macro F1: {np.mean(all_macro_f1):.4f} ¬± {np.std(all_macro_f1):.4f}\n")
    f.write(f"Min: {np.min(all_macro_f1):.4f}, Max: {np.max(all_macro_f1):.4f}\n\n")

    # Comparison with the scores recorded for the earlier experiments
    f.write("PROGRESSION:\n")
    f.write("-"*80 + "\n")
    f.write("Baseline (single direction backward): 0.4106 ¬± 0.0266\n")
    f.write("Experiment 1 (3 directions): 0.4273 ¬± 0.0312\n")
    f.write(f"Experiment 2 (7 directions): {np.mean(all_macro_f1):.4f} ¬± {np.std(all_macro_f1):.4f}\n\n")

    f.write(f"Gain vs Baseline: {np.mean(all_macro_f1) - 0.4106:+.4f}\n")
    f.write(f"Gain vs Experiment 1: {np.mean(all_macro_f1) - 0.4273:+.4f}\n")
    f.write(f"Gap to target (0.45): {0.45 - np.mean(all_macro_f1):.4f}\n\n")

    # Per-fold results
    for fold_num in [1, 2, 3, 4]:
        f.write(f"\n{'='*80}\n")
        f.write(f"FOLD {fold_num} RESULTS\n")
        f.write(f"{'='*80}\n\n")

        fold_results = all_fold_results[fold_num]
        macro_f1_scores = [r['macro_f1'] for r in fold_results]

        f.write("Macro F1 Scores:\n")
        f.write("-"*80 + "\n")
        for result in fold_results:
            f.write(f"  Seed {result['seed']:5d}: {result['macro_f1']:.4f}\n")

        f.write("\nStatistics:\n")
        f.write(f"  Mean: {np.mean(macro_f1_scores):.4f} ¬± {np.std(macro_f1_scores):.4f}\n")
        f.write(f"  Min:  {np.min(macro_f1_scores):.4f}\n")
        f.write(f"  Max:  {np.max(macro_f1_scores):.4f}\n")

        # Per-class F1 (averaged across seeds)
        f.write("\nPer-Class F1 Scores (averaged across seeds):\n")
        f.write("-"*80 + "\n")

        # Collect all class names seen by any seed of this fold
        all_classes = set()
        for result in fold_results:
            all_classes.update(result['per_class_f1'].keys())

        # Average per-class F1 across seeds; a class missing for a seed counts as 0
        for class_name in sorted(all_classes):
            class_f1_scores = [r['per_class_f1'].get(class_name, 0) for r in fold_results]
            mean_f1 = np.mean(class_f1_scores)
            std_f1 = np.std(class_f1_scores)
            # str() keeps the width formatting valid for non-string room labels.
            f.write(f"  {str(class_name):20s}: {mean_f1:.4f} ¬± {std_f1:.4f}\n")

print("‚úÖ Results saved to experiment2_results.txt")

‚úÖ Results saved to experiment2_results.txt
