In [1]:
# Mount Google Drive (Colab only) so that paths under /content/drive are readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Setup and Imports
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.layers import Bidirectional, GRU
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings('ignore')

print("âœ“ All imports successful")

âœ“ All imports successful


In [3]:
def load_and_filter_fold(i, base_dir='/content/drive/MyDrive/split_data'):
    """
    Load one cross-validation fold and keep only rooms present in both splits.

    Args:
        i: Fold identifier; the files are read from `<base_dir>/fold<i>/`.
        base_dir: Directory that contains the `fold<i>` sub-directories.
            Defaults to the Colab Drive location used by this notebook.

    Returns:
        (train_df, test_df): the contents of `train.csv` and `test.csv`,
        each restricted to rows whose 'room' value occurs in both files,
        with a fresh 0..n-1 index.
    """
    fold_dir = os.path.join(base_dir, f'fold{i}')
    train_df = pd.read_csv(os.path.join(fold_dir, 'train.csv'))
    test_df = pd.read_csv(os.path.join(fold_dir, 'test.csv'))

    # Rooms seen in only one split can be neither learned nor scored, so drop them.
    common_labels = list(set(train_df['room'].unique()) & set(test_df['room'].unique()))

    train_df = train_df[train_df['room'].isin(common_labels)].reset_index(drop=True)
    test_df = test_df[test_df['room'].isin(common_labels)].reset_index(drop=True)

    return train_df, test_df

# Read the four cross-validation folds, each as a (train, test) pair
(
    (train_df_1, test_df_1),
    (train_df_2, test_df_2),
    (train_df_3, test_df_3),
    (train_df_4, test_df_4),
) = [load_and_filter_fold(fold_id) for fold_id in (1, 2, 3, 4)]

print("âœ“ All folds loaded")

âœ“ All folds loaded


In [4]:
def set_seeds(seed=42):
    """Seed NumPy, TensorFlow and `random`, then export the hash-seed and TF determinism environment variables."""
    for seed_fn in (np.random.seed, tf.random.set_seed, random.seed):
        seed_fn(seed)

    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
        'TF_CUDNN_DETERMINISTIC': '1',
    })

def create_room_groups(df):
    """
    Return a timestamp-sorted copy of `df` with a 'room_group' run id.

    The id starts at 1 and increases by one every time the 'room' value
    differs from the previous row, so each uninterrupted stay in a room
    shares one id.
    """
    ordered = df.sort_values('timestamp').reset_index(drop=True)
    room_changed = ordered['room'].ne(ordered['room'].shift())
    return ordered.assign(room_group=room_changed.cumsum())

def create_beacon_count_vectors(df, n_beacons=23):
    """
    Aggregate raw readings into one beacon-frequency vector per timestamp.

    Works with or without a 'room_group' column; when it is present it is
    carried through to the output.

    Args:
        df: Readings with 'timestamp', 'mac address' and 'room' columns
            (plus optionally 'room_group'). 'mac address' is compared
            numerically against 1..n_beacons.
        n_beacons: Length of the output vector; beacon id k fills slot k-1.
            Ids outside 1..n_beacons are ignored but still count towards
            the denominator.

    Returns:
        DataFrame with columns 'timestamp', 'room', 'beacon_vector'
        (and 'room_group' if it was in `df`), one row per distinct
        timestamp. For empty input the frame is empty but still has
        these columns, so downstream column access keeps working.
    """
    has_groups = 'room_group' in df.columns
    columns = ['timestamp', 'room', 'beacon_vector']
    if has_groups:
        columns.append('room_group')

    records = []
    for _, group in df.groupby('timestamp'):
        total_readings = len(group)

        # Share of this timestamp's readings contributed by each beacon.
        vector = [0.0] * n_beacons
        for beacon_id, count in group['mac address'].value_counts().items():
            if 1 <= beacon_id <= n_beacons:
                vector[int(beacon_id) - 1] = count / total_readings

        # The first row's room / room_group stands in for the whole timestamp.
        record = {
            'timestamp': group['timestamp'].iloc[0],
            'room': group['room'].iloc[0],
            'beacon_vector': vector,
        }
        if has_groups:
            record['room_group'] = group['room_group'].iloc[0]

        records.append(record)

    # Explicit columns keep the schema stable even when there are no records.
    return pd.DataFrame(records, columns=columns)

def create_sequences_from_groups(vector_df, min_length=3, max_length=50):
    """
    Build training sequences, one per (room, room_group) run.

    Runs shorter than `min_length` are dropped; runs longer than
    `max_length` keep only their last `max_length` vectors. Each sequence
    is the run's 'beacon_vector' values in timestamp order, and its label
    is the run's room.

    Returns:
        (sequences, labels) as two parallel lists.
    """
    sequences, labels = [], []

    for (room, _run_id), run in vector_df.groupby(['room', 'room_group']):
        if len(run) < min_length:
            continue

        ordered = run.sort_values('timestamp')
        if len(ordered) > max_length:
            ordered = ordered.tail(max_length)

        sequences.append(list(ordered['beacon_vector']))
        labels.append(room)

    return sequences, labels

def build_bidirectional_gru_model(input_shape, num_classes):
    """
    Assemble and compile the stacked bidirectional-GRU room classifier.

    Layers, in order: zero-masking, BiGRU(128, full sequence), dropout 0.3,
    BiGRU(64, last state), dropout 0.3, Dense(32, relu), dropout 0.2 and a
    softmax over `num_classes`. Compiled with Adam, sparse categorical
    cross-entropy and accuracy.
    """
    network = Sequential()

    # All-zero timesteps (the padding value) are masked out.
    network.add(Masking(mask_value=0.0, input_shape=input_shape))

    network.add(Bidirectional(GRU(128, return_sequences=True)))
    network.add(Dropout(0.3))

    network.add(Bidirectional(GRU(64, return_sequences=False)))
    network.add(Dropout(0.3))

    network.add(Dense(32, activation='relu'))
    network.add(Dropout(0.2))
    network.add(Dense(num_classes, activation='softmax'))

    network.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

print("âœ… Basic functions defined")

âœ… Basic functions defined


In [5]:
def create_extended_multidirectional_windows(vector_df):
    """
    Create 7 types of sliding windows for extended multi-directional prediction
    (Same as Experiment 2)

    For every calendar day (derived from 'timestamp') the rows are sorted by
    'timestamp' and, for each position i, one window per direction is emitted
    whenever the window fits entirely inside that day.

    Args:
        vector_df: DataFrame with 'timestamp', 'room' and 'beacon_vector'
            columns. It is NOT modified (the helper date column lives on an
            internal copy only).

    Returns:
        Dict keyed by direction name ('backward_10', 'centered_10',
        'forward_10', 'backward_15', 'forward_15', 'asymm_past',
        'asymm_future'). Each value is a dict of three parallel lists:
        'sequences' (list of beacon vectors), 'labels' (room at position i)
        and 'indices' ((date, i) tuples identifying the target position).
    """
    # (direction name, steps taken before position i, steps taken after position i)
    window_specs = (
        ('backward_10', 9, 0),
        ('centered_10', 4, 5),
        ('forward_10', 0, 9),
        ('backward_15', 14, 0),
        ('forward_15', 0, 14),
        ('asymm_past', 11, 3),
        ('asymm_future', 3, 11),
    )

    results = {
        name: {'sequences': [], 'labels': [], 'indices': []}
        for name, _, _ in window_specs
    }

    # assign() returns a new frame, so the caller's DataFrame keeps its columns.
    dated_df = vector_df.assign(date=pd.to_datetime(vector_df['timestamp']).dt.date)

    for day, day_group in dated_df.groupby('date'):
        day_group = day_group.sort_values('timestamp')
        vectors = list(day_group['beacon_vector'])
        rooms = list(day_group['room'])
        n = len(vectors)

        for i in range(n):
            for name, n_past, n_future in window_specs:
                # Only emit windows that do not run past either end of the day.
                if i >= n_past and i + n_future < n:
                    bucket = results[name]
                    bucket['sequences'].append(vectors[i - n_past : i + n_future + 1])
                    bucket['labels'].append(rooms[i])
                    bucket['indices'].append((day, i))

    return results

print("âœ… Extended multi-directional window function defined (7 directions)")

âœ… Extended multi-directional window function defined (7 directions)


In [6]:
def train_ensemble_models(train_df, n_models=5, base_seed=42, verbose=False):
    """
    Train multiple models with different seeds for ensemble

    Args:
        train_df: Raw training readings; passed through create_room_groups
            and create_beacon_count_vectors, so it must carry the columns
            those helpers read.
        n_models: Number of ensemble members to train.
        base_seed: Member i is seeded with base_seed + i * 1000.
        verbose: When True, print one progress line per member.

    Returns:
        (models, label_encoder): the list of fitted models and the
        LabelEncoder fitted on the training sequence labels.
    """
    if verbose:
        print(f"  Training ensemble of {n_models} models...")

    # Prepare data: constant-room runs -> per-timestamp vectors -> sequences
    train_df_grouped = create_room_groups(train_df)
    train_vector_df = create_beacon_count_vectors(train_df_grouped)
    X_train_seq, y_train_labels = create_sequences_from_groups(train_vector_df, max_length=50)

    # Encode labels
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train_labels)

    # Pad sequences (zeros appended at the end, up to 50 timesteps)
    X_train_padded = pad_sequences(X_train_seq, maxlen=50, padding='post', dtype='float32', value=0.0)

    # Compute class weights ('balanced' mode, keyed by encoded class id)
    class_weights_array = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weights = {i: weight for i, weight in enumerate(class_weights_array)}

    # Train multiple models; all members share the same data and differ only in seed
    models = []
    for i in range(n_models):
        model_seed = base_seed + i * 1000
        # Re-seed before building so each member's initialisation follows its own seed
        set_seeds(model_seed)

        if verbose:
            print(f"    Model {i+1}/{n_models} (seed {model_seed})...", end=" ")

        model = build_bidirectional_gru_model(
            input_shape=(50, 23),
            num_classes=len(label_encoder.classes_)
        )

        # Both callbacks watch the training loss; fit() below receives no validation data
        early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True, verbose=0)
        reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=3, verbose=0, min_lr=1e-6)

        model.fit(
            X_train_padded, y_train,
            epochs=30,
            batch_size=32,
            class_weight=class_weights,
            callbacks=[early_stop, reduce_lr],
            verbose=0
        )

        models.append(model)

        if verbose:
            print("âœ“")

    return models, label_encoder

print("âœ“ Ensemble training function defined")

âœ“ Ensemble training function defined


In [7]:
def predict_single_direction(models, sequences, max_seq_length=50):
    """
    Average the class probabilities of every ensemble member for one direction.

    The sequences are zero-padded at the end to `max_seq_length` timesteps,
    each model predicts on the padded batch, and the element-wise mean of
    those predictions is returned.
    """
    padded_batch = pad_sequences(sequences, maxlen=max_seq_length, dtype='float32', padding='post', value=0.0)
    member_probas = [member.predict(padded_batch, verbose=0) for member in models]
    return np.mean(member_probas, axis=0)

def _adaptive_direction_weights(position_confs,
                                high_conf_threshold,
                                very_high_conf_threshold,
                                centered_boost_threshold,
                                low_conf_threshold):
    """Pick one weight per direction for a single position from its confidence pattern."""
    max_conf = max(position_confs.values())
    avg_conf = np.mean(list(position_confs.values()))
    centered_conf = position_confs.get('centered_10', 0)

    # Strategy 1: some direction is very confident -> trust it heavily
    if max_conf >= very_high_conf_threshold:
        weights = {}
        for direction_name, conf in position_confs.items():
            if conf >= very_high_conf_threshold:
                weights[direction_name] = conf * 2.5  # Strong boost
            elif conf >= high_conf_threshold:
                weights[direction_name] = conf * 1.2  # Moderate boost
            else:
                weights[direction_name] = conf * 0.5  # Reduce low-conf directions
        return weights

    # Strategy 2: centered window is confident -> boost it, damp the others
    if centered_conf >= centered_boost_threshold:
        return {
            direction_name: conf * (1.8 if direction_name == 'centered_10' else 0.8)
            for direction_name, conf in position_confs.items()
        }

    # Strategy 3: everything is uncertain -> plain average
    if avg_conf < low_conf_threshold:
        return {direction_name: 1.0 for direction_name in position_confs}

    # Strategy 4: normal case -> weight each direction by its own confidence
    return dict(position_confs)


def combine_directional_predictions_adaptive(direction_results,
                                             high_conf_threshold=0.70,
                                             very_high_conf_threshold=0.75,
                                             centered_boost_threshold=0.68,
                                             low_conf_threshold=0.60):
    """
    Adaptive confidence-based direction combination

    For every target position, the probability rows of all directions that
    cover it are blended with weights chosen from their confidences
    (confidence = largest class probability of the row):

    1. If any direction reaches `very_high_conf_threshold`, those directions
       get conf * 2.5, directions above `high_conf_threshold` get conf * 1.2
       and the rest conf * 0.5.
    2. Otherwise, if centered_10 reaches `centered_boost_threshold`, it gets
       conf * 1.8 and every other direction conf * 0.8.
    3. Otherwise, if the mean confidence is below `low_conf_threshold`, all
       directions get weight 1.0.
    4. Otherwise each direction is weighted by its confidence.

    Args:
        direction_results: Dict with keys for all 7 directions; each value
            holds 'proba' ((n_windows, n_classes) array) and 'indices'
            (list of hashable, sortable position keys, one per proba row).
            A direction with no windows may carry an empty 'proba' of any
            shape.
        high_conf_threshold: Threshold for the moderate boost (default 0.70)
        very_high_conf_threshold: Threshold for the strong boost (default 0.75)
        centered_boost_threshold: Threshold at which centered_10 is favoured
        low_conf_threshold: Mean confidence below which equal weighting is
            used (default 0.60)

    Returns:
        combined_proba: (n_positions, n_classes) final probability matrix,
            rows ordered by sorted position key
        position_map: mapping from position key to row index in combined_proba
    """
    direction_names = ['backward_10', 'centered_10', 'forward_10',
                      'backward_15', 'forward_15',
                      'asymm_past', 'asymm_future']

    # One dict per direction (position -> first row holding it) replaces the
    # repeated `pos in list` / `list.index(pos)` scans, which were quadratic.
    row_lookup = {}
    direction_probas = {}
    direction_confidences = {}
    n_classes = None

    for direction_name in direction_names:
        result = direction_results[direction_name]

        lookup = {}
        for row, pos in enumerate(result['indices']):
            lookup.setdefault(pos, row)  # keep the first occurrence, like list.index
        row_lookup[direction_name] = lookup

        proba = np.asarray(result['proba'])
        if n_classes is None and proba.ndim == 2:
            n_classes = proba.shape[1]

        # Directions without windows are skipped so an empty proba cannot break np.max.
        if lookup:
            direction_probas[direction_name] = proba
            direction_confidences[direction_name] = np.max(proba, axis=1)

    if n_classes is None:
        n_classes = 0

    all_positions = sorted(set().union(*row_lookup.values()))
    position_map = {pos: idx for idx, pos in enumerate(all_positions)}

    combined_proba = np.zeros((len(all_positions), n_classes))

    for pos_idx, pos in enumerate(all_positions):
        # Collect every direction that produced a window for this position
        position_directions = {}
        position_confs = {}
        for direction_name in direction_names:
            row = row_lookup[direction_name].get(pos)
            if row is None:
                continue
            position_directions[direction_name] = direction_probas[direction_name][row]
            position_confs[direction_name] = direction_confidences[direction_name][row]

        if not position_directions:
            continue

        weights = _adaptive_direction_weights(
            position_confs,
            high_conf_threshold,
            very_high_conf_threshold,
            centered_boost_threshold,
            low_conf_threshold,
        )

        # Weighted average of the per-direction probability rows
        total_weight = sum(weights.values())
        for direction_name, weight in weights.items():
            combined_proba[pos_idx] += position_directions[direction_name] * weight

        if total_weight > 0:
            combined_proba[pos_idx] /= total_weight

    return combined_proba, position_map

print("âœ… Adaptive confidence-based direction combination defined")

âœ… Adaptive confidence-based direction combination defined


In [8]:
def apply_confidence_weighted_voting(predictions_proba, vote_window=5):
    """
    Smooth per-position predictions with a confidence-weighted vote.

    For each row i, the rows within `vote_window // 2` positions on either
    side (clipped at the array ends) each contribute their probability
    vector scaled by their own top probability; the class with the largest
    total wins.

    Returns:
        Integer array of length n_samples with the winning class index.
    """
    n_samples, n_classes = predictions_proba.shape
    radius = vote_window // 2
    winners = np.zeros(n_samples, dtype=int)

    for centre in range(n_samples):
        lo = max(0, centre - radius)
        hi = min(n_samples, centre + radius + 1)

        neighbourhood = predictions_proba[lo:hi]
        strengths = np.max(neighbourhood, axis=1)

        # Accumulate row by row so the summation order stays fixed.
        tally = np.zeros(n_classes)
        for probs, strength in zip(neighbourhood, strengths):
            tally += probs * strength

        winners[centre] = np.argmax(tally)

    return winners

print("âœ… Temporal voting function defined")

âœ… Temporal voting function defined


In [9]:
def run_adaptive_multidirectional_pipeline(train_df, test_df, seed, n_ensemble=5,
                                          vote_window=5,
                                          high_conf_threshold=0.70,
                                          very_high_conf_threshold=0.75,
                                          centered_boost_threshold=0.68,
                                          verbose=False):
    """
    EXPERIMENT 4: 7 directions + Adaptive confidence-based weighting

    Pipeline:
    1. Train ensemble (`n_ensemble` models)
    2. Create 7 directional windows over the test data
    3. Get ensemble predictions for each direction
    4. Adaptive confidence-based direction combination
    5. Apply temporal voting
    6. Score with macro and per-class F1

    Args:
        train_df, test_df: Raw readings for the fold.
        seed: Seed for this run; also the base seed of the ensemble.
        n_ensemble: Number of models in the ensemble.
        vote_window: Window size passed to the temporal voting step.
        high_conf_threshold, very_high_conf_threshold,
        centered_boost_threshold: Forwarded to the direction combiner.
        verbose: Print progress details when True.

    Returns:
        Dict with 'seed', 'macro_f1', 'per_class_f1' (label -> F1) and
        'thresholds' (the three thresholds used).
    """
    # Single source of truth for the direction order used throughout this run.
    direction_names = ['backward_10', 'centered_10', 'forward_10',
                      'backward_15', 'forward_15',
                      'asymm_past', 'asymm_future']

    tf.keras.backend.clear_session()
    set_seeds(seed)

    if verbose:
        print(f"\n  Seed {seed}: Training ensemble...")

    # 1. Train Ensemble
    models, label_encoder = train_ensemble_models(
        train_df,
        n_models=n_ensemble,
        base_seed=seed,
        verbose=verbose
    )

    if verbose:
        print("  Creating multi-directional windows (7 directions)...")

    # 2. Create Windows
    test_vectors = create_beacon_count_vectors(test_df)
    direction_windows = create_extended_multidirectional_windows(test_vectors)

    if verbose:
        for direction_name in direction_names:
            print(f"    {direction_name}: {len(direction_windows[direction_name]['sequences'])} windows")
        print("  Getting directional predictions...")

    # 3. Get Predictions
    direction_results = {}

    for direction_name in direction_names:
        if verbose:
            print(f"    Predicting {direction_name}...", end=" ")

        sequences = direction_windows[direction_name]['sequences']
        proba = predict_single_direction(models, sequences, max_seq_length=50)

        direction_results[direction_name] = {
            'proba': proba,
            'indices': direction_windows[direction_name]['indices'],
            'labels': direction_windows[direction_name]['labels']
        }

        if verbose:
            avg_conf = np.mean(np.max(proba, axis=1))
            print(f"avg confidence: {avg_conf:.3f}")

    if verbose:
        print(f"  Combining with ADAPTIVE confidence thresholds...")
        print(f"    High conf threshold: {high_conf_threshold}")
        print(f"    Very high conf threshold: {very_high_conf_threshold}")
        print(f"    Centered boost threshold: {centered_boost_threshold}")

    # 4. Adaptive Combination
    combined_proba, position_map = combine_directional_predictions_adaptive(
        direction_results,
        high_conf_threshold=high_conf_threshold,
        very_high_conf_threshold=very_high_conf_threshold,
        centered_boost_threshold=centered_boost_threshold
    )

    # Ground truth per position: the label from the first direction (in
    # direction_names order) that covers it. A dict built in one pass replaces
    # the per-position `in` / `.index` list scans, which were quadratic.
    label_by_position = {}
    for direction_name in direction_names:
        direction = direction_results[direction_name]
        for pos, label in zip(direction['indices'], direction['labels']):
            label_by_position.setdefault(pos, label)

    # Same ordering as the rows of combined_proba (sorted position keys).
    y_test = [label_by_position[pos] for pos in sorted(position_map.keys())]

    if verbose:
        print(f"  Applying temporal voting (window={vote_window})...")

    # 5. Temporal Voting
    y_pred_voted_encoded = apply_confidence_weighted_voting(combined_proba, vote_window=vote_window)
    y_pred = label_encoder.inverse_transform(y_pred_voted_encoded)

    # 6. Evaluation
    macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
    per_class_f1 = f1_score(y_test, y_pred, average=None, labels=label_encoder.classes_, zero_division=0)

    if verbose:
        print(f"  âœ“ Macro F1: {macro_f1:.4f}")

    return {
        'seed': seed,
        'macro_f1': macro_f1,
        'per_class_f1': {label: f1 for label, f1 in zip(label_encoder.classes_, per_class_f1)},
        'thresholds': {
            'high_conf': high_conf_threshold,
            'very_high_conf': very_high_conf_threshold,
            'centered_boost': centered_boost_threshold
        }
    }

print("âœ… Complete adaptive multi-directional pipeline defined")

âœ… Complete adaptive multi-directional pipeline defined


In [10]:
# Report the GPUs TensorFlow can see
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
print(gpu_devices)

Num GPUs Available:  1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [11]:
# FULL EXPERIMENT
# Runs the adaptive pipeline for every (fold, seed) pair and collects the
# result dicts in all_fold_results[fold_num] (one list entry per seed).
print("="*80)
print("FULL 4-FOLD CROSS-VALIDATION - EXPERIMENT 4")
print("="*80)

seeds = [42, 123, 456]
# fold number -> (train, test) frames loaded earlier in the notebook
folds = {
    1: (train_df_1, test_df_1),
    2: (train_df_2, test_df_2),
    3: (train_df_3, test_df_3),
    4: (train_df_4, test_df_4)
}

all_fold_results = {}

for fold_num, (train_df, test_df) in folds.items():
    print(f"\n{'='*80}")
    print(f"PROCESSING FOLD {fold_num}")
    print(f"{'='*80}\n")

    fold_results = []

    for seed in seeds:
        print(f"  Running seed {seed}...", end=" ")
        # Each call trains a fresh 5-model ensemble, so this is the expensive step.
        result = run_adaptive_multidirectional_pipeline(
            train_df, test_df,
            seed=seed,
            n_ensemble=5,
            vote_window=5,
            high_conf_threshold=0.70,
            very_high_conf_threshold=0.75,
            centered_boost_threshold=0.68,
            verbose=False
        )
        fold_results.append(result)
        print(f"Macro F1: {result['macro_f1']:.4f}")

    all_fold_results[fold_num] = fold_results

    # Per-fold summary across seeds (np.std here is the population std, ddof=0)
    macro_f1_scores = [r['macro_f1'] for r in fold_results]
    print(f"\n  Fold {fold_num} Summary:")
    print(f"    Mean Macro F1: {np.mean(macro_f1_scores):.4f} Â± {np.std(macro_f1_scores):.4f}")
    print(f"    Min: {np.min(macro_f1_scores):.4f}, Max: {np.max(macro_f1_scores):.4f}")

print("\n" + "="*80)
print("ALL FOLDS COMPLETED!")
print("="*80)

FULL 4-FOLD CROSS-VALIDATION - EXPERIMENT 4

PROCESSING FOLD 1

  Running seed 42... Macro F1: 0.5255
  Running seed 123... Macro F1: 0.5025
  Running seed 456... Macro F1: 0.5001

  Fold 1 Summary:
    Mean Macro F1: 0.5094 Â± 0.0115
    Min: 0.5001, Max: 0.5255

PROCESSING FOLD 2

  Running seed 42... Macro F1: 0.4152
  Running seed 123... Macro F1: 0.4302
  Running seed 456... Macro F1: 0.4240

  Fold 2 Summary:
    Mean Macro F1: 0.4231 Â± 0.0061
    Min: 0.4152, Max: 0.4302

PROCESSING FOLD 3

  Running seed 42... Macro F1: 0.4055
  Running seed 123... Macro F1: 0.4252
  Running seed 456... Macro F1: 0.3909

  Fold 3 Summary:
    Mean Macro F1: 0.4072 Â± 0.0140
    Min: 0.3909, Max: 0.4252

PROCESSING FOLD 4

  Running seed 42... Macro F1: 0.4231
  Running seed 123... Macro F1: 0.4065
  Running seed 456... Macro F1: 0.4219

  Fold 4 Summary:
    Mean Macro F1: 0.4172 Â± 0.0076
    Min: 0.4065, Max: 0.4231

ALL FOLDS COMPLETED!


In [12]:
# Final Summary
# Reads all_fold_results (filled by the cross-validation cell) and prints
# per-fold and overall macro-F1 statistics plus a comparison against
# hard-coded reference scores.
print("\n" + "="*80)
print("FINAL SUMMARY - EXPERIMENT 4 (ADAPTIVE CONFIDENCE)")
print("="*80 + "\n")

for fold_num in [1, 2, 3, 4]:
    macro_f1_scores = [r['macro_f1'] for r in all_fold_results[fold_num]]
    print(f"Fold {fold_num}: {np.mean(macro_f1_scores):.4f} Â± {np.std(macro_f1_scores):.4f}")

# Pool every (fold, seed) score; the overall mean/std is taken over this flat list
all_macro_f1 = []
for fold_num in [1, 2, 3, 4]:
    all_macro_f1.extend([r['macro_f1'] for r in all_fold_results[fold_num]])

print(f"\n{'='*80}")
print(f"Overall Mean: {np.mean(all_macro_f1):.4f} Â± {np.std(all_macro_f1):.4f}")
print(f"{'='*80}")

# The Baseline / Exp 1 / Exp 2 figures below are literal constants, not computed in this notebook
print("\n" + "="*80)
print("COMPLETE PROGRESSION:")
print("="*80)
print("Baseline: 0.4106 Â± 0.0266")
print("Exp 1 (3 dir): 0.4273 Â± 0.0312")
print("Exp 2 (7 dir, standard): 0.4384 Â± 0.0329")
print(f"Exp 4 (7 dir, adaptive): {np.mean(all_macro_f1):.4f} Â± {np.std(all_macro_f1):.4f}")

# Differences against the hard-coded baseline (0.4106), Exp 2 (0.4384) and the 0.45 target
total_gain = np.mean(all_macro_f1) - 0.4106
gain_from_exp2 = np.mean(all_macro_f1) - 0.4384
target_gap = 0.45 - np.mean(all_macro_f1)

print(f"\n{'='*80}")
print(f"Total gain from baseline: {total_gain:+.4f}")
print(f"Gain from Exp 2: {gain_from_exp2:+.4f}")
print(f"Gap to target (0.45): {target_gap:.4f}")
print(f"{'='*80}")

# Verdict: target reached, meaningful gain over Exp 2 (> 0.005), or no real difference
if np.mean(all_macro_f1) >= 0.45:
    print("\nðŸŽ¯ðŸŽ¯ðŸŽ¯ TARGET ACHIEVED! 0.45 F1 REACHED! ðŸŽ¯ðŸŽ¯ðŸŽ¯")
elif gain_from_exp2 > 0.005:
    print(f"\nâœ… Adaptive confidence helps! +{gain_from_exp2:.4f} improvement")
    if target_gap < 0.005:
        print("   SO CLOSE! Try hyperparameter tuning for final push")
    else:
        print(f"   {target_gap:.4f} remaining to target")
else:
    print("\nðŸ“Š Adaptive confidence similar to standard weighting")
    print("   Try: Different thresholds or hyperparameter tuning")


FINAL SUMMARY - EXPERIMENT 4 (ADAPTIVE CONFIDENCE)

Fold 1: 0.5094 Â± 0.0115
Fold 2: 0.4231 Â± 0.0061
Fold 3: 0.4072 Â± 0.0140
Fold 4: 0.4172 Â± 0.0076

Overall Mean: 0.4392 Â± 0.0422

COMPLETE PROGRESSION:
Baseline: 0.4106 Â± 0.0266
Exp 1 (3 dir): 0.4273 Â± 0.0312
Exp 2 (7 dir, standard): 0.4384 Â± 0.0329
Exp 4 (7 dir, adaptive): 0.4392 Â± 0.0422

Total gain from baseline: +0.0286
Gain from Exp 2: +0.0008
Gap to target (0.45): 0.0108

ðŸ“Š Adaptive confidence similar to standard weighting
   Try: Different thresholds or hyperparameter tuning


In [13]:
# Save results
# Writes a plain-text report built from all_fold_results (filled by the
# cross-validation cell). The encoding is pinned to UTF-8 because the report
# contains non-ASCII characters; with the platform default encoding the write
# can fail or produce a differently-encoded file depending on the locale.
with open('experiment4_results.txt', 'w', encoding='utf-8') as f:
    f.write("="*80 + "\n")
    f.write("EXPERIMENT 4: ADAPTIVE CONFIDENCE-BASED DIRECTION WEIGHTING\n")
    f.write("="*80 + "\n\n")

    # Configuration section: literal text, not read back from the results
    f.write("Configuration:\n")
    f.write("-"*80 + "\n")
    f.write("Directions: 7 (same as Exp 2)\n")
    f.write("Ensemble: 5 models\n")
    f.write("Temporal voting: 5 seconds\n")
    f.write("\nNEW Adaptive Thresholds:\n")
    f.write("  High confidence threshold: 0.70\n")
    f.write("  Very high confidence threshold: 0.75\n")
    f.write("  Centered boost threshold: 0.68\n\n")

    # Pool every (fold, seed) macro-F1 for the overall statistics
    all_macro_f1 = []
    for fold_num in [1, 2, 3, 4]:
        fold_scores = [r['macro_f1'] for r in all_fold_results[fold_num]]
        all_macro_f1.extend(fold_scores)

    f.write("OVERALL RESULTS:\n")
    f.write("-"*80 + "\n")
    f.write(f"Mean Macro F1: {np.mean(all_macro_f1):.4f} Â± {np.std(all_macro_f1):.4f}\n")
    f.write(f"Min: {np.min(all_macro_f1):.4f}, Max: {np.max(all_macro_f1):.4f}\n\n")

    # 0.4106 (baseline), 0.4384 (Exp 2) and 0.45 (target) are hard-coded reference values
    f.write("PROGRESSION:\n")
    f.write("-"*80 + "\n")
    f.write("Baseline: 0.4106\n")
    f.write("Exp 2 (7 dir, standard): 0.4384\n")
    f.write(f"Exp 4 (7 dir, adaptive): {np.mean(all_macro_f1):.4f}\n\n")
    f.write(f"Gain from baseline: {np.mean(all_macro_f1) - 0.4106:+.4f}\n")
    f.write(f"Gain from Exp 2: {np.mean(all_macro_f1) - 0.4384:+.4f}\n")
    f.write(f"Gap to target: {0.45 - np.mean(all_macro_f1):.4f}\n")

    # One section per fold: each seed's score, then the fold mean and std
    for fold_num in [1, 2, 3, 4]:
        f.write(f"\n{'='*80}\n")
        f.write(f"FOLD {fold_num}\n")
        f.write(f"{'='*80}\n\n")

        fold_results = all_fold_results[fold_num]
        macro_f1_scores = [r['macro_f1'] for r in fold_results]

        for result in fold_results:
            f.write(f"  Seed {result['seed']:5d}: {result['macro_f1']:.4f}\n")

        f.write(f"\n  Mean: {np.mean(macro_f1_scores):.4f} Â± {np.std(macro_f1_scores):.4f}\n")

print("âœ… Results saved to experiment4_results.txt")

âœ… Results saved to experiment4_results.txt
