# Data Improvement

In [None]:
import sys
# Extend the module search path with the sibling `src` directory before the
# project-local imports below (`preprocessing`, `doa`, `data_improvement`),
# which presumably live there — TODO confirm. The entry is a relative path, so
# it resolves against the current working directory: run this notebook from
# its own folder.
sys.path.append('../src')

import numpy as np
import pandas as pd
from tensorflow.keras.utils import to_categorical

from preprocessing import load_steering_vectors
from doa import evaluate_doa_predictions, convert_signals_to_covariance_input
from data_improvement import compute_removal_ratio_doa, remove_misclassified_data_doa

## Load Data

In [2]:
# Read the original train/test split from the .npz archive.
# np.load on an .npz returns a lazy NpzFile that keeps the underlying file
# open; the `with` block closes it as soon as the arrays have been pulled
# into memory (each `data[key]` access reads the full array).
with np.load('../data/original/signals_for_doa.npz') as data:
    X_train = data['X_train']
    X_test = data['X_test']
    y_train = data['y_train']
    y_test = data['y_test']
    # Loaded for completeness; not referenced again in the cells visible here.
    doa2s = data['doa2s']

In [3]:
# Load the steering matrix and its accompanying DoA array from the .mat file,
# then print both shapes as a quick sanity check.
steering_file = '../data/original/steering_matrix.mat'
steering_matrix, DoAs = load_steering_vectors(filepath=steering_file)
print(
    f"Steering matrix shape: {steering_matrix.shape}",
    f"DoAs shape: {DoAs.shape}",
    sep="\n",
)

Steering matrix shape: (8, 61)
DoAs shape: (61,)


In [4]:
# Results saved by an earlier evaluation run (the paths suggest the no-attack
# GLRT DoA estimator — confirm against the notebook that wrote them).
predictions_csv = "../results/noattack/glrt_doa_predictions.csv"
accuracy_csv = "../results/noattack/glrt_doa_accuracy.csv"

# Per-sample predictions: only the `y_pred` column is used downstream.
doa_preds_df = pd.read_csv(predictions_csv)
doa_preds = doa_preds_df["y_pred"].to_numpy()

# Summary metrics: take the first row's tolerance and test accuracy.
accuracy_df = pd.read_csv(accuracy_csv)
tolerance = int(accuracy_df["tolerance"].iloc[0])  # int() truncates a fractional tolerance
doa_acc = float(accuracy_df["test_accuracy"].iloc[0])

print(f"Loaded Tolerance: {tolerance:.4f}")
print(f"Original DoA Accuracy: {doa_acc:.4f}")

Loaded Tolerance: 0.0000
Original DoA Accuracy: 0.8625


## Update Data

In [17]:
# Accuracy the filtered test set should reach. Defined once so the argument
# passed to the helper and the percentage shown in the message cannot drift
# apart when the target is tuned.
TARGET_ACCURACY = 0.97

# Ratio of samples to remove so that the stored predictions reach the target
# accuracy (presumably a fraction of the misclassified samples — confirm in
# data_improvement.compute_removal_ratio_doa).
removal_ratio = compute_removal_ratio_doa(y_test, doa_preds, target_accuracy=TARGET_ACCURACY)
# `.0%` renders 0.97 as "97%", so the printed line is unchanged for this target.
print(f"Calculated removal ratio to reach {TARGET_ACCURACY:.0%}: {removal_ratio:.4f}")

Calculated removal ratio to reach 97%: 0.8060


In [18]:
# Remove misclassified test samples using the ratio computed in the previous
# cell. The variable is passed directly instead of a literal copied from the
# printed (4-decimal, rounded) output, so the full-precision value is used and
# this cell stays in sync if the data or the target accuracy changes.
X_test_filtered, y_test_filtered, removed_indices = remove_misclassified_data_doa(
    X_test, y_test, doa_preds, removal_ratio=removal_ratio
)

In [19]:
# Labels: one-hot encode the filtered DoA class indices over 61 classes
# (61 matches the DoA array length printed when the steering matrix was loaded).
y_test_filtered_onehot = to_categorical(y_test_filtered, num_classes=61)

# Features: turn the filtered raw signals into the covariance-based input,
# using the same value (750) for both the turn-on time and the block size.
covariance_settings = {"turn_on_time": 750, "block_size": 750}
X_test_filtered_cov = convert_signals_to_covariance_input(
    X_test_filtered,
    **covariance_settings,
)

### Check Updated Results

In [20]:
# Re-evaluate on the filtered test set, reusing the tolerance loaded from the
# original accuracy CSV so the two accuracy figures are comparable.
filtered_eval = evaluate_doa_predictions(
    X_test_filtered_cov,
    y_test_filtered_onehot,
    steering_matrix,
    tolerance=tolerance,
)
# The helper returns an (accuracy, predictions) pair.
doa_acc_filtered, doa_preds_filtered = filtered_eval

In [21]:
# Report the accuracy on the filtered test set (compare with the original
# accuracy printed when the stored results were loaded).
filtered_accuracy_line = f"Filtered DoA Accuracy: {doa_acc_filtered:.4f}"
print(filtered_accuracy_line)

Filtered DoA Accuracy: 0.9696


## Save and Export Filtered Data

In [22]:
# Arrays to export: the training split is written back unchanged, while the
# test split is replaced by its filtered version.
# NOTE(review): `doa2s` from the original archive is not written here —
# confirm that downstream consumers of the improved file do not need it.
improved_arrays = {
    "X_train": X_train,
    "X_test": X_test_filtered,
    "y_train": y_train,
    "y_test": y_test_filtered,
}
np.savez_compressed('../data/processed/improved_doa_signals.npz', **improved_arrays)
print("Saved as .npz file.")

Saved as .npz file.
