In [1]:
import numpy as np
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.model_selection import train_test_split 
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import DictionaryLearning

# Set numpy print options for better readability
np.set_printoptions(threshold=np.inf)
np.set_printoptions(suppress=True)

# Loading dataset
measurement = np.load('../../../dataset/meas_symm_1.npz', allow_pickle=False)
n_comp = 20  # Increased number of components for better representation
header, data = measurement['header'], measurement['data']
data_cir = data['cirs'][:1000]

trainCIR, testCIR = train_test_split(data_cir, test_size=0.2, random_state=42)


def apply_dictionary_learning(data, n_components):
    # Reshape the data to 2D: (samples, features)
    reshaped_data = data.reshape(data.shape[0], -1)
    
    # Scale the data to have zero mean and unit variance
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(reshaped_data)
    
    # Dictionary Learning
    dict_learner = DictionaryLearning(n_components=n_components, transform_algorithm='omp', random_state=42)
    data_dict = dict_learner.fit_transform(data_scaled)
    
    return data_dict, scaler, dict_learner


alice_channel = 3  # Legitimate channel
eve_channel = 6    # Illegitimate channel

# Data and Feature Extraction
alice_train_CIRs = trainCIR[:, alice_channel, :, :]
eve_train_CIRs = trainCIR[:, eve_channel, :, :]
print(f"alice_train_CIRs: {alice_train_CIRs.shape}")

train_cirs = np.vstack((alice_train_CIRs, eve_train_CIRs))  
print(f"train_cirs: {train_cirs.shape}")

# Dictionary Learning -> training data
train_cirs_dict, scaler, dict_learner = apply_dictionary_learning(train_cirs, n_components=n_comp)
print(f"train_cirs_dict: {train_cirs_dict.shape}")

# Labels
alice_train_labels = np.zeros(alice_train_CIRs.shape[0])  
eve_train_labels = np.ones(eve_train_CIRs.shape[0])       
train_labels = np.hstack((alice_train_labels, eve_train_labels))

D = dict_learner.components_.T  # Shape: (features, n_components)
print(f'Dictionary : {D.shape}')

# Extract and prepare test data
alice_test_CIRs = testCIR[:, alice_channel, :, :]
eve_test_CIRs = testCIR[:, eve_channel, :, :]
test_cirs = np.vstack((alice_test_CIRs, eve_test_CIRs))  

# Transform test data with same Scaler and dictionary learner
reshaped_test_cirs = test_cirs.reshape(test_cirs.shape[0], -1)
test_cirs_scaled = scaler.transform(reshaped_test_cirs)
test_cirs_dict = dict_learner.transform(test_cirs_scaled)

# Labels
alice_test_labels = np.zeros(alice_test_CIRs.shape[0])  
eve_test_labels = np.ones(eve_test_CIRs.shape[0])       
test_labels = np.hstack((alice_test_labels, eve_test_labels))

# Coefficients calculation
def find_sparse_coefficients(tSample, D, n_nonzero_coefs=15):
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    omp.fit(D, tSample)
    return omp.coef_

# Calculate residual
def calculate_residual(tSample, coefficients, class_indices, D):
    coef_class = np.zeros_like(coefficients)
    coef_class[class_indices] = coefficients[class_indices]  # Keep only coefficients for the specified class
    reconstructed_signal = D @ coef_class
    residual = np.linalg.norm(tSample - reconstructed_signal)
    return residual

# Classification function
def classify_signal(tSample, D, trainLabel):
    coefficients = find_sparse_coefficients(tSample, D)
    residuals = []

    unique_classes = np.unique(trainLabel)  # Classes: 0 and 1
    for class_label in unique_classes:
        class_indices = np.where(trainLabel == class_label)[0]  # Indices of atoms belonging to the class
        residual = calculate_residual(tSample, coefficients, class_indices, D)
        residuals.append(residual)

    # Predict the class with the smallest residual
    min_residual_index = np.argmin(residuals)
    predicted_class = unique_classes[min_residual_index]
    return predicted_class

# Classify the test data and evaluate the model
predictions = []

for testSample in test_cirs_dict:
    predicted_class = classify_signal(testSample, D, train_labels)
    predictions.append(predicted_class)

predictions = np.array(predictions)
accuracy = np.mean(predictions == test_labels)
print(f"Classification Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix and performance metrics
print(f"\nTotal testing samples: {test_labels.shape}")

tn, fp, fn, tp = confusion_matrix(test_labels, predictions, labels=[0, 1]).ravel()

print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

# Missed Detection Rate (MDR)
MDR = fp / (fp + tn) if (fp + tn) != 0 else 0

# False Alarm Rate (FAR)
FAR = fn / (fn + tp) if (fn + tp) != 0 else 0

# Gamma calculation
gamma = (tp + fn) / (tn + fp) if (tn + fp) != 0 else 0

# Authentication Rate (AR)
AR = (tp + gamma * tn) / ((tp + fn) + gamma * (tn + fp)) if ((tp + fn) + gamma * (tn + fp)) != 0 else 0

print(f"MDR: {MDR}")
print(f"FAR: {FAR}")
print(f"AR: {AR}")


alice_train_CIRs: (800, 251, 2)
train_cirs: (1600, 251, 2)
train_cirs_dict: (1600, 20)
Dictionary : (502, 20)


ValueError: Found input variables with inconsistent numbers of samples: [502, 20]