In [None]:
import pickle
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from ml_wireless_classification.base.SignalUtils import extract_comprehensive_features


# Feature name -> value mapping of the most recently processed signal.
# Other cells read its keys as the feature (column) names, so its key order
# is kept identical to the column order of the returned feature matrix.
feature_dict = {}


def extract_features(data, feature_extractor=None):
    """Build a feature matrix and label list from a dict of I/Q signals.

    Args:
        data: Mapping whose keys unpack as ``(mod_type, snr)`` and whose values
            are iterables of signals. Each signal is indexed as ``signal[0]``
            (real part) and ``signal[1]`` (imaginary part) — presumably a
            NumPy array of shape ``(2, n_samples)``; confirm against the dataset.
        feature_extractor: Optional callable ``f(complex_signal, real_part)``
            returning a mapping of feature name -> value. Defaults to
            ``extract_comprehensive_features``.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is ``np.array`` of one
        row per signal (extracted features followed by ``"SNR"``) and
        ``labels`` is the list of ``mod_type`` values in the same order.

    Raises:
        ValueError: If a signal yields a different set of feature names than
            the first signal, since the rows could not be aligned.

    Side effects:
        Rebinds the module-level ``feature_dict`` to the feature mapping of the
        last processed signal (left untouched when ``data`` has no signals).
    """
    global feature_dict

    if feature_extractor is None:
        # Resolved at call time so the default still comes from the notebook's import.
        feature_extractor = extract_comprehensive_features

    features = []
    labels = []
    column_names = None  # fixed by the first signal; every later row follows it

    for (mod_type, snr), signals in data.items():
        for signal in signals:
            real_part, imag_part = signal[0], signal[1]
            complex_signal = real_part + 1j * imag_part

            # Copy so adding "SNR" never mutates an object owned by the extractor.
            signal_features = dict(feature_extractor(complex_signal, real_part))

            # Add SNR as a feature
            signal_features["SNR"] = snr

            if column_names is None:
                column_names = list(signal_features)
            elif set(signal_features) != set(column_names):
                raise ValueError(
                    "Inconsistent feature names for "
                    f"{mod_type!r} at SNR {snr!r}: expected {column_names}, "
                    f"got {list(signal_features)}"
                )

            # Index by name instead of relying on dict order, so a different
            # insertion order cannot silently misalign columns.
            row = [signal_features[name] for name in column_names]
            features.append(row)
            labels.append(mod_type)

            # Publish the latest signal's features in column order.
            feature_dict = dict(zip(column_names, row))

    return np.array(features), labels

# Load the RML2016.10a_dict.pkl file with explicit encoding.
# encoding="latin1" is forwarded to pickle.load; presumably the file was
# pickled under Python 2 — TODO confirm.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# from a source you trust.
# The path is relative, so it resolves against the kernel's working directory.
with open("../RML2016.10a_dict.pkl", "rb") as f:
    data = pickle.load(f, encoding="latin1")

# Feature extraction for all signals.
# extract_features iterates data.items() and unpacks every key as
# (mod_type, snr); it returns a NumPy array of feature rows and a list of labels.
features, labels = extract_features(data)


In [None]:

from ml_wireless_classification.base.TestingUtils import clean_training_data, ensure_2d

# Encode labels for classification: fit_transform turns the label list into
# integer codes. The fitted label_encoder is reused by later cells (its
# classes_ attribute is read to map modulation names back to codes).
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split dataset into training and testing sets: 30% is held out for testing and
# random_state=42 keeps the shuffle reproducible.
# NOTE(review): no stratify= argument is passed, so class proportions may
# differ slightly between the two splits.
X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels, test_size=0.3, random_state=42)


# Clean training and test data.
# NOTE(review): clean_training_data is a project helper not visible here;
# presumably it removes or repairs invalid rows — confirm. It is applied to
# each split independently and returns the (X, y) pair that replaces it.
X_train, y_train = clean_training_data(X_train, y_train)
X_test, y_test = clean_training_data(X_test, y_test)

# Ensure both X_train and X_test are 2D arrays.
# The second argument is a string naming the array; presumably it is used in
# the helper's messages — TODO confirm.
X_train = ensure_2d(X_train, "X_train")
X_test = ensure_2d(X_test, "X_test")


In [None]:

# Train a single random-forest classifier on the training split
# (X_train, y_train); the test split is kept aside for evaluation.
# n_jobs=-1 builds the 1000 trees in parallel on all available cores. The
# per-tree seeds are derived from random_state, so the fitted forest should not
# depend on the number of workers.
clf = RandomForestClassifier(n_estimators=1000, random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)


In [None]:
from ml_wireless_classification.base.TestingUtils import plot_accuracy_v_snr_per_classification
from ml_wireless_classification.base.TestingUtils import plot_confusion_matrix, plot_feature_importance
from ml_wireless_classification.base.TestingUtils import plot_accuracy_per_snr

# Evaluation plots for the fitted classifier on the held-out test split.
# All four helpers come from the project's TestingUtils module; their
# signatures are not visible here.
plot_accuracy_per_snr(clf, X_test, y_test)
plot_accuracy_v_snr_per_classification(clf, X_test, y_test, label_encoder)
# feature_dict is the module-level dict last written by extract_features;
# presumably its keys are used as the feature names — TODO confirm.
plot_feature_importance(clf, feature_dict, X_test, y_test)
# NOTE(review): unlike the three calls above, clf is not passed here — confirm
# against plot_confusion_matrix's signature that this is intentional.
plot_confusion_matrix( X_test, y_test, label_encoder)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.inspection import permutation_importance

# Specify the modulation types of interest
modulation_types_of_interest = ["WBFM", "AM-DSB", "QAM16", "QAM64"]

# Maps each selected modulation type to a tuple
# (feature names sorted by importance, matching mean importances).
modulation_importances = {}

# Column names are taken from the module-level feature_dict written by
# extract_features.
# NOTE(review): this assumes X_test still has one column per key, in the same
# order — confirm the cleaning helpers do not drop or reorder columns.
feature_names = list(feature_dict.keys())

# Calculate permutation importance for each selected modulation type
for modulation_type in modulation_types_of_interest:
    # Report (instead of silently ignoring) names the encoder never saw.
    if modulation_type not in label_encoder.classes_:
        print(f"Skipping {modulation_type}: not among the encoded classes")
        continue

    # Integer code assigned to this modulation type by the label encoder
    i = np.where(label_encoder.classes_ == modulation_type)[0][0]

    # Row indices of this modulation type in the test set
    # (np.where returns a one-element tuple of index arrays)
    class_indices = np.where(y_test == i)

    # permutation_importance cannot score an empty subset, so skip it explicitly.
    if class_indices[0].size == 0:
        print(f"Skipping {modulation_type}: no samples in the test set")
        continue

    # Calculate permutation importance only on this modulation type subset
    result = permutation_importance(clf, X_test[class_indices], y_test[class_indices], n_repeats=2, random_state=42)

    # Sort features from most to least important
    sorted_indices = np.argsort(result.importances_mean)[::-1]
    sorted_feature_names = [feature_names[j] for j in sorted_indices]
    sorted_importances = result.importances_mean[sorted_indices]

    # Store sorted feature importances for this modulation type
    modulation_importances[modulation_type] = (sorted_feature_names, sorted_importances)

    # Plot top feature importances for this modulation type
    plt.figure(figsize=(10, 8))
    plt.barh(sorted_feature_names[:10], sorted_importances[:10], color='skyblue')
    plt.xlabel("Feature Importance")
    plt.title(f"Top 10 Feature Importances for Modulation Type: {modulation_type}")
    plt.gca().invert_yaxis()  # Invert y-axis to show the highest importance at the top
    plt.show()
