In [None]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from ml_wireless_classification.base.SignalUtils import extract_comprehensive_features

# Feature names used for the QAM16 vs QAM64 classifier. The list order fixes the
# column order of each channel's half of the feature vector built further down.
QAM_FEATURES = [
    "SNR",
    "PAPR",
    "Kurtosis Magnitude",
    "Skewness Magnitude",
    "Adaptive Gaussian Filtering (Frequency Domain)",
    "Spectral Energy Density",
    "Autocorrelation Skewness (Quartic)",
    "Spectral Peak Ratio",
    "Energy Spread Time-Frequency (Gaussian)",
    "Phase Modulation Skewness (Quartic)",
    "Spectral Concentration Around Center",
    "Temporal Peak Density (Quadratic)",
    "Zero-Crossing Density (Frequency Domain)",
    "Frequency Modulation Rate",
    "Normalized High-Frequency Power Ratio",
    "Spectral Modulation Bandwidth (Quadratic)",
    "Amplitude Spectral Flatness",
    "Wavelet Entropy Multiple Scales (Quadratic)",
    "High-Frequency Spectral Entropy (Cubic)",
]

def filter_qam_data(data):
    """Return only the entries of ``data`` whose key names QAM16 or QAM64.

    Args:
        data: Mapping whose keys unpack as ``(mod_type, snr)`` pairs.

    Returns:
        A new dict holding the matching ``key -> signals`` entries; the values
        are the same objects as in ``data`` (nothing is copied).

    Also prints how many distinct modulation types were kept, and which ones.
    """
    def _is_qam(key):
        # Unpack exactly two fields so a malformed key fails loudly.
        mod_type, _snr = key
        return mod_type in ("QAM16", "QAM64")

    filtered_data = {key: signals for key, signals in data.items() if _is_qam(key)}

    # Sanity report: the kept keys should mention QAM16 and/or QAM64 only.
    kept_types = {mod_type for mod_type, _ in filtered_data}
    print(f"Filtered data contains {len(kept_types)} unique modulation types: {kept_types}")

    return filtered_data

def extract_qam_features(data, selected_features):
    """Build one feature row per signal from its real and imaginary channels.

    For every signal, ``extract_comprehensive_features`` is called once with
    ``signal[0]`` (treated as the real part) and once with ``signal[1]``
    (treated as the imaginary part). The SNR taken from the data key is then
    written into both result dicts under ``"SNR"``, and the values for
    ``selected_features`` are read from each dict and concatenated, real
    channel first.

    Args:
        data: Mapping of ``(mod_type, snr)`` keys to iterables of signals.
        selected_features: Ordered feature names to read from each channel's
            feature dict.

    Returns:
        Tuple ``(features, labels, snr_values)``:
        ``features`` is ``np.array`` of the rows (each row has
        ``2 * len(selected_features)`` entries), ``labels`` is a list with the
        ``mod_type`` of each row, ``snr_values`` is ``np.array`` of each row's SNR.

    Warns:
        RuntimeWarning: emitted once if any selected feature was absent from a
            feature dict and therefore filled with 0.
    """
    import warnings  # local import keeps this definition independent of the import cell

    features = []
    labels = []
    snr_values = []
    zero_filled = set()  # selected features that fell back to the default 0

    def _select(feature_dict):
        # Pull the selected features in order, remembering which ones were missing.
        row = []
        for feat in selected_features:
            if feat not in feature_dict:
                zero_filled.add(feat)
            row.append(feature_dict.get(feat, 0))
        return row

    for (mod_type, snr), signals in data.items():
        for signal in signals:
            real_part, imag_part = signal[0], signal[1]

            # Each channel is analysed on its own; the extractor receives the
            # same array for both of its signal arguments.
            real_feature_dict = extract_comprehensive_features(real_part, real_part, selected_features)
            imag_feature_dict = extract_comprehensive_features(imag_part, imag_part, selected_features)

            # The SNR from the data key overrides whatever the extractor returned.
            real_feature_dict["SNR"] = snr
            imag_feature_dict["SNR"] = snr

            features.append(_select(real_feature_dict) + _select(imag_feature_dict))
            labels.append(mod_type)
            snr_values.append(snr)  # kept so results can be grouped by SNR later

    if zero_filled:
        warnings.warn(
            "Features missing from extract_comprehensive_features output were filled with 0: "
            + ", ".join(sorted(str(name) for name in zero_filled)),
            RuntimeWarning,
            stacklevel=2,
        )

    print(f"Extracted {len(features)} samples with all selected features.")
    return np.array(features), labels, np.array(snr_values)

# Read the RML2016.10a dictionary from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point this at a copy of the dataset you trust.
with open("../RML2016.10a_dict.pkl", "rb") as f:
    data = pickle.load(f, encoding="latin1")

# Keep the QAM16/QAM64 entries, then turn every signal into a feature row.
qam_data = filter_qam_data(data)
qam_features, qam_labels, snr_values = extract_qam_features(qam_data, QAM_FEATURES)

# Stop early when nothing survived filtering/extraction.
if not len(qam_features):
    raise ValueError("No data found after filtering. Please check the feature extraction and data filtering steps.")

# Binary targets: QAM16 -> 0, anything else (here QAM64) -> 1.
qam_labels = np.array([int(label != "QAM16") for label in qam_labels])

# 70/30 train/test split; the SNR array is split alongside so each test sample
# keeps its SNR for the per-SNR evaluation.
X_train, X_test, y_train, y_test, snr_train, snr_test = train_test_split(
    qam_features,
    qam_labels,
    snr_values,
    test_size=0.3,
    random_state=42,
)


In [None]:

# Fit a 100-tree random forest on the training split; the fixed random_state
# makes the fitted model repeatable across runs.
qam_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
qam_model.fit(X_train, y_train)


In [None]:

# Predictions and Evaluation
qam_pred = qam_model.predict(X_test)

# Accuracy per SNR. np.unique returns the SNR values sorted, so the dict (and
# therefore the plotted curve) runs from the lowest SNR to the highest.
snr_accuracy = {}
unique_snr_values = np.unique(snr_test)

for snr in unique_snr_values:
    at_this_snr = snr_test == snr  # boolean mask selecting the test samples at this SNR
    snr_accuracy[snr] = accuracy_score(y_test[at_this_snr], qam_pred[at_this_snr])

# Plotting accuracy per SNR
acc_fig, acc_ax = plt.subplots(figsize=(10, 6))
acc_ax.plot(list(snr_accuracy.keys()), list(snr_accuracy.values()), marker='o')
acc_ax.set_xlabel("SNR (dB)")
acc_ax.set_ylabel("Accuracy")
acc_ax.set_title("Accuracy per SNR for QAM16 vs QAM64 Classification")
acc_ax.grid(True)
plt.show()

# Overall performance. The confusion matrix is computed once and pinned to
# labels [0, 1] so it is always 2x2 and its rows/columns line up with the
# QAM16/QAM64 tick labels even if one class is absent from y_test or qam_pred.
class_names = ["QAM16", "QAM64"]
qam_cm = confusion_matrix(y_test, qam_pred, labels=[0, 1])

print("QAM16 vs QAM64 Model Performance")
print(classification_report(y_test, qam_pred))
print(qam_cm)

# Visualize the confusion matrix
cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    qam_cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_xlabel("Predicted Label")
cm_ax.set_ylabel("True Label")
cm_ax.set_title("Confusion Matrix for QAM16 vs QAM64 Classification")
plt.show()


