In [None]:
# ------------------------------------------------------------
# Imports
# ------------------------------------------------------------
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import scipy.signal as sg
import kagglehub

# ------------------------------------------------------------
# Kaggle Download
# ------------------------------------------------------------
# Fetch the dataset through kagglehub and keep whatever location it
# returns in `path`; the loading step further down globs under `path`.
DATASET_HANDLE = "hyelinnam/noisy-drone-rf-signal-classification-v2"
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Dataset path:", path)

# Show the top-level entries so the folder layout is visible
dataset_entries = os.listdir(path)
print(dataset_entries)

# ------------------------------------------------------------
# Load IQ Data
# Assumes .npy files: shape = (N, 2) for I and Q
# ------------------------------------------------------------
# Sort the matches: glob yields files in an arbitrary, filesystem-dependent
# order, which would make `signals[0]` and the seeded splits further down
# differ from one machine to the next.
iq_files = sorted(glob.glob(os.path.join(path, "**", "*.npy"), recursive=True))
print("Found IQ files:", len(iq_files))

# Fail here with a clear message rather than later on an empty array.
if not iq_files:
    raise FileNotFoundError(f"No .npy files found under {path!r}")

signals = []
labels = []

for npy_path in iq_files:
    arr = np.load(npy_path)

    # Every recording must share one shape, otherwise the examples cannot
    # be stacked into a single dense array for the model.
    if signals and arr.shape != signals[0].shape:
        raise ValueError(
            f"{npy_path} has shape {arr.shape}, expected {signals[0].shape}; "
            "all IQ recordings must share one shape to be stacked"
        )
    signals.append(arr)

    # Label determined from filename prefix (text before the first "_")
    lbl = os.path.basename(npy_path).split("_")[0]
    labels.append(lbl)

# np.stack adds a leading example axis: (num_files, *recording_shape)
signals = np.stack(signals)
labels = np.array(labels)

print("Signals shape:", signals.shape)
print("Unique labels:", np.unique(labels))

# ------------------------------------------------------------
# Encode labels
# ------------------------------------------------------------
# Fit the encoder on the string labels, then map them to integer ids.
# `class_names` keeps the encoder's class order so an id can be turned
# back into its label when reporting results.
enc = LabelEncoder().fit(labels)
y = enc.transform(labels)
class_names = [*enc.classes_]
print("Encoded classes:", class_names)

# ------------------------------------------------------------
# EDA: Waveform, Spectrum, Waterfall
# ------------------------------------------------------------
# Pick first example
idx = 0
iq = signals[idx]
# assumes each example is (samples, 2) with I in column 0 and Q in column 1,
# as stated in the loading step — TODO confirm against the dataset
I = iq[:, 0]
Q = iq[:, 1]
complex_sig = I + 1j * Q

# Set sampling rate if known; otherwise approximate
fs = 1e6  # modify if dataset metadata states otherwise

# Cap segment lengths at the signal length so a short recording does not
# violate scipy's requirement that noverlap be smaller than nperseg.
n_samples = len(complex_sig)
welch_nperseg = min(1024, n_samples)
spec_nperseg = min(256, n_samples)
spec_noverlap = spec_nperseg // 2

fig, ax = plt.subplots(3, 1, figsize=(12, 12))

# 1. Time-domain I/Q
ax[0].plot(I, label="I")
ax[0].plot(Q, label="Q")
ax[0].set_title("Time Domain I/Q")
ax[0].set_xlabel("Sample")
ax[0].set_ylabel("Amplitude")
ax[0].legend()

# 2. Magnitude Spectrum (Welch power spectral density)
# Complex input gives a two-sided spectrum whose bins come back in FFT
# order (0 .. +fs/2, then -fs/2 .. 0). fftshift reorders them so frequency
# increases monotonically; otherwise the line plot wraps across the axes.
f, Pxx = sg.welch(complex_sig, fs=fs, nperseg=welch_nperseg, return_onesided=False)
f = np.fft.fftshift(f)
Pxx = np.fft.fftshift(Pxx)
ax[1].semilogy(f, Pxx)
ax[1].set_title("Magnitude Spectrum")
ax[1].set_xlabel("Frequency (Hz)")
ax[1].set_ylabel("PSD")

# 3. Waterfall / Spectrogram
# Same bin reordering as above, applied along the frequency axis (axis 0),
# because pcolormesh needs a monotonic frequency coordinate.
f_s, t_s, Sxx = sg.spectrogram(
    complex_sig,
    fs=fs,
    nperseg=spec_nperseg,
    noverlap=spec_noverlap,
    return_onesided=False,
)
f_s = np.fft.fftshift(f_s)
Sxx = np.fft.fftshift(Sxx, axes=0)
# The small offset keeps log10 finite where the power is exactly zero.
pcm = ax[2].pcolormesh(t_s, f_s, 10 * np.log10(Sxx + 1e-12), shading="auto")
ax[2].set_title("Waterfall Spectrogram")
ax[2].set_ylabel("Frequency (Hz)")
ax[2].set_xlabel("Time (sec)")
fig.colorbar(pcm, ax=ax[2], label="Power (dB)")

plt.tight_layout()
plt.show()


In [None]:
# ------------------------------------------------------------
# Preprocess for LSTM
# Standardize I/Q per feature, using statistics taken from the
# training split only so nothing about val/test leaks into training.
# ------------------------------------------------------------
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay

X = signals.astype(np.float32)

# ------------------------------------------------------------
# Train / Val / Test split
# ------------------------------------------------------------
# Split row indices rather than the arrays themselves. The partition depends
# only on the sample count, the stratify labels and random_state, so it is
# the same 70/15/15 split as splitting X directly, but it lets the scaler be
# fitted on the training rows before any data is standardized.
all_idx = np.arange(len(X))
train_idx, temp_idx, y_train, y_temp = train_test_split(
    all_idx, y, test_size=0.3, random_state=42, stratify=y
)
val_idx, test_idx, y_val, y_test = train_test_split(
    temp_idx, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Fit mean/std per feature (last axis) on training samples only, then apply
# that single transform to the whole array.
n_features = X.shape[-1]
scaler = StandardScaler()
scaler.fit(X[train_idx].reshape(-1, n_features))
X = scaler.transform(X.reshape(-1, n_features)).reshape(X.shape)

print("Final X shape:", X.shape)
print("Final y shape:", y.shape)

X_train = X[train_idx]
X_temp = X[temp_idx]
X_val = X[val_idx]
X_test = X[test_idx]

print("Train:", X_train.shape)
print("Val:", X_val.shape)
print("Test:", X_test.shape)

# ------------------------------------------------------------
# LSTM / BiLSTM Model (same pattern as ViC notebook)
# ------------------------------------------------------------
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling repeat across Restart & Run All (matches the
# random_state=42 used for the data splits).
tf.keras.utils.set_random_seed(42)

num_classes = len(class_names)
timesteps = X_train.shape[1]
features = X_train.shape[2]

# Declare the input with an explicit Input object instead of passing
# `input_shape=` to the first (wrapped) layer, which newer Keras releases
# warn against.
model = Sequential([
    tf.keras.Input(shape=(timesteps, features)),
    # First BiLSTM returns the full sequence so the second can consume it.
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.3),
    # Second BiLSTM collapses the sequence to a single vector per example.
    Bidirectional(LSTM(64)),
    Dropout(0.3),
    Dense(64, activation="relu"),
    # One softmax output per encoded class.
    Dense(num_classes, activation="softmax"),
])

# Sparse loss: targets are integer class ids, not one-hot vectors.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(1e-3),
    metrics=["accuracy"]
)

model.summary()

# ------------------------------------------------------------
# Training
# ------------------------------------------------------------
# Stop once validation loss has not improved for 10 epochs and roll the
# model back to the best weights seen.
early = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True
)

# Loop settings gathered in one place so they are easy to tune.
fit_options = dict(epochs=50, batch_size=64, verbose=2)

history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    callbacks=[early],
    **fit_options,
)

# ------------------------------------------------------------
# Plot training curves
# ------------------------------------------------------------
# One figure per metric; each row is
# (train history key, train legend, val history key, val legend, title).
curve_specs = (
    ("accuracy", "train_acc", "val_accuracy", "val_acc",
     "Training and Validation Accuracy"),
    ("loss", "train_loss", "val_loss", "val_loss",
     "Training and Validation Loss"),
)

for train_key, train_label, val_key, val_label, curve_title in curve_specs:
    plt.figure(figsize=(10, 4))
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(curve_title)
    plt.legend()
    plt.grid(True)
    plt.show()

# ------------------------------------------------------------
# Evaluate on test set
# ------------------------------------------------------------
# Per-class probabilities for the held-out examples; the predicted class
# is the index of the largest probability in each row.
test_probs = model.predict(X_test, batch_size=64)
test_pred = test_probs.argmax(axis=1)

acc = accuracy_score(y_test, test_pred)
print("Test accuracy:", acc)

# ------------------------------------------------------------
# Confusion Matrix
# ------------------------------------------------------------
# Pin the label set to every encoded class id. Without `labels=`, a class
# that appears in neither y_test nor the predictions is dropped from the
# matrix, which then no longer lines up with `class_names` on the axes.
cm = confusion_matrix(y_test, test_pred, labels=np.arange(len(class_names)))
fig, ax = plt.subplots(figsize=(8, 6))
disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
disp.plot(ax=ax, cmap="magma", xticks_rotation=45)
# Title the matrix axes directly rather than whichever axes pyplot
# considers current after the colorbar has been added.
ax.set_title("LSTM Confusion Matrix")
plt.show()
