In [None]:
import os, glob, numpy as np, pandas as pd, kagglehub, torch, torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# ------------------------------------------------------------
# 1. Download dataset
# ------------------------------------------------------------
# Fetch the dataset through kagglehub; `path` is whatever directory the
# download call reports.
path = kagglehub.dataset_download("hyelinnam/vic-dataset-iq-signal-visualization-for-cbrs")
print("Dataset path:", path)

# Collect every CSV below the dataset root (any depth), in sorted order so
# the file list is reproducible between runs.
csv_pattern = os.path.join(path, "**", "*.csv")
iq_files = sorted(glob.iglob(csv_pattern, recursive=True))
print("Num CSV files:", len(iq_files))


# ------------------------------------------------------------
# 2. Helper: extract metadata from filename
# Expect patterns like:
#    some_path/DEVICE_BAND_CHANNEL/file.csv
# ------------------------------------------------------------
def _parse_id(token, prefixes):
    """Strip the given prefixes from *token* and return the remaining integer.

    Prefixes are removed in the order given, so callers must list the longest
    one first (otherwise removing "ch" from "chan7" leaves "an7", which does
    not parse). Returns 0 when what is left is not an integer.
    """
    for prefix in prefixes:
        token = token.replace(prefix, "")
    try:
        return int(token)
    except ValueError:
        # Best-effort parsing: an unrecognised token falls back to 0.
        return 0


def parse_metadata(filepath):
    """Extract (device, band, channel) IDs from the parent directory name.

    The directory that directly contains *filepath* is split on "_". When it
    has at least three parts, the first three are read as device, band and
    channel, with the optional prefixes "D"/"dev", "B"/"band" and
    "C"/"ch"/"chan" removed before integer conversion.

    Args:
        filepath: Path to a file; only its parent directory name is used.

    Returns:
        A float32 array ``[device, band, chan]``. Any field that cannot be
        parsed (or all three, if the directory name has fewer than three
        parts) is 0.
    """
    dirname = os.path.basename(os.path.dirname(filepath))
    parts = dirname.split("_")

    # Metadata fallback
    if len(parts) < 3:
        return np.zeros(3, dtype=np.float32)

    device = _parse_id(parts[0], ("dev", "D"))
    band = _parse_id(parts[1], ("band", "B"))
    chan = _parse_id(parts[2], ("chan", "ch", "C"))

    return np.array([device, band, chan], dtype=np.float32)


# ------------------------------------------------------------
# 3. Load IQ, compute SNR, normalize each sample to unit power,
#    compute FFT magnitude, extract metadata
# ------------------------------------------------------------
def load_iq_csv(fp, seq_len=1024):
    """Read one IQ capture from CSV and derive the per-sample features.

    Args:
        fp: Path of the CSV file. Columns named "I" and "Q" are used when
            both exist; otherwise the first two columns are taken.
        seq_len: Number of rows kept. Longer captures are truncated, shorter
            ones are zero-padded at the end.

    Returns:
        A tuple ``(iq_seq, fft_mag, snr, meta)``:
        * ``iq_seq``  - ``(seq_len, 2)`` array scaled to unit mean power.
        * ``fft_mag`` - float32 magnitude of the FFT of ``I + jQ``.
        * ``snr``     - ``10 * log10`` of the mean power measured before
          scaling. NOTE(review): no noise estimate is involved, so this is
          a power level in dB rather than a true signal-to-noise ratio.
        * ``meta``    - result of ``parse_metadata(fp)``.
    """
    frame = pd.read_csv(fp)
    has_named_cols = "I" in frame.columns and "Q" in frame.columns
    raw = frame[["I", "Q"]].values if has_named_cols else frame.values[:, :2]
    samples = raw.astype(np.float32)

    # Fix the length: zero-pad short captures, truncate long ones.
    n_rows = len(samples)
    if n_rows < seq_len:
        padded = np.zeros((seq_len, 2), np.float32)
        padded[:n_rows] = samples
        iq_seq = padded
    else:
        iq_seq = samples[:seq_len]

    # Mean power over the (possibly padded) window; the floor keeps the
    # division and the logarithm below finite for an all-zero capture.
    power = max(np.mean(iq_seq[:, 0] ** 2 + iq_seq[:, 1] ** 2), 1e-12)
    iq_seq = iq_seq / np.sqrt(power)

    # Power level in dB, taken from the pre-normalisation power.
    snr = 10.0 * np.log10(power)

    # Spectrum magnitude of the normalised complex baseband signal.
    spectrum = np.fft.fft(iq_seq[:, 0] + 1j * iq_seq[:, 1])
    fft_mag = np.abs(spectrum).astype(np.float32)

    return iq_seq, fft_mag, snr, parse_metadata(fp)


# ------------------------------------------------------------
# 4. Load entire dataset
# ------------------------------------------------------------
# Load every capture once; each entry is (iq_seq, fft_mag, snr, meta).
samples = [load_iq_csv(fp) for fp in iq_files]

# Stack each feature into one array with the file index as first axis.
X_raw = np.stack([s[0] for s in samples])
FFT_raw = np.stack([s[1] for s in samples])
SNRs = np.array([s[2] for s in samples], dtype=np.float32).reshape(-1, 1)
META = np.stack([s[3] for s in samples])

# The class label of a file is the name of the directory that contains it.
# NOTE(review): parse_metadata() derives META from that same directory name,
# so the metadata features may encode the label directly - confirm this is
# intended before trusting the reported accuracy.
y = np.array([os.path.basename(os.path.dirname(fp)) for fp in iq_files])

# Map the sorted distinct label names to consecutive integer class ids.
labels = sorted(set(y))
label_to_idx = {name: i for i, name in enumerate(labels)}
y_idx = np.array([label_to_idx[name] for name in y], dtype=np.int64)

print("X_raw:", X_raw.shape)
print("FFT_raw:", FFT_raw.shape)
print("META:", META.shape)
print("Classes:", labels)


# ------------------------------------------------------------
# 5. Train/val split
# ------------------------------------------------------------
from sklearn.model_selection import train_test_split

# Split the *unscaled* metadata so the scaler below can be fitted on the
# training rows only. Same test_size / random_state / stratify as before,
# so the rows assigned to each side are unchanged.
(
    X_train, X_val,
    FFT_train, FFT_val,
    SNR_train, SNR_val,
    META_train_raw, META_val_raw,
    y_train, y_val
) = train_test_split(
    X_raw, FFT_raw, SNRs, META, y_idx,
    test_size=0.2, random_state=42, stratify=y_idx
)


# ------------------------------------------------------------
# 6. Scale metadata (StandardScaler) and move everything to the device
# ------------------------------------------------------------
# Fitting on the training split alone keeps validation statistics out of the
# preprocessing (fitting on the full dataset leaks them into training).
scaler_meta = StandardScaler()
META_train = scaler_meta.fit_transform(META_train_raw)
META_val = scaler_meta.transform(META_val_raw)

# Full-dataset view through the same train-fitted scaler, kept so the
# META_scaled name remains available.
META_scaled = scaler_meta.transform(META)

device = "cuda" if torch.cuda.is_available() else "cpu"

X_train_t = torch.tensor(X_train, device=device).float()
X_val_t   = torch.tensor(X_val, device=device).float()
FFT_train_t = torch.tensor(FFT_train, device=device).float()
FFT_val_t   = torch.tensor(FFT_val, device=device).float()
SNR_train_t = torch.tensor(SNR_train, device=device).float()
SNR_val_t   = torch.tensor(SNR_val, device=device).float()
META_train_t = torch.tensor(META_train, device=device).float()
META_val_t   = torch.tensor(META_val, device=device).float()
y_train_t = torch.tensor(y_train, device=device).long()
y_val_t   = torch.tensor(y_val, device=device).long()


# ------------------------------------------------------------
# 7. CNN + BiLSTM + FFT + Metadata fusion model
# ------------------------------------------------------------
class CNN_BiLSTM_Fusion(nn.Module):
    """Classifier fusing a CNN+BiLSTM IQ branch with FFT and metadata branches.

    Args:
        classes: Number of output classes (size of the logit vector).
        seq_len: Length of the FFT-magnitude vector fed to the FFT branch.
            Defaults to 1024, the previously hard-coded value.
        meta_dim: Number of metadata features (excluding the SNR scalar).
            Defaults to 3, the previously hard-coded value.
    """

    def __init__(self, classes, seq_len=1024, meta_dim=3):
        super().__init__()

        # 1D CNN feature extractor on IQ (2 input channels: I and Q).
        # kernel_size=5 with padding=2 keeps the sequence length unchanged.
        self.cnn = nn.Sequential(
            nn.Conv1d(2, 64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.Conv1d(64, 128, kernel_size=5, padding=2),
            nn.ReLU()
        )

        # BiLSTM on CNN output
        self.lstm = nn.LSTM(
            input_size=128,
            hidden_size=128,
            num_layers=2,
            batch_first=True,
            bidirectional=True
        )

        # FFT feature reduction
        self.fft_fc = nn.Sequential(
            nn.Linear(seq_len, 256),
            nn.ReLU()
        )

        # Metadata + SNR MLP (1 SNR scalar + meta_dim metadata features)
        self.meta_fc = nn.Sequential(
            nn.Linear(1 + meta_dim, 32),
            nn.ReLU()
        )

        # Final fusion layer: BiLSTM (2 * 128) + FFT (256) + metadata (32)
        self.fc = nn.Sequential(
            nn.Linear(128*2 + 256 + 32, 256),
            nn.ReLU(),
            nn.Linear(256, classes)
        )

    def forward(self, iq, fft_mag, snr, meta):
        """Return class logits of shape (B, classes).

        Args:
            iq: (B, T, 2) IQ sequence; the last axis becomes the Conv1d
                channel axis.
            fft_mag: (B, seq_len) FFT magnitudes.
            snr: (B, 1) scalar feature concatenated in front of ``meta``.
            meta: (B, meta_dim) metadata features.
        """
        x = iq.permute(0, 2, 1)    # (B,2,T)
        x = self.cnn(x)            # (B,128,T)
        x = x.permute(0, 2, 1)     # (B,T,128)

        # Summarise the sequence with the final hidden state of each
        # direction of the top layer: h_n[-2] is the forward pass after the
        # last timestep, h_n[-1] the backward pass after the first timestep.
        # Taking lstm_out[:, -1] instead would give the backward direction
        # after it has seen only a single timestep.
        _, (h_n, _) = self.lstm(x)
        lstm_feat = torch.cat([h_n[-2], h_n[-1]], dim=1)   # (B,256)

        # FFT features
        fft_feat = self.fft_fc(fft_mag)

        # Metadata fusion
        meta_all = torch.cat([snr, meta], dim=1)
        meta_feat = self.meta_fc(meta_all)

        # Fuse all
        fused = torch.cat([lstm_feat, fft_feat, meta_feat], dim=1)
        return self.fc(fused)


# Cross-entropy over the raw logits returned by the network.
loss_fn = nn.CrossEntropyLoss()

# One output logit per distinct label; parameters live on `device`.
model = CNN_BiLSTM_Fusion(classes=len(labels))
model = model.to(device)

# Adam over all parameters for the initial training phase.
opt = torch.optim.Adam(model.parameters(), lr=1e-3)


# ------------------------------------------------------------
# 8. Training loop
# ------------------------------------------------------------
# Per-epoch accuracy histories, appended to by the training run below.
train_acc_hist, val_acc_hist = [], []

def train_epoch(batch=64):
    """Run one shuffled pass over the training tensors and return accuracy.

    Uses the module-level ``model``, ``opt`` and ``loss_fn``. Accuracy is
    accumulated from the predictions made during the pass, i.e. each batch
    is scored before that batch's optimizer step.

    Args:
        batch: Mini-batch size.

    Returns:
        Fraction of training samples predicted correctly during the pass.
    """
    model.train()
    n_samples = len(X_train_t)
    order = torch.randperm(n_samples)  # new sample order on every call
    hits, seen = 0, 0

    for start in range(0, n_samples, batch):
        sel = order[start:start + batch]
        targets = y_train_t[sel]

        opt.zero_grad()
        logits = model(
            X_train_t[sel],
            FFT_train_t[sel],
            SNR_train_t[sel],
            META_train_t[sel],
        )
        loss_fn(logits, targets).backward()
        opt.step()

        hits += (logits.argmax(1) == targets).sum().item()
        seen += len(sel)

    return hits / seen


def eval_epoch(batch=64):
    """Score the module-level ``model`` on the validation tensors.

    Runs in eval mode with gradients disabled, walking the validation set in
    its stored order.

    Args:
        batch: Number of samples per forward pass.

    Returns:
        Fraction of validation samples predicted correctly.
    """
    model.eval()
    hits, seen = 0, 0

    with torch.no_grad():
        for start in range(0, len(X_val_t), batch):
            window = slice(start, start + batch)
            logits = model(
                X_val_t[window],
                FFT_val_t[window],
                SNR_val_t[window],
                META_val_t[window],
            )
            hits += (logits.argmax(1) == y_val_t[window]).sum().item()
            seen += len(logits)

    return hits / seen


# Initial training: a fixed number of epochs, recording both accuracies.
epochs = 10
for ep in range(1, epochs + 1):
    tr, va = train_epoch(), eval_epoch()   # train first, then validate
    train_acc_hist.append(tr)
    val_acc_hist.append(va)
    print(f"Epoch {ep}: TrainAcc={tr:.4f}  ValAcc={va:.4f}")


# ------------------------------------------------------------
# 9. Accuracy plot
# ------------------------------------------------------------
# Train vs. validation accuracy per epoch of the initial training run.
plt.figure(figsize=(8, 5))
for history, name in ((train_acc_hist, "Train Acc"), (val_acc_hist, "Val Acc")):
    plt.plot(history, label=name)
plt.title("CNN + BiLSTM + FFT + Metadata")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.grid(True)
plt.legend()
plt.show()


# ------------------------------------------------------------
# 10. Confusion matrix
# ------------------------------------------------------------
# Predict the validation set in mini-batches (one forward pass per sample is
# needlessly slow); pred_all ends up as a flat list of predicted class ids in
# validation order.
model.eval()
pred_all = []
cm_batch = 64

with torch.no_grad():
    for start in range(0, len(X_val_t), cm_batch):
        stop = start + cm_batch
        logits = model(
            X_val_t[start:stop],
            FFT_val_t[start:stop],
            SNR_val_t[start:stop],
            META_val_t[start:stop]
        )
        pred_all.extend(logits.argmax(1).tolist())

# Pin the class ids explicitly so the matrix always has one row/column per
# entry of `labels`, even if a class is absent from both y_val and pred_all;
# otherwise the tick labels below could be misaligned.
cm = confusion_matrix(y_val, pred_all, labels=list(range(len(labels))))
plt.figure(figsize=(10,8))
sns.heatmap(cm, annot=True, fmt="d", cmap="magma",
            xticklabels=labels,
            yticklabels=labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()


Downloading from https://www.kaggle.com/api/v1/datasets/download/hyelinnam/vic-dataset-iq-signal-visualization-for-cbrs?dataset_version_number=1...


100%|██████████| 2.81G/2.81G [00:23<00:00, 129MB/s] 

Extracting files...





In [None]:
import os
from pathlib import Path
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# ------------------------------------------------------------
# Resolve project paths (same pattern as DeepRadar notebook)
# ------------------------------------------------------------
# The notebook is taken to run from a sub-directory of the project, so the
# project root is the parent of the current working directory.
notebook_dir = Path(".").resolve()
project_root = notebook_dir.parent

# <project_root>/models is created on first use.
models_dir = project_root.joinpath("models")
models_dir.mkdir(exist_ok=True)

# NOTE(review): the file written to this path later is a PyTorch state_dict
# (torch.save), not a Keras model, despite the ".keras" suffix - consider
# renaming once every consumer of this path is known.
model_path = models_dir.joinpath("vic_cnn_bilstm_fusion.keras")
print("Saving model to:", model_path)


# ------------------------------------------------------------
# Optional layer freezing (modify as needed)
# ------------------------------------------------------------
# Every parameter stays trainable (full fine-tuning). To freeze part of the
# network, set requires_grad to False for the parameters concerned.
for param in model.parameters():
    param.requires_grad = True


# ------------------------------------------------------------
# Fine-tuning optimizer + scheduler
# ------------------------------------------------------------
# Fresh Adam optimizer with a 10x smaller learning rate than the initial
# training phase.
opt = torch.optim.Adam(model.parameters(), lr=1e-4)

# Halve the learning rate when the monitored metric (validation accuracy,
# hence mode="max") has not improved for 3 consecutive epochs.
# The `verbose` keyword is intentionally not passed: it is deprecated and
# rejected by newer PyTorch releases. The current learning rate can be read
# from opt.param_groups[0]["lr"] when it needs to be logged.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    opt, mode="max", factor=0.5, patience=3
)


# ------------------------------------------------------------
# Fine-tuning loops
# ------------------------------------------------------------
# Per-epoch accuracy histories for the fine-tuning phase.
fine_train_acc, fine_val_acc = [], []

# Early-stopping bookkeeping: best validation accuracy so far with the
# matching weights, the allowed number of non-improving epochs, and the
# current count of such epochs.
best_val, best_state = 0.0, None
patience, wait = 8, 0


def train_epoch_ft(batch=64):
    """Run one fine-tuning pass over the training set and return accuracy.

    This used to be a line-for-line copy of ``train_epoch``; it now delegates
    to it. ``train_epoch`` looks up the module-level ``opt`` when called, so
    the fine-tuning optimizer bound to that name is the one that is stepped.

    Args:
        batch: Mini-batch size.

    Returns:
        Fraction of training samples predicted correctly during the pass.
    """
    return train_epoch(batch=batch)


def eval_epoch_ft(batch=64):
    """Return validation accuracy of the current model during fine-tuning.

    This used to be a line-for-line copy of ``eval_epoch``; it now delegates
    to it so the evaluation logic lives in one place.

    Args:
        batch: Number of samples per forward pass.

    Returns:
        Fraction of validation samples predicted correctly.
    """
    return eval_epoch(batch=batch)


# ------------------------------------------------------------
# Run fine-tuning
# ------------------------------------------------------------
epochs_ft = 20

for ep in range(1, epochs_ft + 1):
    tr = train_epoch_ft()
    va = eval_epoch_ft()

    fine_train_acc.append(tr)
    fine_val_acc.append(va)

    print(f"FT Epoch {ep}: TrainAcc={tr:.4f}  ValAcc={va:.4f}")

    # The scheduler monitors validation accuracy.
    scheduler.step(va)

    # Early stopping
    if va > best_val:
        best_val = va
        # state_dict() returns references to the live parameter tensors, so
        # they must be cloned; otherwise later optimizer steps overwrite the
        # "best" snapshot and restoring it below would change nothing.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping.")
            break


# Load best model weights
if best_state is not None:
    model.load_state_dict(best_state)

print("Best validation accuracy:", best_val)


# ------------------------------------------------------------
# Save the final model
# ------------------------------------------------------------
# Persist only the weights (state_dict), not the full module object.
final_state = model.state_dict()
torch.save(final_state, model_path)
print("Model saved.")


# ------------------------------------------------------------
# Accuracy curves
# ------------------------------------------------------------
# Train vs. validation accuracy per fine-tuning epoch.
plt.figure(figsize=(8, 5))
for history, name in ((fine_train_acc, "Train (FT)"), (fine_val_acc, "Val (FT)")):
    plt.plot(history, label=name)
plt.title("Fine-Tuning Accuracy Curves")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.grid(True)
plt.legend()
plt.show()


# ------------------------------------------------------------
# Confusion matrix after fine-tuning
# ------------------------------------------------------------
# Predict the validation set in mini-batches (one forward pass per sample is
# needlessly slow); pred_all ends up as a flat list of predicted class ids in
# validation order.
model.eval()
pred_all = []
cm_batch = 64

with torch.no_grad():
    for start in range(0, len(X_val_t), cm_batch):
        stop = start + cm_batch
        logits = model(
            X_val_t[start:stop],
            FFT_val_t[start:stop],
            SNR_val_t[start:stop],
            META_val_t[start:stop]
        )
        pred_all.extend(logits.argmax(1).tolist())

# Pin the class ids explicitly so the matrix always has one row/column per
# entry of `labels`, even if a class is absent from both y_val and pred_all;
# otherwise the tick labels below could be misaligned.
cm = confusion_matrix(y_val, pred_all, labels=list(range(len(labels))))

plt.figure(figsize=(10,8))
sns.heatmap(cm, annot=True, fmt="d",
            cmap="magma",
            xticklabels=labels,
            yticklabels=labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix (After Fine-Tuning)")
plt.show()
