In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import json, os, glob
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset

from dataset.echo_npy_dataset import EchoNPYDataset
from models.model_cnn import CNNWordClassifier

In [2]:
with open("config.json","r") as f:
    cfg = json.load(f)

audio_files = cfg["audio"       ]["files"]       # ["audio002.raw", …]
gt_texts    = cfg["ground_truth"]["files"]       # ["record_…_records.txt", …]

datasets_list = []
for raw_fn, gt_fn in zip(audio_files, gt_texts):
    base = os.path.splitext(raw_fn)[0]
    npy  = f"echo_profile/{base}_fmcw_16bit_diff_profiles.npy"
    txt  = gt_fn.replace(".txt","_labeled.txt")
    txt  = os.path.join("transcripts", txt)
    assert os.path.exists(npy), f"Missing {npy}"
    assert os.path.exists(txt), f"Missing {txt}"
    datasets_list.append(EchoNPYDataset(npy, txt))

full_ds = ConcatDataset(datasets_list)
print("Total samples:", len(full_ds))

Total samples: 60


In [3]:
lengths = []
for ds in datasets_list:
    # each EchoNPYDataset has a `.data` numpy array of shape (N_i, L_i)
    lengths.append(ds.data.shape[1])
max_len = max(lengths)
print("Global max sequence length =", max_len)

Global max sequence length = 9032


In [4]:
class PaddedDS(Dataset):
    def __init__(self, base_ds, encoder, max_len):
        self.ds      = base_ds
        self.encoder = encoder
        self.max_len = max_len

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        x, y_str = self.ds[i]              # x: torch.Tensor([L_i]), y_str: str
        y        = self.encoder.transform([y_str])[0]

        L = x.size(0)
        if L < self.max_len:
            pad = torch.zeros(self.max_len - L, dtype=x.dtype)
            x   = torch.cat([x, pad], dim=0)
        else:
            x   = x[:self.max_len]

        return x, y

In [5]:
all_labels = [ full_ds[i][1] for i in range(len(full_ds)) ]
le         = LabelEncoder().fit(all_labels)

wrapped = PaddedDS(full_ds, le, max_len)

# stratified train/test split
idx        = list(range(len(wrapped)))
labels_int = [wrapped[i][1] for i in idx]    # now ints

train_idx, test_idx = train_test_split(
    idx, test_size=0.2, random_state=42, stratify=labels_int
)
train_ds = Subset(wrapped, train_idx)
test_ds  = Subset(wrapped, test_idx)

BATCH_SIZE = 8
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE)
print("Train size:", len(train_ds), "  Test size:", len(test_ds))

Train size: 48   Test size: 12


In [6]:
device      = torch.device("cuda" if torch.cuda.is_available() else "cpu")
seq_len     = max_len                #  ← now fixed across every batch
num_classes = len(le.classes_)

model   = CNNWordClassifier(seq_len, num_classes).to(device)
opt     = torch.optim.Adam(model.parameters(), lr=1e-3)
crit    = torch.nn.CrossEntropyLoss()


In [7]:
for epoch in range(1, 11):
    model.train()
    total_loss = 0.0
    for X, y in train_loader:
        # X: (B, max_len), y: (B,)
        X = X.to(device).unsqueeze(1)   # -> (B,1,max_len)
        y = y.to(device)

        opt.zero_grad()
        out  = model(X)
        loss = crit(out, y)
        loss.backward()
        opt.step()
        total_loss += loss.item()
    model.eval()
    val_corr = val_tot = 0
    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device).unsqueeze(1), y.to(device)
            p    = model(X).argmax(dim=1)
            val_corr += (p==y).sum().item()
            val_tot  += y.size(0)
    print(f"Epoch {epoch} — Train Loss: {total_loss/len(train_loader):.3f}, val acc: {100*val_corr/val_tot:.1f}%")
    print(f"Epoch {epoch}/10, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1 — Train Loss: 6.266, val acc: 0.0%
Epoch 1/10, Loss: 6.2661
Epoch 2 — Train Loss: 2.967, val acc: 0.0%
Epoch 2/10, Loss: 2.9667
Epoch 3 — Train Loss: 2.425, val acc: 0.0%
Epoch 3/10, Loss: 2.4254
Epoch 4 — Train Loss: 2.121, val acc: 0.0%
Epoch 4/10, Loss: 2.1207
Epoch 5 — Train Loss: 1.974, val acc: 0.0%
Epoch 5/10, Loss: 1.9743
Epoch 6 — Train Loss: 1.805, val acc: 0.0%
Epoch 6/10, Loss: 1.8052
Epoch 7 — Train Loss: 1.616, val acc: 0.0%
Epoch 7/10, Loss: 1.6160
Epoch 8 — Train Loss: 1.316, val acc: 0.0%
Epoch 8/10, Loss: 1.3162
Epoch 9 — Train Loss: 0.999, val acc: 0.0%
Epoch 9/10, Loss: 0.9987
Epoch 10 — Train Loss: 0.787, val acc: 0.0%
Epoch 10/10, Loss: 0.7870


In [8]:
model.eval()
train_correct = train_total = 0
with torch.no_grad():
    for X, y in train_loader:
        X, y    = X.to(device).unsqueeze(1), y.to(device)
        preds   = model(X).argmax(dim=1)
        train_correct += (preds == y).sum().item()
        train_total   += y.size(0)
print(f"Train accuracy: {100*train_correct/train_total:.1f}%")

Train accuracy: 91.7%


In [9]:
model.eval()
correct = total = 0
with torch.no_grad():
    for X, y in test_loader:
        X = X.to(device).unsqueeze(1)
        y = y.to(device)
        preds = model(X).argmax(dim=1)
        correct += (preds==y).sum().item()
        total   += y.size(0)
print(f"Test accuracy: {100*correct/total:.1f}%")


Test accuracy: 0.0%


In [10]:
Xb, yb = next(iter(test_loader))
Xb, yb = Xb.to(device).unsqueeze(1), yb.to(device)
with torch.no_grad():
    pb = model(Xb).argmax(dim=1)

# decode back to words
pred_words = le.inverse_transform(pb.cpu().numpy())
true_words = le.inverse_transform(yb.cpu().numpy())

print("Predicted   vs   True")
for p, t in zip(pred_words, true_words):
    print(f"  {p:>5s}   ≠   {t}")
print("Classes in label‐encoder:", le.classes_)

Predicted   vs   True
   Zero   ≠   Seven
   Five   ≠   Nine
   Nine   ≠   One
   Four   ≠   Five
    One   ≠   Nine
   Four   ≠   Three
   Zero   ≠   Two
    Two   ≠   Six
Classes in label‐encoder: ['Eight' 'Five' 'Four' 'Nine' 'One' 'Seven' 'Six' 'Three' 'Two' 'Zero']
