BEATs Embedding Extraction Code

In [2]:
import os
import torch
import torchaudio
import numpy as np
from BEATs import BEATs, BEATsConfig

# ==== CONFIGURATION ====
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CHECKPOINT_PATH = "K:/DCASE/BEATs_finetuned.pt"
BASE_INPUT_DIR = "K:/DCASE/TestingData"
BASE_OUTPUT_DIR = "K:/DCASE/BEATs/Test_Embeddings"
MASK_PARAM = 80  # SpecAugment mask width

# ==== LOAD CHECKPOINT ====
print("🔄 Loading checkpoint...")
checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE)
cfg = BEATsConfig()
cfg.input_patch_size = (16, 16)
cfg.conv_bias = checkpoint["cfg"].get("conv_bias", False)

print("🚀 Initializing BEATs model...")
model = BEATs(cfg)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(DEVICE)
model.eval()
print("✅ BEATs model loaded successfully!")

# ==== SPEC-AUGMENT FUNCTION ====
def apply_specaugment(tensor, mask_param=80):
    """Apply time and frequency masking to the feature tensor."""
    tensor = tensor.unsqueeze(0)  # Shape: [1, Time, Feature]
    
    # Apply frequency masking
    freq_mask = torchaudio.transforms.FrequencyMasking(freq_mask_param=mask_param)
    tensor = freq_mask(tensor)

    # Apply time masking
    time_mask = torchaudio.transforms.TimeMasking(time_mask_param=mask_param)
    tensor = time_mask(tensor)

    return tensor.squeeze(0)  # Shape: [Time, Feature]

# ==== EMBEDDING EXTRACTION FUNCTION ====
def extract_beats_embedding(wav_file):
    try:
        waveform, sample_rate = torchaudio.load(wav_file)
        print(f"🔹 Loaded {wav_file}: Shape {waveform.shape}, Sample Rate {sample_rate}")
    except Exception as e:
        print(f"❌ Error loading {wav_file}: {e}")
        return None

    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        waveform = resampler(waveform)

    waveform = waveform.to(DEVICE)

    # Pad or trim to 30 seconds
    target_length = 16000 * 30
    current_length = waveform.shape[1]
    if current_length < target_length:
        waveform = torch.nn.functional.pad(waveform, (0, target_length - current_length))
    elif current_length > target_length:
        waveform = waveform[:, :target_length]

    print(f"🎵 Prepared waveform shape: {waveform.shape}")

    # Extract features (final layer)
    with torch.no_grad():
        try:
            features = model.extract_features(waveform)[0]  # Shape: [1, Time, Feature]
            print(f"🎛 Raw BEATs feature shape: {features.shape}")
            # Apply SpecAugment
            features = apply_specaugment(features.squeeze(0))  # Shape: [Time, Feature]
            return features.cpu().numpy()
        except Exception as e:
            print(f"❌ BEATs model error: {e}")
            return None

# ==== PROCESS ALL FILES ====
def process_all_wav_files(base_input_dir, base_output_dir):
    os.makedirs(base_output_dir, exist_ok=True)

    for machine in os.listdir(base_input_dir):
        machine_input_dir = os.path.join(base_input_dir, machine)
        machine_output_dir = os.path.join(base_output_dir, machine)
        os.makedirs(machine_output_dir, exist_ok=True)

        for file in os.listdir(machine_input_dir):
            if file.endswith(".wav"):
                input_path = os.path.join(machine_input_dir, file)
                output_path = os.path.join(machine_output_dir, f"BEATs_aug_{file.replace('.wav', '.npy')}")

                if os.path.exists(output_path):
                    print(f"⏭️ Skipping {file} - already processed")
                    continue

                embedding = extract_beats_embedding(input_path)
                if embedding is not None:
                    print(f"Saving embedding shape: {embedding.shape}")  # should be (T, 768), and T should be consistent
                    np.save(output_path, embedding)
                    print(f"💾 Saved augmented embedding to {output_path}")
                else:
                    print(f"❌ Skipping {file} due to errors.")

if __name__ == "__main__":
    process_all_wav_files(BASE_INPUT_DIR, BASE_OUTPUT_DIR)
    print("✅ All embeddings extracted with SpecAugment!")

🔄 Loading checkpoint...
🚀 Initializing BEATs model...


  WeightNorm.apply(module, name, dim)


✅ BEATs model loaded successfully!
🔹 Loaded K:/DCASE/TestingData\test_bearing\section_00_source_test_anomaly_0001_pro_A_vel_4_loc_A.wav: Shape torch.Size([1, 160000]), Sample Rate 16000
🎵 Prepared waveform shape: torch.Size([1, 480000])
🎛 Raw BEATs feature shape: torch.Size([1, 1496, 768])
Saving embedding shape: (1496, 768)
💾 Saved augmented embedding to K:/DCASE/BEATs/Test_Embeddings\test_bearing\BEATs_aug_section_00_source_test_anomaly_0001_pro_A_vel_4_loc_A.npy
🔹 Loaded K:/DCASE/TestingData\test_bearing\section_00_source_test_anomaly_0002_pro_A_vel_4_loc_A.wav: Shape torch.Size([1, 160000]), Sample Rate 16000
🎵 Prepared waveform shape: torch.Size([1, 480000])
🎛 Raw BEATs feature shape: torch.Size([1, 1496, 768])
Saving embedding shape: (1496, 768)
💾 Saved augmented embedding to K:/DCASE/BEATs/Test_Embeddings\test_bearing\BEATs_aug_section_00_source_test_anomaly_0002_pro_A_vel_4_loc_A.npy
🔹 Loaded K:/DCASE/TestingData\test_bearing\section_00_source_test_anomaly_0003_pro_A_vel_12_loc

In [22]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import time

class BEATsEmbeddingDataset(Dataset):
    def __init__(self, root_dir, debug=False):
        self.samples = []
        for machine_folder in os.listdir(root_dir):
            machine_path = os.path.join(root_dir, machine_folder)
            for file in os.listdir(machine_path):
                if file.endswith(".npy"):
                    self.samples.append(os.path.join(machine_path, file))
        
        if debug:
            print("[INFO] Debug mode enabled: using only 10 samples.")
            self.samples = self.samples[:10]  # Fast test

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path = self.samples[idx]
        try:
            data = np.load(path, mmap_mode='r').astype(np.float32)
        except Exception as e:
            print(f"[ERROR] Failed to load {path}: {e}")
            raise
        return torch.from_numpy(data), path


class Bottleneck(nn.Module):
    def __init__(self, in_dim=768, out_dim=256):
        super().__init__()
        self.linear = nn.Linear(in_dim, out_dim)

    def forward(self, x):  # x: (B, T, 768)
        return self.linear(x)

class StudentNet(nn.Module):
    def __init__(self, in_dim=256, out_dim=768):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(in_dim, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(512, out_dim, kernel_size=3, padding=1)
        )

    def forward(self, x):  # x: (B, T, 256)
        x = x.transpose(1, 2)  # (B, 256, T)
        out = self.net(x)
        return out.transpose(1, 2)  # (B, T, 768)

def cosine_loss(teacher, student):
    teacher_norm = F.normalize(teacher, dim=-1)
    student_norm = F.normalize(student, dim=-1)
    return 1 - (teacher_norm * student_norm).sum(dim=-1).mean()

def train_model(root_dir, batch_size=8, epochs=20, debug=False, save_dir="K:/DCASE"):
    print("[INFO] Using device:", device)

    dataset = BEATsEmbeddingDataset(root_dir, debug=debug)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                            num_workers=0, pin_memory=True)

    model = nn.Sequential(Bottleneck(), StudentNet()).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    best_loss = float('inf')
    best_model_path = os.path.join(save_dir, "student_model_best.pth")
    final_model_path = os.path.join(save_dir, "student_model_last.pth")

    for epoch in range(epochs):
        start = time.time()
        model.train()
        running_loss = 0.0

        for teacher_embed, _ in dataloader:
            teacher_embed = teacher_embed.to(device, non_blocking=True)  # (B, T, 768)
            student_input = model[0](teacher_embed)
            student_output = model[1](student_input)
            loss = cosine_loss(teacher_embed, student_output)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        avg_loss = running_loss / len(dataloader)
        end = time.time()
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f} - Time: {end - start:.2f}s")

        # Save best model
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': best_loss
            }, best_model_path)
            print(f"[INFO] ✅ Best model updated (loss: {best_loss:.4f}) and saved to {best_model_path}")

    # Save final model
    torch.save({
        'epoch': epochs,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': avg_loss
    }, final_model_path)
    print(f"[INFO] 🧠 Final student model saved at {final_model_path}")
# CUDA device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [23]:
if __name__ == "__main__":
    train_model("C:/DCASE_Temp/BEATs/Embeddings", batch_size=16, epochs=20, debug=False)

[INFO] Using device: cuda
Epoch [1/20] - Loss: 0.1018 - Time: 134.08s
[INFO] ✅ Best model updated (loss: 0.1018) and saved to K:/DCASE\student_model_best.pth
Epoch [2/20] - Loss: 0.0547 - Time: 85.12s
[INFO] ✅ Best model updated (loss: 0.0547) and saved to K:/DCASE\student_model_best.pth
Epoch [3/20] - Loss: 0.0471 - Time: 132.86s
[INFO] ✅ Best model updated (loss: 0.0471) and saved to K:/DCASE\student_model_best.pth
Epoch [4/20] - Loss: 0.0433 - Time: 161.94s
[INFO] ✅ Best model updated (loss: 0.0433) and saved to K:/DCASE\student_model_best.pth
Epoch [5/20] - Loss: 0.0410 - Time: 86.20s
[INFO] ✅ Best model updated (loss: 0.0410) and saved to K:/DCASE\student_model_best.pth
Epoch [6/20] - Loss: 0.0394 - Time: 85.71s
[INFO] ✅ Best model updated (loss: 0.0394) and saved to K:/DCASE\student_model_best.pth
Epoch [7/20] - Loss: 0.0381 - Time: 85.39s
[INFO] ✅ Best model updated (loss: 0.0381) and saved to K:/DCASE\student_model_best.pth
Epoch [8/20] - Loss: 0.0371 - Time: 84.60s
[INFO] ✅ Be

In [55]:
import os
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
import pandas as pd

class TestEmbeddingDataset(Dataset):
    def __init__(self, machine_dir):  # <- machine_dir is already like test_bearing/
        self.paths = [
            os.path.join(machine_dir, f)
            for f in os.listdir(machine_dir)
            if f.endswith(".npy")
        ]
        self.paths.sort()

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        path = self.paths[idx]
        data = np.load(path).astype(np.float32)
        return torch.from_numpy(data), os.path.basename(path)

def evaluate_model_per_machine(model_checkpoint_path, test_dir, label_dir, output_csv_path="all_anomaly_scores.csv"):
    # Load full student model
    checkpoint = torch.load(model_checkpoint_path)
    model = nn.Sequential(Bottleneck(), StudentNet()).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    all_results = []

    for machine_type in sorted(os.listdir(test_dir)):
        machine_path = os.path.join(test_dir, machine_type)
        label_path = os.path.join(label_dir, f"{machine_type}.csv")  # <-- Assumes label CSV is named after the machine

        if not os.path.isfile(label_path):
            print(f"[WARNING] Label file not found for {machine_type}. Skipping.")
            continue

        # Load dataset and dataloader
        dataset = TestEmbeddingDataset(machine_path)
        dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

        anomaly_scores = []

        with torch.no_grad():
            for x, filename in dataloader:
                x = x.to(device)  # (1, T, 768)
                student_input = model[0](x)
                output = model[1](student_input)

                teacher_norm = F.normalize(x, dim=-1)
                student_norm = F.normalize(output, dim=-1)
                cos_sim = (teacher_norm * student_norm).sum(dim=-1).mean().item()
                score = 1 - cos_sim  # Lower sim => higher anomaly

                anomaly_scores.append((filename[0], score))


        # Load labels
        df_labels = pd.read_csv(label_path)
        df_labels["filename"] = df_labels["filename"].apply(os.path.basename)

        df_scores = pd.DataFrame(anomaly_scores, columns=["filename", "score"])
        df = df_scores.merge(df_labels, on="filename")

        # Compute AUC and pAUC
        try:
            auc = roc_auc_score(df["label"], df["score"])
            p_auc = roc_auc_score(df["label"], df["score"], max_fpr=0.1)
        except ValueError:
            auc, p_auc = None, None
            print(f"[WARNING] Cannot compute AUC for {machine_type} (possibly only one class in labels).")

        print(f"[{machine_type}] AUC: {auc:.4f} | pAUC: {p_auc:.4f}" if auc else f"[{machine_type}] Skipped.")

        df["machine_type"] = machine_type
        df["AUC"] = auc
        df["pAUC"] = p_auc

        all_results.append(df)

    # Combine and save all scores
    if all_results:
        df_all = pd.concat(all_results, ignore_index=True)
        df_all.to_csv(output_csv_path, index=False)
        print(f"\n[INFO] All scores saved to: {output_csv_path}")
    else:
        print("[INFO] No valid evaluations were performed.")


In [56]:
evaluate_model_per_machine(
    model_checkpoint_path="K:/DCASE/student_model_best.pth",
    test_dir="K:/DCASE/BEATs/Test_Embeddings",
    label_dir="K:/DCASE/labels",  # Contains machine1.csv, machine2.csv, etc.
    output_csv_path="K:/DCASE/Eval/all_machine_scores.csv"
)


[test_bearing] AUC: 0.4852 | pAUC: 0.4958
[test_fan] AUC: 0.5162 | pAUC: 0.5042
[test_gearbox] AUC: 0.5009 | pAUC: 0.4911
[test_slider] AUC: 0.5160 | pAUC: 0.5132
[test_toycar] AUC: 0.5225 | pAUC: 0.5100
[test_toytrain] AUC: 0.6144 | pAUC: 0.5058
[test_valve] AUC: 0.5384 | pAUC: 0.5153


OSError: Cannot save file into a non-existent directory: 'K:\DCASE\Eval'

In [50]:
import os
import pandas as pd

test_embeddings_dir = r"K:\\DCASE\\BEATs\\Test_Embeddings"
label_output_dir = r"K:\\DCASE\\labels"
os.makedirs(label_output_dir, exist_ok=True)

for machine_type in os.listdir(test_embeddings_dir):
    machine_path = os.path.join(test_embeddings_dir, machine_type)
    if not os.path.isdir(machine_path):
        continue

    data = []
    for fname in os.listdir(machine_path):
        if fname.endswith(".npy") and "_test_" in fname:
            lower_fname = fname.lower()
            if "_test_anomaly_" in lower_fname:
                label = 1
            elif "_test_normal_" in lower_fname:
                label = 0
            else:
                print(f"[WARNING] Could not determine label for file: {fname}")
                continue
            data.append({"filename": fname, "label": label})

    if data:
        df = pd.DataFrame(data)
        output_path = os.path.join(label_output_dir, f"{machine_type}.csv")
        df.to_csv(output_path, index=False)
        print(f"[✅] Saved labels for {machine_type}: {output_path}")
    else:
        print(f"[⚠️] No valid test files found in {machine_type}")


[✅] Saved labels for test_bearing: K:\\DCASE\\labels\test_bearing.csv
[✅] Saved labels for test_fan: K:\\DCASE\\labels\test_fan.csv
[✅] Saved labels for test_gearbox: K:\\DCASE\\labels\test_gearbox.csv
[✅] Saved labels for test_slider: K:\\DCASE\\labels\test_slider.csv
[✅] Saved labels for test_toycar: K:\\DCASE\\labels\test_toycar.csv
[✅] Saved labels for test_toytrain: K:\\DCASE\\labels\test_toytrain.csv
[✅] Saved labels for test_valve: K:\\DCASE\\labels\test_valve.csv
