In [2]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

# -------- CONFIGURATION --------
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Feature width handed to GatedFusion; to_img reshapes 768 values into 32 x 24.
EMBED_DIM = 768
# Size of the autoencoder bottleneck (passed to CNNMahalanobisAE).
LATENT_DIM = 128

# Epochs for each phase:
SOURCE_EPOCHS = 50  # phase 1: training on the Source domain
TARGET_EPOCHS = 10  # phase 2: fine-tuning on the Target domain
LR = 1e-3           # default Adam learning rate used by train_autoencoder

# Machine list as defined; each entry is used as a sub-folder name under
# <base>/<domain>/ when the embeddings are loaded.
MACHINE_LIST = ["bearing", "fan", "gearbox", "slider", "toycar", "toytrain", "valve"]

# Folder directories (adjust these base paths as needed)
BASE_PRETRAINED = r"C:/DCASE_Temp/PreTrained"  # embeddings passed as e_frozen
BASE_FINETUNED = r"C:/DCASE_Temp/FineTuned"    # embeddings passed as e_qLoRA
# Domain subfolders: "Source" and "Target"
# NOTE(review): the main block of this cell passes the literal strings
# "Source"/"Target" to load_fused_data instead of these two constants.
DOMAIN_SOURCE = "Source"
DOMAIN_TARGET = "Target"


# -------- Gated Fusion Module --------
class GatedFusion(nn.Module):
    """Blend a frozen and a fine-tuned embedding with a learned per-feature gate.

    The two inputs are concatenated along the last dimension, projected back to
    ``embedding_dim`` by a linear layer and squashed by a sigmoid. The result is
    used as a mixing weight: ``gate * e_qLoRA + (1 - gate) * e_frozen``.

    Args:
        embedding_dim: Size of the last dimension of each input embedding.
    """

    def __init__(self, embedding_dim=768):
        super().__init__()
        # The gate sees both embeddings side by side and emits one weight per feature.
        self.gate_layer = nn.Linear(2 * embedding_dim, embedding_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, e_frozen, e_qLoRA):
        """Return the gated mixture of ``e_frozen`` and ``e_qLoRA``.

        Both inputs are expected to share a shape whose last dimension equals
        ``embedding_dim``; the output has that same shape.
        """
        side_by_side = torch.cat((e_frozen, e_qLoRA), dim=-1)
        gate_logits = self.gate_layer(side_by_side)
        gate = self.sigmoid(gate_logits)
        keep_frozen = 1 - gate
        return gate * e_qLoRA + keep_frozen * e_frozen


# -------- Autoencoder (CNN Mahalanobis AE) --------
class CNNMahalanobisAE(nn.Module):
    """Convolutional autoencoder for single-channel 32 x 24 inputs.

    The encoder halves the spatial size three times (32x24 -> 16x12 -> 8x6 ->
    4x3) and projects the flattened feature map to ``latent_dim``. The decoder
    mirrors it and returns a tensor with the same shape as the input.

    Args:
        latent_dim: Width of the bottleneck vector returned as ``z``.

    NOTE(review): the final Sigmoid bounds reconstructions to (0, 1). The
    visible loading code does not rescale its inputs into that range - confirm
    that the training targets are normalised accordingly.
    """

    def __init__(self, latent_dim=128):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),   # 32x24 -> 16x12
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1), nn.ReLU(),  # 16x12 -> 8x6
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1), nn.ReLU(),  # 8x6   -> 4x3
            nn.Flatten(),
            nn.Linear(64 * 4 * 3, latent_dim)
        )
        # With kernel 3, stride 2 and padding 1 a transposed convolution yields
        # 2 * n - 1 + output_padding, so output_padding must be 1 on BOTH axes
        # to double exactly. The previous (1, 0) / (1, 1) / (1, 0) settings
        # produced a 32x19 output that cannot be compared with a 32x24 input.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64 * 4 * 3),
            nn.Unflatten(1, (64, 4, 3)),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1), nn.ReLU(),    # 4x3   -> 8x6
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1), nn.ReLU(),    # 8x6   -> 16x12
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1), nn.Sigmoid()   # 16x12 -> 32x24
        )

    def forward(self, x):
        """Encode and decode ``x``.

        Args:
            x: Tensor of shape (batch, 1, 32, 24).

        Returns:
            Tuple ``(recon, z)``: the reconstruction with the same shape as
            ``x`` and the latent code of shape (batch, latent_dim).
        """
        z = self.encoder(x)
        recon = self.decoder(z)
        return recon, z


# -------- UTILITY FUNCTIONS --------
def to_img(tensor1d):
    """Reshape one embedding vector into a single-channel 32 x 24 image tensor.

    Only the first 768 (= 32 * 24) values are used; anything beyond that is
    dropped.

    Args:
        tensor1d: 1-D tensor holding at least 768 values.

    Returns:
        Tensor of shape (1, 32, 24).

    Raises:
        ValueError: If fewer than 768 values are available.
    """
    height, width = 32, 24
    needed = height * width
    trimmed = tensor1d[:needed]
    if trimmed.numel() != needed:
        # Fail with an explicit message instead of an opaque reshape error.
        raise ValueError(
            f"to_img needs at least {needed} values, got {trimmed.numel()}"
        )
    return trimmed.reshape(1, height, width)

def load_fused_data(domain, fusion_model=None):
    """Load, fuse and reshape every embedding pair of one domain.

    For each machine in ``MACHINE_LIST`` the function walks the ``.npy`` files
    under ``BASE_PRETRAINED/<domain>/<machine>`` and looks for a file with the
    same name under ``BASE_FINETUNED/<domain>/<machine>``. Each pair is fused
    and every row of the fused result is turned into an image tensor by
    ``to_img``.

    Args:
        domain: Sub-folder name of the domain, e.g. "Source" or "Target".
        fusion_model: Module called as ``fusion_model(frozen, qlora)``. When
            omitted, the module-level ``fusion_model`` is used, which is how
            existing callers of ``load_fused_data(domain)`` work.

    Returns:
        List of CPU tensors, one per fused row, each produced by ``to_img``.

    Raises:
        NameError: If no ``fusion_model`` is passed and none exists globally.
    """
    if fusion_model is None:
        try:
            fusion_model = globals()["fusion_model"]
        except KeyError:
            raise NameError(
                "load_fused_data needs a fusion_model argument or a global "
                "'fusion_model'"
            ) from None

    samples = []
    frozen_root = os.path.join(BASE_PRETRAINED, domain)
    qlora_root = os.path.join(BASE_FINETUNED, domain)

    for machine in MACHINE_LIST:
        frozen_machine_dir = os.path.join(frozen_root, machine)
        qlora_machine_dir = os.path.join(qlora_root, machine)

        if not (os.path.isdir(frozen_machine_dir) and os.path.isdir(qlora_machine_dir)):
            print(f"Warning: Missing directory for machine {machine} in domain {domain}")
            continue

        # Sorted so the sample order does not depend on the file system.
        for f_file in sorted(glob.glob(os.path.join(frozen_machine_dir, "*.npy"))):
            # The corresponding qLoRA file is assumed to have the same name.
            q_file = os.path.join(qlora_machine_dir, os.path.basename(f_file))
            if not os.path.exists(q_file):
                print(f"Warning: QLoRA file {q_file} does not exist. Skipping.")
                continue

            # Best effort: a file that cannot be read or moved to the device is
            # reported and skipped rather than aborting the whole scan.
            try:
                emb_frozen = torch.tensor(np.load(f_file), dtype=torch.float32).to(DEVICE)
                emb_qlora = torch.tensor(np.load(q_file), dtype=torch.float32).to(DEVICE)
            except Exception as e:
                print(f"Error loading {f_file} or {q_file}: {e}")
                continue

            if emb_frozen.shape != emb_qlora.shape:
                print(f"Warning: Shape mismatch between {f_file} and {q_file}. Skipping.")
                continue

            # Fusion is inference only. Without no_grad every stored sample
            # would keep the autograd graph (and the device tensors it saved)
            # alive, and training on those samples would later try to
            # backpropagate through the fusion module.
            with torch.no_grad():
                fused = fusion_model(emb_frozen, emb_qlora).cpu()  # one transfer per file

            samples.extend(to_img(timestep) for timestep in fused)
    return samples


def train_autoencoder(model, data, epochs, lr=LR):
    """Train ``model`` to reconstruct the samples in ``data``.

    One optimiser step is taken per sample (effective batch size 1) with Adam
    and a mean-squared-error reconstruction loss. The average loss is printed
    for the first epoch and every tenth epoch.

    Args:
        model: Module whose forward pass returns ``(reconstruction, latent)``.
        data: Sized iterable of sample tensors; each sample gets a leading
            batch dimension added before it is fed to the model.
        epochs: Number of passes over ``data``.
        lr: Learning rate for Adam.

    Returns:
        The same ``model`` instance, trained in place.

    Raises:
        ValueError: If ``data`` contains no samples.
    """
    # len() rather than truthiness: data may be a tensor, not just a list.
    if len(data) == 0:
        raise ValueError("train_autoencoder received no training samples")

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    for epoch in range(1, epochs + 1):
        total_loss = 0.0
        for x in tqdm(data, desc=f"Epoch {epoch}", leave=False):
            # detach(): samples are plain inputs. If they still carry an
            # autograd history, backward() must not run through it.
            # unsqueeze(0) adds the batch dimension, e.g. (1, 32, 24) -> (1, 1, 32, 24).
            x = x.detach().unsqueeze(0).to(DEVICE)
            recon, _ = model(x)
            loss = loss_fn(recon, x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(data)
        if epoch % 10 == 0 or epoch == 1:
            print(f"[Epoch {epoch:03d}] Loss: {avg_loss:.6f}")
    return model


# -------- MAIN PIPELINE: Phase-wise Training with Saving --------
if __name__ == "__main__":
    # The fusion module is shared by both phases. It must stay a module-level
    # name because load_fused_data looks it up as a global.
    # NOTE(review): its weights are randomly initialised here and are neither
    # trained, loaded nor saved in this script - confirm this is intended.
    fusion_model = GatedFusion(EMBED_DIM).to(DEVICE)
    fusion_model.eval()

    # === Phase 1: Training on Source Domain ===
    print("=== Phase 1: Training on Source Domain ===")
    source_samples = load_fused_data(DOMAIN_SOURCE)
    print(f"Loaded {len(source_samples)} fused samples from Source domain.")
    if not source_samples:
        # Stop early with a clear message instead of failing inside training.
        raise RuntimeError("No Source samples could be loaded; nothing to train on.")

    # Initialize and train the autoencoder
    autoencoder = CNNMahalanobisAE(LATENT_DIM).to(DEVICE)
    autoencoder = train_autoencoder(autoencoder, source_samples, epochs=SOURCE_EPOCHS)

    # Save the model trained on source
    torch.save(autoencoder.state_dict(), "autoencoder_source.pth")
    print("Saved source-trained model to autoencoder_source.pth\n")

    # === Phase 2: Fine-tuning on Target Domain ===
    print("=== Phase 2: Fine-tuning on Target Domain ===")
    target_samples = load_fused_data(DOMAIN_TARGET)
    print(f"Loaded {len(target_samples)} fused samples from Target domain.")
    if not target_samples:
        raise RuntimeError("No Target samples could be loaded; nothing to fine-tune on.")

    # Re-load the source-trained model from the checkpoint written above
    autoencoder = CNNMahalanobisAE(LATENT_DIM).to(DEVICE)
    autoencoder.load_state_dict(torch.load("autoencoder_source.pth", map_location=DEVICE))
    print("Loaded source-trained model weights for fine-tuning.")

    # Fine-tune on target domain
    autoencoder = train_autoencoder(autoencoder, target_samples, epochs=TARGET_EPOCHS)

    # Save final fine-tuned model
    torch.save(autoencoder.state_dict(), "autoencoder_finetuned.pth")
    print("Saved fine-tuned model to autoencoder_finetuned.pth\n")

    print("✅ Training pipeline completed.")
    print("Models saved successfully.")

=== Phase 1: Training on Source Domain ===
Error loading C:/DCASE_Temp/PreTrained\Source\bearing\BEATs_aug_section_00_source_train_normal_0001_pro_A_vel_4_loc_A.npy or C:/DCASE_Temp/FineTuned\Source\bearing\BEATs_aug_section_00_source_train_normal_0001_pro_A_vel_4_loc_A.npy: Unable to allocate 4.38 MiB for an array with shape (1148928,) and data type float32
Error loading C:/DCASE_Temp/PreTrained\Source\bearing\BEATs_aug_section_00_source_train_normal_0002_pro_A_vel_4_loc_A.npy or C:/DCASE_Temp/FineTuned\Source\bearing\BEATs_aug_section_00_source_train_normal_0002_pro_A_vel_4_loc_A.npy: Unable to allocate 4.38 MiB for an array with shape (1148928,) and data type float32
Error loading C:/DCASE_Temp/PreTrained\Source\bearing\BEATs_aug_section_00_source_train_normal_0003_pro_A_vel_4_loc_A.npy or C:/DCASE_Temp/FineTuned\Source\bearing\BEATs_aug_section_00_source_train_normal_0003_pro_A_vel_4_loc_A.npy: Unable to allocate 4.38 MiB for an array with shape (1148928,) and data type float32
Err

KeyboardInterrupt: 

In [1]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

# -------- CONFIGURATION --------
# Use the GPU when PyTorch can see one, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EMBED_DIM = 768   # feature width given to GatedFusion (32 * 24 in to_img)
LATENT_DIM = 128  # bottleneck size given to CNNMahalanobisAE

SOURCE_EPOCHS = 50  # epochs for phase 1 (Source domain)
TARGET_EPOCHS = 10  # epochs for phase 2 (Target domain fine-tuning)
LR = 1e-3           # default learning rate of train_autoencoder

# Machine types; each name is a sub-folder under <base>/<domain>/.
MACHINE_LIST = ["bearing", "fan", "gearbox", "slider", "toycar", "toytrain", "valve"]

# Root folders of the two embedding sets that are fused pairwise by file name.
BASE_PRETRAINED = r"C:/DCASE_Temp/PreTrained"
BASE_FINETUNED = r"C:/DCASE_Temp/FineTuned"
# Domain sub-folder names passed to fuse_and_cache.
DOMAIN_SOURCE = "Source"
DOMAIN_TARGET = "Target"

# -------- Gated Fusion --------
class GatedFusion(nn.Module):
    """Learned gate that mixes two embeddings feature by feature.

    A linear layer followed by a sigmoid maps the concatenated pair to a weight
    in (0, 1) per feature; the output is the convex combination of the two
    inputs under that weight.

    Args:
        embedding_dim: Last-dimension size of each input embedding.
    """

    def __init__(self, embedding_dim=768):
        super().__init__()
        concat_width = embedding_dim + embedding_dim
        self.gate_layer = nn.Linear(concat_width, embedding_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, e_frozen, e_qLoRA):
        """Return ``gate * e_qLoRA + (1 - gate) * e_frozen``."""
        pair = torch.cat((e_frozen, e_qLoRA), dim=-1)
        gate = self.sigmoid(self.gate_layer(pair))
        tuned_share = gate * e_qLoRA
        frozen_share = (1 - gate) * e_frozen
        return tuned_share + frozen_share


# -------- CNN Mahalanobis Autoencoder --------
class CNNMahalanobisAE(nn.Module):
    """Convolutional autoencoder operating on (batch, 1, 32, 24) tensors.

    Three stride-2 convolutions shrink the input to a 64 x 4 x 3 feature map,
    which a linear layer compresses to ``latent_dim``. The decoder expands the
    latent code back to the input resolution with three stride-2 transposed
    convolutions and a final Sigmoid.

    Args:
        latent_dim: Size of the latent vector.

    NOTE(review): because of the final Sigmoid the reconstruction lies in
    (0, 1); the visible data-preparation code applies no scaling - confirm the
    inputs are expected to be in that range.
    """

    def __init__(self, latent_dim=128):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),   # -> 16 x 16 x 12
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1), nn.ReLU(),  # -> 32 x 8 x 6
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1), nn.ReLU(),  # -> 64 x 4 x 3
            nn.Flatten(),
            nn.Linear(64 * 4 * 3, latent_dim)
        )
        # Each transposed convolution must exactly double height AND width.
        # For kernel 3 / stride 2 / padding 1 that requires output_padding=1 on
        # both axes; a width padding of 0 gives 2 * n - 1 and the decoder then
        # ends at 32 x 19 instead of 32 x 24, which breaks the MSE loss.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64 * 4 * 3),
            nn.Unflatten(1, (64, 4, 3)),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1), nn.ReLU(),    # -> 32 x 8 x 6
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1), nn.ReLU(),    # -> 16 x 16 x 12
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1), nn.Sigmoid()   # -> 1 x 32 x 24
        )

    def forward(self, x):
        """Return ``(recon, z)`` for a batch ``x`` of shape (batch, 1, 32, 24).

        ``recon`` has the same shape as ``x``; ``z`` has shape
        (batch, latent_dim).
        """
        z = self.encoder(x)
        recon = self.decoder(z)
        return recon, z


# -------- UTILS --------
def to_img(tensor1d):
    """Return the first 768 values of ``tensor1d`` viewed as a (1, 32, 24) tensor."""
    rows, cols = 32, 24
    return tensor1d[:rows * cols].view(1, rows, cols)


def fuse_and_cache(domain, fusion_model, cache_file):
    """Fuse the embedding pairs of one domain and cache the result on disk.

    If ``cache_file`` exists and holds at least one sample it is loaded and
    returned. Otherwise every ``.npy`` file under
    ``BASE_PRETRAINED/<domain>/<machine>`` is paired with the file of the same
    name under ``BASE_FINETUNED/<domain>/<machine>``, fused with
    ``fusion_model`` without gradient tracking, split into rows and converted
    with ``to_img``. A non-empty result is written to ``cache_file``.

    Args:
        domain: Domain sub-folder name, e.g. "Source" or "Target".
        fusion_model: Module called as ``fusion_model(frozen, qlora)``.
        cache_file: Path of the ``torch.save`` file used as cache.

    Returns:
        List of CPU tensors, one per fused row.
    """
    if os.path.exists(cache_file):
        print(f"📦 Loading cached fused data for {domain} from {cache_file}")
        cached = torch.load(cache_file)
        if len(cached) > 0:
            return cached
        # An empty cache can only come from a run in which nothing could be
        # fused. Reusing it would hide the data for good, so rebuild instead.
        print(f"⚠️ Cached data in {cache_file} is empty. Rebuilding.")

    print(f"🔄 Fusing and caching data for {domain} ...")
    samples = []
    frozen_root = os.path.join(BASE_PRETRAINED, domain)
    qlora_root = os.path.join(BASE_FINETUNED, domain)

    for machine in MACHINE_LIST:
        frozen_machine_dir = os.path.join(frozen_root, machine)
        qlora_machine_dir = os.path.join(qlora_root, machine)

        if not (os.path.exists(frozen_machine_dir) and os.path.exists(qlora_machine_dir)):
            print(f"⚠️ Missing directory for {machine} in {domain}. Skipping.")
            continue

        # Sorted so the cached sample order is reproducible across machines.
        for f_file in sorted(glob.glob(os.path.join(frozen_machine_dir, "*.npy"))):
            q_file = os.path.join(qlora_machine_dir, os.path.basename(f_file))
            if not os.path.exists(q_file):
                print(f"⚠️ Missing qLoRA file: {q_file}. Skipping.")
                continue

            # Memory-mapped so the raw file is not read into RAM up front.
            # Best effort: unreadable files are reported and skipped.
            try:
                emb_frozen_np = np.load(f_file, mmap_mode='r')
                emb_qlora_np = np.load(q_file, mmap_mode='r')
            except Exception as e:
                print(f"❌ Error loading {f_file} or {q_file}: {e}")
                continue

            emb_frozen = emb_qlora = fused = None
            try:
                emb_frozen = torch.tensor(emb_frozen_np, dtype=torch.float32).to(DEVICE)
                emb_qlora = torch.tensor(emb_qlora_np, dtype=torch.float32).to(DEVICE)

                with torch.no_grad():
                    fused = fusion_model(emb_frozen, emb_qlora).cpu()

                samples.extend(to_img(timestep) for timestep in fused)
            except RuntimeError as e:
                print(f"🔥 CUDA error while fusing {f_file}: {e}")
            finally:
                # Drop every reference first (on success and on failure) so
                # that empty_cache can actually release the device memory and
                # the memory maps can be closed.
                emb_frozen = emb_qlora = fused = None
                emb_frozen_np = emb_qlora_np = None
                torch.cuda.empty_cache()

    if not samples:
        # Do not write an empty cache: it would be picked up by the next run.
        print(f"⚠️ No samples were fused for {domain}. Nothing cached.")
        return samples

    print(f"✅ Fused {len(samples)} samples from {domain}. Saving to {cache_file}")
    # Write to a temporary file and rename it, so an interrupted save cannot
    # leave a truncated cache behind.
    tmp_file = f"{cache_file}.tmp"
    torch.save(samples, tmp_file)
    os.replace(tmp_file, cache_file)
    return samples


def train_autoencoder(model, data, epochs, lr=LR):
    """Fit ``model`` as an autoencoder on ``data``, one sample per step.

    Uses Adam and an MSE reconstruction loss. The mean loss over all samples
    is printed after the first epoch and after every tenth epoch.

    Args:
        model: Module whose forward pass returns ``(reconstruction, latent)``.
        data: Sized iterable of sample tensors; a batch dimension is added to
            each sample before the forward pass.
        epochs: Number of passes over ``data``.
        lr: Adam learning rate.

    Returns:
        ``model`` itself, updated in place.

    Raises:
        ValueError: If ``data`` is empty.
    """
    n_samples = len(data)
    if n_samples == 0:
        # Previously this surfaced as a ZeroDivisionError after the first epoch.
        raise ValueError("train_autoencoder received no training samples")

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    for epoch in range(1, epochs + 1):
        total_loss = 0.0
        for x in tqdm(data, desc=f"Epoch {epoch}", leave=False):
            # Inputs are constants for the autoencoder: detach any history
            # they may carry, then add the batch dimension and move to DEVICE.
            x = x.detach().unsqueeze(0).to(DEVICE)
            recon, _ = model(x)
            loss = loss_fn(recon, x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / n_samples
        if epoch % 10 == 0 or epoch == 1:
            print(f"[Epoch {epoch:03d}] Loss: {avg_loss:.6f}")
    return model


# -------- MAIN PIPELINE --------
# Two-phase run: train the autoencoder on fused Source-domain samples, then
# fine-tune the saved weights on fused Target-domain samples. Checkpoints and
# fused-data caches are written to the current working directory.
if __name__ == "__main__":
    # One fusion module is shared by both phases and only used for inference.
    # NOTE(review): its weights are randomly initialised here and never trained,
    # loaded or saved, while the fused outputs are cached on disk - a cache
    # written by an earlier run was produced with different gate weights.
    # Confirm this is intended.
    fusion_model = GatedFusion(EMBED_DIM).to(DEVICE)
    fusion_model.eval()

    # === Phase 1: Source Domain ===
    print("\n=== Phase 1: Training on Source Domain ===")
    source_cache = "fused_source.pt"
    source_samples = fuse_and_cache(DOMAIN_SOURCE, fusion_model, source_cache)

    # Fresh autoencoder trained from scratch on the Source samples.
    autoencoder = CNNMahalanobisAE(LATENT_DIM).to(DEVICE)
    autoencoder = train_autoencoder(autoencoder, source_samples, epochs=SOURCE_EPOCHS)

    torch.save(autoencoder.state_dict(), "autoencoder_source.pth")
    print("💾 Saved source-trained model to autoencoder_source.pth")

    # === Phase 2: Target Domain ===
    print("\n=== Phase 2: Fine-tuning on Target Domain ===")
    target_cache = "fused_target.pt"
    target_samples = fuse_and_cache(DOMAIN_TARGET, fusion_model, target_cache)

    # Start phase 2 from the checkpoint written at the end of phase 1.
    autoencoder = CNNMahalanobisAE(LATENT_DIM).to(DEVICE)
    autoencoder.load_state_dict(torch.load("autoencoder_source.pth", map_location=DEVICE))
    print("📥 Loaded source-trained model for fine-tuning.")

    autoencoder = train_autoencoder(autoencoder, target_samples, epochs=TARGET_EPOCHS)

    torch.save(autoencoder.state_dict(), "autoencoder_finetuned.pth")
    print("💾 Saved fine-tuned model to autoencoder_finetuned.pth")

    print("✅ All done. Pipeline completed.")



=== Phase 1: Training on Source Domain ===
🔄 Fusing and caching data for Source ...


: 