In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sentence_transformers import SentenceTransformer

# ─── DEVICE SELECTION ──────────────────────────────────────────
device = torch.device("mps" if torch.has_mps else "cpu")
print(f"Using device: {device}")

# ─── SIMULATE DATA ─────────────────────────────────────────────
# Dataset A → classes 0 and 1
X_A = torch.randn(1000, 14)
y_A = torch.randint(0, 2, (1000,))

# Dataset B → classes 0 and 2
X_B = torch.randn(500, 14) + 1.0  # shift distribution
y_B = torch.randint(0, 2, (500,))
y_B = y_B * 2  # labels 0 and 2

# ─── SCALE FEATURES ────────────────────────────────────────────
scaler = StandardScaler()
X_A_scaled = torch.tensor(scaler.fit_transform(X_A), dtype=torch.float32)
X_B_scaled = torch.tensor(scaler.transform(X_B), dtype=torch.float32)

# ─── CLASS DESCRIPTIONS → EMBEDDINGS ───────────────────────────
class_texts = {
    0: "normal behavior",
    1: "attack type A",
    2: "attack type B"
}

text_encoder = SentenceTransformer('all-MiniLM-L6-v2')

class_embeddings = {}
for c, text in class_texts.items():
    emb = text_encoder.encode(text, convert_to_tensor=True)
    class_embeddings[c] = emb / emb.norm()  # normalize

# ─── TIME-SERIES ENCODER MODEL ────────────────────────────────
class TimeSeriesEncoder(nn.Module):
    def __init__(self, input_dim, emb_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, emb_dim)
        )
        
    def forward(self, x):
        x = self.net(x)
        return x / x.norm(dim=-1, keepdim=True)  # normalize

# ─── PREPARE DATA ON DEVICE ────────────────────────────────────
embedding_dim = class_embeddings[0].shape[0]

# Move model to device
model = TimeSeriesEncoder(input_dim=14, emb_dim=embedding_dim).to(device)

# Move data to device
X_A_tensor = X_A_scaled.to(device)
y_A_tensor = y_A.to(device)
X_B_tensor = X_B_scaled.to(device)
y_B_tensor = y_B.to(device)

# Move class embeddings to device
for c in class_embeddings:
    class_embeddings[c] = class_embeddings[c].to(device)

# ─── TRAINING LOOP ON DATASET A ────────────────────────────────
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(20):
    model.train()
    optimizer.zero_grad()
    
    time_emb = model(X_A_tensor)
    
    class_emb_batch = torch.stack([class_embeddings[label.item()] for label in y_A_tensor])
    
    cos_sim = (time_emb * class_emb_batch).sum(dim=-1)
    loss = -cos_sim.mean()
    
    loss.backward()
    optimizer.step()
    
    if (epoch+1) % 5 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# ─── ZERO-SHOT EVALUATION ON DATASET B ─────────────────────────
model.eval()
with torch.no_grad():
    time_emb_B = model(X_B_tensor)
    
    # Similarity to all classes (0,1,2)
    all_class_emb = torch.stack([class_embeddings[c] for c in [0, 1, 2]])
    sims = torch.matmul(time_emb_B, all_class_emb.T)
    
    preds = sims.argmax(dim=1)
    pred_labels = torch.tensor([ [0,1,2][p] for p in preds ]).to(device)
    
    acc = (pred_labels == y_B_tensor).float().mean().item()
    print(f"\nZero-Shot Accuracy on Dataset B (with unseen class 2!): {acc:.4f}")


  device = torch.device("mps" if torch.has_mps else "cpu")


Using device: mps
Epoch 5, Loss: -0.1586
Epoch 10, Loss: -0.3182
Epoch 15, Loss: -0.4360
Epoch 20, Loss: -0.5192

Zero-Shot Accuracy on Dataset B (with unseen class 2!): 0.4560


In [3]:

print("MPS available:", torch.backends.mps.is_available())
print("MPS built:", torch.backends.mps.is_built())

MPS available: True
MPS built: True


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sentence_transformers import SentenceTransformer
import random

# ─── DEVICE ────────────────────────────────────────────────────
device = torch.device("mps" if torch.has_mps else "cpu")
print(f"Using device: {device}")

# ─── DATA SIMULATION ───────────────────────────────────────────
# Domain A → attack X
X_A = torch.randn(1000, 14)
y_A = torch.randint(0, 2, (1000,))  # 0 = normal, 1 = attack X

# Domain B → attack Y
X_B = torch.randn(1000, 14) + 1.0
y_B = torch.randint(0, 2, (1000,))  # 0 = normal, 1 = attack Y

# Domain C → attack Z → Zero-Shot → No samples of class 1 seen in training!
X_C = torch.randn(500, 14) + 2.0
y_C = torch.randint(0, 2, (500,))  # 0 = normal, 1 = attack Z

# ─── FEATURE SCALING ───────────────────────────────────────────
scaler = StandardScaler()
X_A_scaled = torch.tensor(scaler.fit_transform(X_A), dtype=torch.float32)
X_B_scaled = torch.tensor(scaler.transform(X_B), dtype=torch.float32)
X_C_scaled = torch.tensor(scaler.transform(X_C), dtype=torch.float32)

# ─── CLASS DESCRIPTIONS ────────────────────────────────────────
class_texts = {
    0: "normal behavior",
    1: "port scan attack",         # attack X (Domain A)
    2: "brute force login attack"  # attack Z (Domain C - Zero-Shot)
}

text_encoder = SentenceTransformer('all-MiniLM-L6-v2')

class_embeddings = {}
for c, text in class_texts.items():
    emb = text_encoder.encode(text, convert_to_tensor=True)
    class_embeddings[c] = emb / emb.norm()

# ─── TIME-SERIES ENCODER ───────────────────────────────────────
class TimeSeriesEncoder(nn.Module):
    def __init__(self, input_dim, emb_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, emb_dim)
        )
        
    def forward(self, x):
        x = self.net(x)
        return x / x.norm(dim=-1, keepdim=True)

# ─── PREPARE DATA ──────────────────────────────────────────────
embedding_dim = class_embeddings[0].shape[0]
model = TimeSeriesEncoder(input_dim=14, emb_dim=embedding_dim).to(device)

# Move data to device
X_A_tensor = X_A_scaled.to(device)
y_A_tensor = y_A.to(device)
X_B_tensor = X_B_scaled.to(device)
y_B_tensor = y_B.to(device)
X_C_tensor = X_C_scaled.to(device)
y_C_tensor = y_C.to(device)

# Move class embeddings to device
for c in class_embeddings:
    class_embeddings[c] = class_embeddings[c].to(device)

# ─── REPLAY BUFFER ─────────────────────────────────────────────
replay_buffer = []

# ─── TRAIN FUNCTION ────────────────────────────────────────────
def train_on_domain(X, y, domain_name, epochs=50, replay_ratio=0.2):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        
        # Build batch with replay buffer
        replay_size = int(replay_ratio * len(replay_buffer))
        main_indices = random.sample(range(len(X)), 256)
        replay_indices = random.sample(range(len(replay_buffer)), replay_size) if replay_size > 0 else []
        
        # Current domain batch
        X_batch = X[main_indices]
        y_batch = y[main_indices]
        
        # Replay batch
        if replay_indices:
            X_replay = torch.stack([ replay_buffer[i][0] for i in replay_indices ])
            y_replay = torch.stack([ replay_buffer[i][1] for i in replay_indices ])
            
            X_batch = torch.cat([X_batch, X_replay], dim=0)
            y_batch = torch.cat([y_batch, y_replay], dim=0)
        
        # Forward
        time_emb = model(X_batch)
        class_emb_batch = torch.stack([class_embeddings[int(label.item())] for label in y_batch])
        
        # Contrastive loss → cosine similarity
        cos_sim = (time_emb * class_emb_batch).sum(dim=-1)
        loss = -cos_sim.mean()
        
        # Backprop
        loss.backward()
        optimizer.step()
        
        if (epoch+1) % 5 == 0:
            print(f"[{domain_name}] Epoch {epoch+1}, Loss: {loss.item():.4f}")
    
    # Update replay buffer
    for i in range(len(X)):
        if len(replay_buffer) < 500:  # Max buffer size
            replay_buffer.append( (X[i], y[i]) )
        else:
            # Replace random old sample
            j = random.randint(0, len(replay_buffer)-1)
            replay_buffer[j] = (X[i], y[i])

# ─── ZERO-SHOT EVAL ────────────────────────────────────────────
def zero_shot_eval(X, y, domain_name, eval_classes):
    model.eval()
    with torch.no_grad():
        time_emb = model(X)
        
        all_class_emb = torch.stack([class_embeddings[c] for c in eval_classes])
        sims = torch.matmul(time_emb, all_class_emb.T)
        
        preds = sims.argmax(dim=1)
        pred_labels = torch.tensor([ eval_classes[p] for p in preds ]).to(device)
        
        acc = (pred_labels == y).float().mean().item()
        print(f"\n[{domain_name}] Zero-Shot Accuracy on classes {eval_classes}: {acc:.4f}")

# ─── FULL TRAINING PIPELINE ────────────────────────────────────

# Step 1: Train on Domain A
print("\n=== Training on Domain A ===")
train_on_domain(X_A_tensor, y_A_tensor, domain_name="Domain A", epochs=10)

# Step 2: Continual Learning → Domain B
print("\n=== Continual Learning on Domain B ===")
train_on_domain(X_B_tensor, y_B_tensor, domain_name="Domain B", epochs=10)

# Step 3: Zero-Shot Inference on Domain C → unseen attack Z
print("\n=== Zero-Shot Evaluation on Domain C ===")
zero_shot_eval(X_C_tensor, y_C_tensor, domain_name="Domain C", eval_classes=[0, 1, 2])


  device = torch.device("mps" if torch.has_mps else "cpu")


Using device: mps

=== Training on Domain A ===
[Domain A] Epoch 5, Loss: -0.1406
[Domain A] Epoch 10, Loss: -0.2821

=== Continual Learning on Domain B ===
[Domain B] Epoch 5, Loss: -0.4113
[Domain B] Epoch 10, Loss: -0.5039

=== Zero-Shot Evaluation on Domain C ===

[Domain C] Zero-Shot Accuracy on classes [0, 1, 2]: 0.0000


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sentence_transformers import SentenceTransformer
import random

# ─── DEVICE ────────────────────────────────────────────────────
device = torch.device("mps" if torch.has_mps else "cpu")
print(f"Using device: {device}")

# ─── DATA SIMULATION ───────────────────────────────────────────
# Domain A → attack X
X_A = torch.randn(1000, 14)
y_A = torch.randint(0, 2, (1000,))  # 0 = normal, 1 = attack X

# Domain B → attack Y
X_B = torch.randn(1000, 14) + 1.0
y_B = torch.randint(0, 2, (1000,))  # 0 = normal, 1 = attack Y

# Domain C → attack Z → Now also class 1 → ZSCL (no training!)
X_C = torch.randn(500, 14) + 2.0
y_C = torch.randint(0, 2, (500,))  # 0 = normal, 1 = attack Z

# ─── FEATURE SCALING ───────────────────────────────────────────
scaler = StandardScaler()
X_A_scaled = torch.tensor(scaler.fit_transform(X_A), dtype=torch.float32)
X_B_scaled = torch.tensor(scaler.transform(X_B), dtype=torch.float32)
X_C_scaled = torch.tensor(scaler.transform(X_C), dtype=torch.float32)

# ─── CLASS DESCRIPTIONS ────────────────────────────────────────
class_texts = {
    0: "normal behavior",
    1: "attack behavior"
}

text_encoder = SentenceTransformer('all-MiniLM-L6-v2')

class_embeddings = {}
for c, text in class_texts.items():
    emb = text_encoder.encode(text, convert_to_tensor=True)
    class_embeddings[c] = emb / emb.norm()

# ─── DOMAIN EMBEDDINGS ─────────────────────────────────────────
domain_texts = {
    "A": "domain A",
    "B": "domain B",
    "C": "domain C"
}

domain_embeddings = {}
for d, text in domain_texts.items():
    emb = text_encoder.encode(text, convert_to_tensor=True)
    domain_embeddings[d] = emb / emb.norm()

# ─── DOMAIN-CONDITIONED TIME-SERIES ENCODER ────────────────────
class DomainConditionedEncoder(nn.Module):
    def __init__(self, input_dim, domain_emb_dim, output_emb_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim + domain_emb_dim, 64),
            nn.ReLU(),
            nn.Linear(64, output_emb_dim)
        )
        
    def forward(self, x, domain_emb):
        domain_emb_expanded = domain_emb.unsqueeze(0).repeat(len(x), 1)
        x_cat = torch.cat([x, domain_emb_expanded], dim=1)
        x_out = self.net(x_cat)
        return x_out / x_out.norm(dim=-1, keepdim=True)

# ─── PREPARE DATA ──────────────────────────────────────────────
embedding_dim = class_embeddings[0].shape[0]
domain_emb_dim = domain_embeddings["A"].shape[0]

model = DomainConditionedEncoder(input_dim=14, domain_emb_dim=domain_emb_dim, output_emb_dim=embedding_dim).to(device)

# Move data to device
X_A_tensor = X_A_scaled.to(device)
y_A_tensor = y_A.to(device)
X_B_tensor = X_B_scaled.to(device)
y_B_tensor = y_B.to(device)
X_C_tensor = X_C_scaled.to(device)
y_C_tensor = y_C.to(device)

# Move class embeddings to device
for c in class_embeddings:
    class_embeddings[c] = class_embeddings[c].to(device)

# Move domain embeddings to device
for d in domain_embeddings:
    domain_embeddings[d] = domain_embeddings[d].to(device)

# ─── REPLAY BUFFER ─────────────────────────────────────────────
replay_buffer = []

# ─── TRAIN FUNCTION ────────────────────────────────────────────
def train_on_domain(X, y, domain_name, epochs=50, replay_ratio=0.2):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    domain_emb = domain_embeddings[domain_name]
    
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        
        # Build batch with replay buffer
        replay_size = int(replay_ratio * len(replay_buffer))
        main_indices = random.sample(range(len(X)), 256)
        replay_indices = random.sample(range(len(replay_buffer)), replay_size) if replay_size > 0 else []
        
        # Current domain batch
        X_batch = X[main_indices]
        y_batch = y[main_indices]
        
        # Replay batch
        if replay_indices:
            X_replay = torch.stack([ replay_buffer[i][0] for i in replay_indices ])
            y_replay = torch.stack([ replay_buffer[i][1] for i in replay_indices ])
            
            X_batch = torch.cat([X_batch, X_replay], dim=0)
            y_batch = torch.cat([y_batch, y_replay], dim=0)
        
        # Forward pass
        time_emb = model(X_batch, domain_emb)
        
        all_class_emb = torch.stack([class_embeddings[c] for c in class_texts.keys()])
        logits = torch.matmul(time_emb, all_class_emb.T)
        
        targets = torch.tensor([int(label.item()) for label in y_batch]).to(device)
        
        ce_loss = nn.CrossEntropyLoss()(logits, targets)
        
        ce_loss.backward()
        optimizer.step()
        
        if (epoch+1) % 5 == 0:
            print(f"[{domain_name}] Epoch {epoch+1}, Loss: {ce_loss.item():.4f}")
    
    # Update replay buffer
    for i in range(len(X)):
        if len(replay_buffer) < 500:  # Max buffer size
            replay_buffer.append( (X[i], y[i]) )
        else:
            j = random.randint(0, len(replay_buffer)-1)
            replay_buffer[j] = (X[i], y[i])

# ─── EVALUATION ────────────────────────────────────────────────
def evaluate(X, y, domain_name, eval_classes):
    model.eval()
    with torch.no_grad():
        domain_emb = domain_embeddings[domain_name]
        time_emb = model(X, domain_emb)
        
        all_class_emb = torch.stack([class_embeddings[c] for c in eval_classes])
        sims = torch.matmul(time_emb, all_class_emb.T)
        
        preds = sims.argmax(dim=1)
        pred_labels = torch.tensor([ eval_classes[p] for p in preds ]).to(device)
        
        acc = (pred_labels == y).float().mean().item()
        print(f"\n[{domain_name}] Accuracy on classes {eval_classes}: {acc:.4f}")

# ─── FULL ZSCL PIPELINE ────────────────────────────────────────

# Step 1: Train on Domain A
print("\n=== Training on Domain A ===")
train_on_domain(X_A_tensor, y_A_tensor, domain_name="A", epochs=500)

# Step 2: Continual Learning → Domain B
print("\n=== Continual Learning on Domain B ===")
train_on_domain(X_B_tensor, y_B_tensor, domain_name="B", epochs=500)

# 🚫 🚫 🚫 IMPORTANT: NO training on Domain C 🚫 🚫 🚫
# We skip:
# train_on_domain(X_C_tensor, y_C_tensor, domain_name="C", epochs=10)

# Step 3: Zero-Shot Evaluation on unseen Domain C!
print("\n=== Zero-Shot Evaluation on Domain C ===")
evaluate(X_C_tensor, y_C_tensor, domain_name="C", eval_classes=[0, 1])


  device = torch.device("mps" if torch.has_mps else "cpu")


Using device: mps

=== Training on Domain A ===
[A] Epoch 5, Loss: 0.6901
[A] Epoch 10, Loss: 0.6935
[A] Epoch 15, Loss: 0.6884
[A] Epoch 20, Loss: 0.6865
[A] Epoch 25, Loss: 0.6758
[A] Epoch 30, Loss: 0.6845
[A] Epoch 35, Loss: 0.6755
[A] Epoch 40, Loss: 0.6687
[A] Epoch 45, Loss: 0.6725
[A] Epoch 50, Loss: 0.6653
[A] Epoch 55, Loss: 0.6475
[A] Epoch 60, Loss: 0.6459
[A] Epoch 65, Loss: 0.6607
[A] Epoch 70, Loss: 0.6493
[A] Epoch 75, Loss: 0.6260
[A] Epoch 80, Loss: 0.6218
[A] Epoch 85, Loss: 0.6021
[A] Epoch 90, Loss: 0.6018
[A] Epoch 95, Loss: 0.6099
[A] Epoch 100, Loss: 0.5994
[A] Epoch 105, Loss: 0.6028
[A] Epoch 110, Loss: 0.5896
[A] Epoch 115, Loss: 0.5989
[A] Epoch 120, Loss: 0.5810
[A] Epoch 125, Loss: 0.5751
[A] Epoch 130, Loss: 0.5683
[A] Epoch 135, Loss: 0.5483
[A] Epoch 140, Loss: 0.5430
[A] Epoch 145, Loss: 0.5729
[A] Epoch 150, Loss: 0.5549
[A] Epoch 155, Loss: 0.5231
[A] Epoch 160, Loss: 0.5537
[A] Epoch 165, Loss: 0.5392
[A] Epoch 170, Loss: 0.5593
[A] Epoch 175, Loss: