In [25]:
import os
import numpy as np
# Load the embeddings from the .npy file
embeddings = np.load("embeddings.npy")

# Read the image paths from output.txt
with open("image_paths.txt", "r") as f:
    image_paths = [line.strip() for line in f.readlines()]

# Ensure the number of embeddings matches the number of image paths
if len(embeddings) != len(image_paths):
    raise ValueError("Number of embeddings does not match the number of image paths.")

# Create a dictionary mapping image names (without .jpg) to embeddings
image_to_embedding = {
    os.path.splitext(os.path.basename(image_path))[0]: embedding
    for image_path, embedding in zip(image_paths, embeddings)
}



In [26]:
# # Example labeled dataset with varying length feature vectors
# X = [
#     [[0.5, 1.2, 0.7], [1.1, 0.3, 1.4]],  # Two feature vectors
#     [[0.2, 0.8, 1.0]],                   # One feature vector
#     [[1.4, 0.6, 1.2], [0.9, 1.0, 0.5]],  # Two feature vectors
#     [[0.3, 1.1]],                       # One feature vector
#     [[0.7, 0.5, 0.9], [0.1, 0.4, 0.6]]   # Two feature vectors
# ]

# # Labels (0 or 1 indicating compatibility)
# y = np.array([1, 0, 1, 0, 1])  # 1 = compatible, 0 = not compatible
# Open the file to read the data
with open('output.txt', 'r') as file:
    lines = file.readlines()

# Initialize the output vectors
y = []
X = []

# Process each line
for line in lines:
    parts = line.split()  # Split the line into parts
    first_item = parts[0]  # The first element, which is always '1'
    remaining_items = parts[1:]  # The remaining elements

    # Append the first item to the y list and the remaining items to the X list
    y.append(first_item)
    X.append(remaining_items)

# Convert y to integers (since it's all '1', this will create a list of 1's)
y = [int(i) for i in y]

for row in X:
    for i in range(len(row)):
        # Replace each element in the row with its corresponding value from the dictionary
        row[i] = image_to_embedding[row[i]]

In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split

class DeepSetsLSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, dropout=0.2):
        super(DeepSetsLSTMClassifier, self).__init__()
        
        # Set bidirectional=True to make the LSTM bi-directional
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True, bidirectional=True)
        # The hidden_dim will now be doubled since it's bi-directional
        self.rho = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),  # Multiply hidden_dim by 2
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.Dropout(dropout)
            
        )
        
        # Change the final layer to have 1 output for binary classification
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x, mask=None):
        """
        Forward pass for the DeepSets model with LSTM
        
        Args:
            x: Input tensor of shape (batch_size, max_set_size, input_dim)
            mask: Boolean mask of shape (batch_size, max_set_size) indicating valid elements
        
        Returns:
            Tensor of shape (batch_size, 1) containing the logit for binary classification
        """
        batch_size, max_set_size, _ = x.shape
        
        # LSTM processing each element in the set
        lstm_out, _ = self.lstm(x)
        
        # If a mask is provided, apply it to the LSTM output
        if mask is not None:
            mask = mask.unsqueeze(-1)  # Expand mask dimension to match output shape
            lstm_out = lstm_out * mask
        
        # Aggregating the set elements by summing across the set
        x_aggregated = torch.sum(lstm_out, dim=1)
        
        # Pass through the rho network for final features
        x = self.rho(x_aggregated)
        
        # Output a single logit for binary classification
        x = self.fc(x)
        
        return x  # This is the logit, which will be passed to the loss function

class EmbeddingSetDataset(Dataset):
    def __init__(self, embedding_sets, labels, max_set_size=None):
        """
        Dataset for handling sets of numpy embedding arrays
        
        Args:
            embedding_sets: List of lists, where each inner list contains numpy arrays (embeddings)
            labels: List of labels for each set (0 or 1 for binary classification)
            max_set_size: Maximum size for padding sets. If None, uses size of largest set
        """
        self.embedding_sets = embedding_sets
        self.labels = labels
        self.max_set_size = max_set_size or max(len(s) for s in embedding_sets)
        
        # Validate that all embeddings have the same dimension
        embedding_dims = set(emb.shape[0] for subset in embedding_sets for emb in subset)
        if len(embedding_dims) > 1:
            raise ValueError("All embeddings must have the same dimension")
        self.embedding_dim = embedding_dims.pop()
    
    def __len__(self):
        return len(self.embedding_sets)
    
    def __getitem__(self, idx):
        embedding_set = self.embedding_sets[idx]
        label = self.labels[idx]
        
        # Create padded tensor for the embeddings
        padded_set = torch.zeros((self.max_set_size, self.embedding_dim))
        mask = torch.zeros(self.max_set_size, dtype=torch.bool)
        
        # Fill in actual embeddings and mask
        for i, embedding in enumerate(embedding_set):
            padded_set[i] = torch.from_numpy(embedding).float()
            mask[i] = True
        
        return padded_set, mask, torch.tensor(label)

def train_step(model, optimizer, data_loader, device):
    """Single training epoch"""
    model.train()
    total_loss = 0
    
    for batch_data, batch_mask, batch_labels in data_loader:
        batch_data = batch_data.to(device)
        batch_mask = batch_mask.to(device)
        batch_labels = batch_labels.to(device)
        
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(batch_data, batch_mask)
        
        # Use binary cross entropy loss for binary classification
        loss = F.binary_cross_entropy_with_logits(outputs.squeeze(), batch_labels.float())
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(data_loader)

def evaluate(model, data_loader, device):
    """Evaluation step on the validation or test dataset"""
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for batch_data, batch_mask, batch_labels in data_loader:
            batch_data = batch_data.to(device)
            batch_mask = batch_mask.to(device)
            batch_labels = batch_labels.to(device)
            
            outputs = model(batch_data, batch_mask)
            loss = F.binary_cross_entropy_with_logits(outputs.squeeze(), batch_labels.float())
            total_loss += loss.item()
            
            # Convert logits to binary predictions (0 or 1)
            predicted = torch.round(torch.sigmoid(outputs)).squeeze().long()
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()
    
    avg_loss = total_loss / len(data_loader)
    accuracy = 100 * correct / total
    return avg_loss, accuracy

def train_val_test_split(embedding_sets, labels, train_size=0.7, val_size=0.2, test_size=0.1, random_seed=42):
    """Split data into train, validation, and test sets"""
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        embedding_sets, labels, test_size=test_size, random_state=random_seed
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=val_size / (train_size + val_size), random_state=random_seed
    )
    
    return (X_train, y_train), (X_val, y_val), (X_test, y_test)
# Example usage:
if __name__ == "__main__":
    # Example data
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = train_val_test_split(X, y)

    # Create datasets and dataloaders
    train_dataset = EmbeddingSetDataset(X_train, y_train)
    val_dataset = EmbeddingSetDataset(X_val, y_val)
    test_dataset = EmbeddingSetDataset(X_test, y_test)

    train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Initialize model with input dimension matching embedding dimension
    model = DeepSetsLSTMClassifier(input_dim=1280, hidden_dim=256)  # Adjust hidden_dim as needed
    optimizer = optim.AdamW(model.parameters(), lr=0.001)

    # Train model
    num_epochs = 50
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    for epoch in range(num_epochs):
        # Train step
        train_loss = train_step(model, optimizer, train_dataloader, device)
        
        # Validation step
        val_loss, val_accuracy = evaluate(model, val_dataloader, device)
        
        # Print loss and accuracy
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")
    
    # Test model
    test_loss, test_accuracy = evaluate(model, test_dataloader, device)
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")


Epoch 1/50, Train Loss: 0.6242, Val Loss: 0.6211, Val Accuracy: 64.56%
Epoch 2/50, Train Loss: 0.5786, Val Loss: 0.6104, Val Accuracy: 66.91%
Epoch 3/50, Train Loss: 0.5523, Val Loss: 0.6175, Val Accuracy: 64.02%
Epoch 4/50, Train Loss: 0.5240, Val Loss: 0.6108, Val Accuracy: 65.19%
Epoch 5/50, Train Loss: 0.4970, Val Loss: 0.6196, Val Accuracy: 64.56%
Epoch 6/50, Train Loss: 0.4820, Val Loss: 0.6436, Val Accuracy: 65.28%
Epoch 7/50, Train Loss: 0.4587, Val Loss: 0.6209, Val Accuracy: 65.83%
Epoch 8/50, Train Loss: 0.4315, Val Loss: 0.6386, Val Accuracy: 64.38%
Epoch 9/50, Train Loss: 0.3974, Val Loss: 0.6512, Val Accuracy: 64.02%
Epoch 10/50, Train Loss: 0.3824, Val Loss: 0.7623, Val Accuracy: 60.78%
Epoch 11/50, Train Loss: 0.3694, Val Loss: 0.6935, Val Accuracy: 61.68%
Epoch 12/50, Train Loss: 0.3499, Val Loss: 1.0224, Val Accuracy: 57.71%
Epoch 13/50, Train Loss: 0.3357, Val Loss: 0.7463, Val Accuracy: 58.97%
Epoch 14/50, Train Loss: 0.3247, Val Loss: 0.8408, Val Accuracy: 59.24%
E