In [4]:
!pip install wandb -qU
import wandb
wandb.login()


[notice] A new release of pip is available: 24.1.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtnartsch[0m ([33mda2cs[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:

!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://download.pytorch.org/whl/cu121, https://pypi.ngc.nvidia.com


In [6]:
!pip install tqdm torchsummary

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com



[notice] A new release of pip is available: 24.1.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
from tqdm import tqdm
import random
from torchsummary import summary

In [34]:
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=5, padding=0)
        self.bn2 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=0)
        self.bn3 = nn.BatchNorm2d(32)

        # Fully connected layers
        self.fc1 = nn.Linear(32 * 12 * 12, 41)  # Updated to 32 * 12 * 12
        self.fc1komma5 = nn.Linear(41,32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1)
        self.dropout = nn.Dropout(0.5)

    def forward_one(self, x):
        x = F.relu(self.conv1(x)) # 8 * 112 * 112
        x = F.max_pool2d(x, 2)  # output size: (8, 56, 56)
        x = F.relu(self.conv2(x)) # 16* 52 * 52
        x = F.max_pool2d(x, 2)  # output size: (16, 26, 26)
        x = F.relu(self.conv3(x)) # 32 * 24 * 24
        x = F.max_pool2d(x, 2)  # output size: (32, 12, 12)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc1komma5(x))
        x = F.relu(self.fc2(x))
        return x

    def forward(self, input1, input2):
        output1 = self.forward_one(input1)
        output2 = self.forward_one(input2)
        distance = torch.abs(output1 - output2)
        output = self.fc3(distance)
        return output

In [10]:
from multiprocessing import Pool  # Import Pool for multiprocessing

# Import your worker function from workers.py
import workers

class FaceDataset(Dataset):
    def __init__(self, image_folder, people_dirs, transform=None):
        self.image_folder = image_folder
        self.people_dirs = people_dirs
        self.transform = transform
        self.image_pairs = []
        self.labels = []
        self._prepare_data()

    def _prepare_data(self):
        for person_dir in self.people_dirs:
            person_path = os.path.join(self.image_folder, person_dir)
            images = os.listdir(person_path)
            for i in range(len(images)):
                for j in range(i + 1, len(images)):
                    self.image_pairs.append((os.path.join(person_path, images[i]), os.path.join(person_path, images[j])))
                    self.labels.append(1)

                    # Add negative samples
                    neg_person = person_dir
                    while neg_person == person_dir:
                        neg_person = random.choice(self.people_dirs)

                    neg_images = os.listdir(os.path.join(self.image_folder, neg_person))
                    random_image_index = random.randrange(start=0, stop=len(neg_images))
                    self.image_pairs.append((os.path.join(person_path, images[i]), os.path.join(self.image_folder, neg_person, neg_images[random_image_index])))
                    self.labels.append(0)

    def __len__(self):
        return len(self.image_pairs)

    def __getitem__(self, idx):
        img1_path, img2_path = self.image_pairs[idx]
        label = self.labels[idx]
        img1 = Image.open(img1_path).convert('L')
        img2 = Image.open(img2_path).convert('L')

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)

# Function to split dataset
def split_dataset(image_folder, train_ratio=0.9, val_ratio=0.05, test_ratio=0.05):
    people_dirs = os.listdir(image_folder)
    random.shuffle(people_dirs)

    train_end = int(train_ratio * len(people_dirs))
    val_end = train_end + int(val_ratio * len(people_dirs))

    train_dirs = people_dirs[:train_end]
    val_dirs = people_dirs[train_end:val_end]
    test_dirs = people_dirs[val_end:]

    return train_dirs, val_dirs, test_dirs

# Initialize wandb
wandb.init(project='face-recognition-philip')

# Hyperparameters and setup
batch_size = 512
learning_rate = 0.02
epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wandb.config.update({
    "batch_size": batch_size,
    "learning_rate": learning_rate,
    "epochs": epochs,
    "device": str(device)
})

print(f'Batch size: {batch_size}')
print(f'LR: {learning_rate}')
print(f'Epochs: {epochs}')
print(f'Device: {device}')

# Data augmentation and normalization
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor()
])

# Load dataset
image_folder = 'generated_images_10Kids_cropped'  # Update with the path to your dataset
train_dirs, val_dirs, test_dirs = split_dataset(image_folder)

train_dataset = FaceDataset(image_folder, train_dirs, transform=transform)
val_dataset = FaceDataset(image_folder, val_dirs, transform=transform)
test_dataset = FaceDataset(image_folder, test_dirs, transform=transform)
# Use multiprocessing to load data

# Load data using multiprocessing
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model, loss, and optimizer
model = SiameseNetwork().to(device)
summary(model, [(1, 112, 112), (1, 112, 112)])
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
scaler = torch.amp.GradScaler('cuda')

# Training script with validation
def train(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    accumulation_steps = 4
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{epochs}", unit="batch") as pbar:
            for i,(img1, img2, label) in enumerate(train_loader):
                img1, img2, label = img1.to(device), img2.to(device), label.to(device)
                with torch.amp.autocast('cuda'):
                    outputs = model(img1, img2).squeeze()
                    loss = criterion(outputs, label)
                    loss = loss / accumulation_steps
                scaler.scale(loss).backward()
                if (i+1) % accumulation_steps == 0:
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()
                running_loss += loss.item() * accumulation_steps
                pbar.set_postfix(loss=running_loss / (pbar.n + 1))
                pbar.update(1)
        scheduler.step()
        val_loss, val_accuracy = evaluate(model, val_loader, criterion)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {running_loss/len(train_loader)}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": running_loss / len(train_loader),
            "val_loss": val_loss,
            "val_accuracy": val_accuracy
        })

        # Save the model
        torch.save(model.state_dict(), f'networks/network_epoch{epoch}.pth')

# Evaluation function
def evaluate(model, data_loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for img1, img2, label in data_loader:
            img1, img2, label = img1.to(device), img2.to(device), label.to(device)
            with torch.amp.autocast('cuda'):
                outputs = model(img1, img2).squeeze()
                loss = criterion(outputs, label)
            running_loss += loss.item()
            predicted = (outputs > 0).float()
            correct += (predicted == label).sum().item()
            total += label.size(0)
    accuracy = correct / total
    return running_loss / len(data_loader), accuracy

# Train the model
train(model, train_loader, val_loader, criterion, optimizer, epochs=epochs)

# Evaluate on test set
test_loss, test_accuracy = evaluate(model, test_loader, criterion)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

# Log final test metrics to wandb
wandb.log({
    "test_loss": test_loss,
    "test_accuracy": test_accuracy
})

# Finish wandb run
wandb.finish()

0,1
epoch,▁▂▄▅▇█
train_loss,█▃▁▁▁▁
val_accuracy,▁▇▇▇██
val_loss,█▂▂▂▁▁

0,1
epoch,6.0
train_loss,0.42408
val_accuracy,0.80559
val_loss,0.42701


Batch size: 512
LR: 0.02
Epochs: 10
Device: cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 8, 112, 112]              80
            Conv2d-2           [-1, 16, 52, 52]           3,216
            Conv2d-3           [-1, 32, 24, 24]           4,640
            Linear-4                   [-1, 41]         188,969
            Linear-5                   [-1, 32]           1,344
            Linear-6                   [-1, 16]             528
            Conv2d-7          [-1, 8, 112, 112]              80
            Conv2d-8           [-1, 16, 52, 52]           3,216
            Conv2d-9           [-1, 32, 24, 24]           4,640
           Linear-10                   [-1, 41]         188,969
           Linear-11                   [-1, 32]           1,344
           Linear-12                   [-1, 16]             528
           Linear-13                    [-1, 1]       

Epoch 1/10:  68%|██████▊   | 4514/6680 [1:23:42<40:10,  1.11s/batch, loss=0.516]  


KeyboardInterrupt: 

CUDA available: True
Number of GPUs: 1
GPU Name: NVIDIA GeForce RTX 3060


In [20]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import pandas as pd

class RealTestDataset(Dataset):
    def __init__(self, image_folder, pairs_file, transform=None):
        self.image_folder = image_folder
        self.pairs = self._read_pairs(pairs_file)
        self.transform = transform

    def _read_pairs(self, pairs_file):
        pairs = []
        with open(pairs_file, 'r') as file:
            for line in file:
                name_1, number_1, name_2, number_2 = line.strip().split()
                img1_path = os.path.join(self.image_folder, name_1, f'{name_1}_{int(number_1):04d}.png')
                img2_path = os.path.join(self.image_folder, name_2, f'{name_2}_{int(number_2):04d}.png')
                label = 1 if name_1 == name_2 else 0
                pairs.append((img1_path.replace('\\', '/'), img2_path.replace('\\', '/'), label))
        return pairs

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img1_path, img2_path, label = self.pairs[idx]
        try:
            img1 = Image.open(img1_path).convert('L')
            img2 = Image.open(img2_path).convert('L')
        except FileNotFoundError:
            # Handle missing image files
            print(f"Warning: Image file not found for pair {idx}. Returning None.")
            return None

        if self.transform:
            img1 = self.transform(img1) if img1 else None
            img2 = self.transform(img2) if img2 else None

        return img1, img2, torch.tensor(label, dtype=torch.float32)

# Define the transform
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor()
])

# Initialize the dataset and dataloader
image_folder = 'lfw_cropped/lfw_cropped'  # Update with the path to your unzipped dataset folder
pairs_file = 'pairs.txt'  # Update with the path to pairs.txt
test_dataset = RealTestDataset(image_folder, pairs_file, transform=transform)

# Filter out pairs with missing images and create DataLoader
filtered_pairs = [pair for pair in test_dataset if pair is not None]
filtered_dataset = [x for x in filtered_pairs if x is not None]
test_loader = DataLoader(filtered_dataset, batch_size=1, shuffle=False)

# Load the saved model weights
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SiameseNetwork().to(device)
model.load_state_dict(torch.load('networks/final_network.pth'))
model.eval()

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    mated_scores = []
    non_mated_scores = []
    with torch.no_grad():
        for img1, img2, label in data_loader:
            if img1 is None or img2 is None:
                # Handle missing images: Skip or assign a placeholder score
                continue
            
            img1, img2, label = img1.to(device), img2.to(device), label.to(device)
            outputs = model(img1, img2).squeeze()
            score = torch.sigmoid(outputs).item()
            if label.item() == 1:
                mated_scores.append(score)
            else:
                non_mated_scores.append(score)
    
    # Pad the shorter list with -1 for uneven lengths (optional)
    max_len = max(len(mated_scores), len(non_mated_scores))
    mated_scores.extend([-1] * (max_len - len(mated_scores)))
    non_mated_scores.extend([-1] * (max_len - len(non_mated_scores)))
    
    return mated_scores, non_mated_scores

# Evaluate on the real test set
mated_scores, non_mated_scores = evaluate(model, test_loader)

# Save mated and non-mated scores to .txt files
mated_scores_file = 'mated_scores.txt'
non_mated_scores_file = 'non_mated_scores.txt'

with open(mated_scores_file, 'w') as f:
    for score in mated_scores:
        f.write(f'{score}\n')

with open(non_mated_scores_file, 'w') as f:
    for score in non_mated_scores:
        f.write(f'{score}\n')

print(f'Mated Scores: {mated_scores[:5]}')
print(f'Non-Mated Scores: {non_mated_scores[:5]}')

# Zip the files
import zipfile

with zipfile.ZipFile('predictions.zip', 'w') as zipf:
    zipf.write(mated_scores_file)
    zipf.write(non_mated_scores_file)

# Optionally, save the results to CSV
results_df = pd.DataFrame({'Mated_Scores': mated_scores, 'Non_Mated_Scores': non_mated_scores})
results_df.to_csv('evaluation_results.csv', index=False)




  model.load_state_dict(torch.load('networks/final_network.pth'))


Mated Scores: [0.9458690285682678, 0.9680026173591614, 0.05446825921535492, 0.5319024324417114, 0.6844567656517029]
Non-Mated Scores: [0.07858853042125702, 6.347186717903242e-05, 0.24736875295639038, 8.3736922533717e-05, 0.9689295291900635]


In [23]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

class CustomTestDataset(Dataset):
    def __init__(self, image_folder, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        self.subfolders = [subfolder for subfolder in os.listdir(image_folder) if os.path.isdir(os.path.join(image_folder, subfolder))]
        self.pairs = self._create_pairs()

    def _create_pairs(self):
        pairs = []
        for subfolder in self.subfolders:
            subfolder_path = os.path.join(self.image_folder, subfolder)
            images = [img for img in os.listdir(subfolder_path) if img.endswith('.png')]
            if len(images) < 2:
                continue  # Skip folders with fewer than 2 images
            for i in range(len(images)):
                for j in range(i+1, len(images)):
                    img1_path = os.path.join(subfolder_path, images[i])
                    img2_path = os.path.join(subfolder_path, images[j])
                    label = 1 if images[i].split('_')[0] == images[j].split('_')[0] else 0
                    pairs.append((img1_path.replace('\\', '/'), img2_path.replace('\\', '/'), label))
        return pairs

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img1_path, img2_path, label = self.pairs[idx]
        try:
            img1 = Image.open(img1_path).convert('L')
            img2 = Image.open(img2_path).convert('L')
        except FileNotFoundError:
            # Handle missing image files
            print(f"Warning: Image file not found for pair {idx}. Returning None.")
            return None

        if self.transform:
            img1 = self.transform(img1) if img1 else None
            img2 = self.transform(img2) if img2 else None

        return img1, img2, torch.tensor(label, dtype=torch.float32)

# Define the transform
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor()
])

# Initialize the dataset and dataloader
image_folder = 'lfw_cropped/lfw_cropped'  # Update with the path to your unzipped dataset folder
test_dataset = CustomTestDataset(image_folder, transform=transform)

# Create DataLoader for the entire dataset
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)


# Load the saved model weights
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SiameseNetwork().to(device)
model.load_state_dict(torch.load('networks/final_network.pth'))
model.eval()

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for img1, img2, label in data_loader:
            if img1 is None or img2 is None:
                # Handle missing images: Skip or assign a placeholder prediction
                all_labels.append(label.item())
                all_predictions.append(-1)  # Placeholder for missing image
                continue
            
            img1, img2, label = img1.to(device), img2.to(device), label.to(device)
            outputs = model(img1, img2).squeeze()
            score = torch.sigmoid(outputs).item()
            prediction = 1 if score >= 0.5 else 0
            
            all_labels.append(label.item())
            all_predictions.append(prediction)
    
    return all_labels, all_predictions

# Evaluate on the entire dataset
all_labels, all_predictions = evaluate(model, test_loader)

# Compute accuracy, precision, recall, and F1-score
accuracy = accuracy_score(all_labels, all_predictions)
precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_predictions, average='binary')

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')

# Optionally, save the results to CSV
results_df = pd.DataFrame({'Labels': all_labels, 'Predictions': all_predictions})
results_df.to_csv('evaluation_results.csv', index=False)


  model.load_state_dict(torch.load('networks/final_network.pth'))


Accuracy: 0.5614
Precision: 1.0000
Recall: 0.5614
F1-score: 0.7191


In [49]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import pandas as pd
import zipfile

# Load the trained model
device = torch.device("cpu") #"cuda" if torch.cuda.is_available() else 
model = SiameseNetwork().to(device)
model.load_state_dict(torch.load('network_epoch9.pth'))
model.eval()
 
# Define the transformation (should match the transformation used during training)
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
])
 
def preprocess_image(image_path):
    image = Image.open(image_path).convert('L')
    image = transform(image)
    image = image.unsqueeze(0)  # Add batch dimension
    return image
 
def compute_similarity(model, image1, image2, alpha=0.5):
    with torch.no_grad():
        embedding1 = model.forward_one(image1)
        embedding2 = model.forward_one(image2)
        distance = torch.mean(F.pairwise_distance(embedding1, embedding2))
        similarity = torch.exp(-alpha * distance)
        # similarity = 1 / (1 + torch.exp(alpha * distance))
    return similarity.item()
 
# Paths to the two images to be compared
image1_path = 'generated_images_10Kids_cropped/00012/010_00012.png'
image2_path = 'generated_images_10Kids_cropped/00011/012_00011.png'
 
# Preprocess the images
image1 = preprocess_image(image1_path)
image2 = preprocess_image(image2_path)
 
# Compute similarity
similarity = compute_similarity(model, image1, image2)
print(f'Similarity between the images: {similarity}')

Similarity between the images: 0.008769567124545574


  model.load_state_dict(torch.load('network_epoch9.pth'))


In [68]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import pandas as pd
import zipfile

# Load the trained model
device = torch.device("cpu")
model = SiameseNetwork().to(device)
model.load_state_dict(torch.load('network_epoch9.pth'))
model.eval()
 
# Define the transformation (should match the transformation used during training)
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
])
 
def preprocess_image(image_path):
    image = Image.open(image_path).convert('L')
    image = transform(image)
    image = image.unsqueeze(0)  # Add batch dimension
    return image
 
def compute_similarity(model, image1, image2, alpha=0.155):
    with torch.no_grad():
        embedding1 = model.forward_one(image1)
        embedding2 = model.forward_one(image2)
        distance = torch.mean(F.pairwise_distance(embedding1, embedding2))
        similarity = torch.exp(-alpha * distance)
        #similarity = 1 / (1 + torch.exp(alpha * distance))
    return similarity.item()
    #return distance
 

class RealTestDataset(Dataset):
    def __init__(self, image_folder, pairs_file, transform=None):
        self.image_folder = image_folder
        self.pairs = self._read_pairs(pairs_file)
        self.transform = transform

    def _read_pairs(self, pairs_file):
        pairs = []
        with open(pairs_file, 'r') as file:
            for line in file:
                name_1, number_1, name_2, number_2 = line.strip().split()
                img1_path = os.path.join(self.image_folder, name_1, f'{name_1}_{int(number_1):04d}.png')
                img2_path = os.path.join(self.image_folder, name_2, f'{name_2}_{int(number_2):04d}.png')
                label = 1 if name_1 == name_2 else 0
                pairs.append((img1_path.replace('\\', '/'), img2_path.replace('\\', '/'), label))
        return pairs

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img1_path, img2_path, label = self.pairs[idx]
        try:
            img1 = Image.open(img1_path).convert('L')
            img2 = Image.open(img2_path).convert('L')
        except FileNotFoundError:
            # Handle missing image files
            print(f"Warning: Image file not found for pair {idx}. Returning placeholder tensors.")
            img1 = Image.new('L', (112, 112))  # Create a black image placeholder
            img2 = Image.new('L', (112, 112))  # Create a black image placeholder

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)


# Define the transform
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor()
])

# Initialize the dataset and dataloader
image_folder = 'lfw_cropped/lfw_cropped'  # Update with the path to your unzipped dataset folder
pairs_file = 'pairs.txt'  # Update with the path to pairs.txt
test_dataset = RealTestDataset(image_folder, pairs_file, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Load the saved model weights
device = torch.device("cpu")
model = SiameseNetwork().to(device)
model.load_state_dict(torch.load('network_epoch9.pth'))
model.eval()

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    mated_scores = []
    non_mated_scores = []
    all_scores = []
    all_labels = []
    total_pairs = len(data_loader.dataset)

    print(f"Evaluating {total_pairs} pairs...")
    for idx, (img1, img2, label) in enumerate(data_loader):
        if img1 is None or img2 is None:
            mated_scores.append(-1)
            non_mated_scores.append(-1)
            continue
        
        img1, img2, label = img1.to(device), img2.to(device), label.to(device)
        # Compute similarity
        score = compute_similarity(model, img1, img2)
        
        all_scores.append(score)
        all_labels.append(label.item())

        if label.item() == 1:
            mated_scores.append(score)
        else:
            non_mated_scores.append(score)

        # Print progress
        if (idx + 1) % 100 == 0 or (idx + 1) == total_pairs:
            print(f"Progress: {idx + 1}/{total_pairs}")

    return mated_scores, non_mated_scores, all_scores, all_labels

# Evaluate on the real test set specified in pairs.txt
mated_scores, non_mated_scores, all_scores, all_labels = evaluate(model, test_loader)

# Save mated and non-mated scores to .txt files
mated_scores_file = 'mated_scores.txt'
non_mated_scores_file = 'non_mated_scores.txt'

with open(mated_scores_file, 'w') as f:
    for score in mated_scores:
        f.write(f'{score}\n')

with open(non_mated_scores_file, 'w') as f:
    for score in non_mated_scores:
        f.write(f'{score}\n')

print(f'Mated Scores: {mated_scores[:5]}')
print(f'Non-Mated Scores: {non_mated_scores[:5]}')

# Zip the files
zip_filename = 'predictions.zip'
with zipfile.ZipFile(zip_filename, 'w') as zipf:
    zipf.write(mated_scores_file)
    zipf.write(non_mated_scores_file)

print(f"Predictions saved to {zip_filename}")

# Calculate accuracy for mated scores (> 0.5)
mated_correct = sum(score > 0.5 for score in mated_scores)
mated_accuracy = mated_correct / len(mated_scores) * 100

# Calculate accuracy for non-mated scores (< 0.5)
non_mated_correct = sum(score < 0.5 for score in non_mated_scores)
non_mated_accuracy = non_mated_correct / len(non_mated_scores) * 100

print(f"Mated Accuracy: {mated_accuracy:.2f}%")
print(f"Non-Mated Accuracy: {non_mated_accuracy:.2f}%")

# Calculate mean absolute error (MAE)
mae = sum(abs(score - label) for score, label in zip(all_scores, all_labels)) / len(all_labels)
print(f"Mean Absolute Error (MAE): {mae:.4f}")

# Optionally, save the results to CSV
results_df = pd.DataFrame({'Mated_Scores': mated_scores, 'Non_Mated_Scores': non_mated_scores})
results_df.to_csv('evaluation_results.csv', index=False)
print("Results saved to evaluation_results.csv")


  model.load_state_dict(torch.load('network_epoch9.pth'))
  model.load_state_dict(torch.load('network_epoch9.pth'))


Evaluating 6000 pairs...
score0.7192949056625366
score0.8376251459121704
score0.5571852922439575
score0.49213168025016785
score0.9999994039535522
score0.4819338321685791
score0.6932709217071533
score0.6255530118942261
score0.38000646233558655
score0.6552281975746155
score0.5055068135261536
score0.48556917905807495
score0.7381981611251831
score0.5737495422363281
score0.5716968774795532
score0.4911485016345978
score0.8158038854598999
score0.5785324573516846
score0.36657798290252686
score0.6555354595184326
score0.5046345591545105
score0.5633445978164673
score0.6378840804100037
score0.5788945555686951
score0.5040513277053833
score0.6757874488830566
score0.8231813907623291
score0.6567812561988831
score0.7152513265609741
score0.5501581430435181
score0.3212437033653259
score0.4838410019874573
score0.5686158537864685
score0.4345352351665497
score0.4738387167453766
score0.700465977191925
score0.5389913320541382
score0.4111427664756775
score0.7611255645751953
score0.5897251963615417
score0.56040

ValueError: All arrays must be of the same length