In [1]:
#!pip install wandb -qU
import wandb
# Authenticate against Weights & Biases up front so the later wandb.init()/wandb.log()
# calls in the training cell have a logged-in session to use.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtnartsch[0m ([33mda2cs[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
#from google.colab import drive
#drive.mount('/content/drive')

# # Unzip data
#!unzip /content/drive/MyDrive/generated_images_10Kids_cropped.zip -d my_data

In [3]:
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/121


Looking in indexes: https://download.pytorch.org/whl/121, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
from tqdm import tqdm
from torchsummary import summary
import random

In [5]:
class SiameseNetwork(nn.Module):
    """Siamese CNN that scores whether two grayscale face crops show the same person.

    Both inputs go through the same convolutional/dense tower (shared weights);
    the element-wise absolute difference of the two 16-d embeddings is mapped by
    a final linear layer + sigmoid to a single score in (0, 1).

    The first dense layer is sized for 1 x 112 x 112 inputs (32 * 12 * 12
    features after the third pooling step).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (spatial sizes given for 112 x 112 input)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)    # -> 8 x 112 x 112
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5, padding=0)   # -> 16 x 52 x 52
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=0)  # -> 32 x 24 x 24

        # Dense head producing the 16-d embedding, plus the final similarity scorer
        self.fc1 = nn.Linear(32 * 12 * 12, 41)
        self.fc1komma5 = nn.Linear(41, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward_one(self, x):
        """Embed one batch of images: (B, 1, 112, 112) -> (B, 16), values >= 0."""
        # Each conv stage is conv -> ReLU -> 2x2 max-pool: 112 -> 56, 52 -> 26, 24 -> 12.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Flatten everything except the batch dimension for the dense layers.
        x = x.view(x.size(0), -1)
        for dense in (self.fc1, self.fc1komma5, self.fc2):
            x = F.relu(dense(x))
        return x

    def forward(self, input1, input2):
        """Return the same-person score, shape (B, 1), for two image batches."""
        embedding1 = self.forward_one(input1)
        embedding2 = self.forward_one(input2)
        # L1-style component-wise distance between the two embeddings.
        gap = torch.abs(embedding1 - embedding2)
        return torch.sigmoid(self.fc3(gap))

In [8]:
import os
import random

class FaceDataset(Dataset):
    """Image-pair dataset for Siamese training.

    For every identity folder in ``people_dirs`` (sub-folders of ``image_folder``)
    all unordered pairs of its images are added as positive samples (label 1).
    Each positive pair is followed by one negative sample (label 0) that pairs
    the first image with a randomly chosen image of a different identity.

    Args:
        image_folder: Root folder containing one sub-folder per identity.
        people_dirs: Names of the identity sub-folders to draw pairs from.
        transform: Optional callable applied to both PIL images of a pair.

    Notes:
        * Folder listings are sorted because ``os.listdir`` returns entries in
          arbitrary order; together with a seeded ``random`` module this makes
          the generated pairs reproducible.
        * If no other non-empty identity folder exists, negatives cannot be
          sampled; a warning is issued and only positive pairs are produced
          (the previous implementation looped forever in that case).
    """

    def __init__(self, image_folder, people_dirs, transform=None):
        self.image_folder = image_folder
        self.people_dirs = people_dirs
        self.transform = transform
        self.image_pairs = []
        self.labels = []
        self._prepare_data()

    def _prepare_data(self):
        """Populate ``self.image_pairs`` / ``self.labels`` with positive and negative pairs."""
        # Read every identity folder exactly once instead of once per negative pair.
        listings = {
            person_dir: sorted(os.listdir(os.path.join(self.image_folder, person_dir)))
            for person_dir in self.people_dirs
        }
        warned_no_negatives = False

        for person_dir in self.people_dirs:
            person_path = os.path.join(self.image_folder, person_dir)
            images = listings[person_dir]
            # Identities usable as negatives: a different folder that actually holds images.
            neg_candidates = [
                other for other in self.people_dirs
                if other != person_dir and listings[other]
            ]
            if not neg_candidates and len(images) > 1 and not warned_no_negatives:
                import warnings
                warnings.warn(
                    "FaceDataset: no other identity with images available; "
                    "only positive pairs will be generated."
                )
                warned_no_negatives = True

            for i in range(len(images)):
                anchor_path = os.path.join(person_path, images[i])
                for j in range(i + 1, len(images)):
                    self.image_pairs.append((anchor_path, os.path.join(person_path, images[j])))
                    self.labels.append(1)

                    # Add negative samples
                    if not neg_candidates:
                        continue
                    neg_person = random.choice(neg_candidates)
                    neg_image = random.choice(listings[neg_person])
                    self.image_pairs.append(
                        (anchor_path, os.path.join(self.image_folder, neg_person, neg_image))
                    )
                    self.labels.append(0)

    def __len__(self):
        """Number of (positive + negative) pairs."""
        return len(self.image_pairs)

    @staticmethod
    def _load_grayscale(path):
        """Open ``path`` and return it as a single-channel ('L') PIL image.

        The file handle is closed as soon as the converted copy exists, so large
        datasets do not accumulate open files.
        """
        with Image.open(path) as img:
            return img.convert('L')

    def __getitem__(self, idx):
        """Return ``(img1, img2, label)`` where ``label`` is a float32 scalar tensor (1 = same person)."""
        img1_path, img2_path = self.image_pairs[idx]
        label = self.labels[idx]
        img1 = self._load_grayscale(img1_path)
        img2 = self._load_grayscale(img2_path)

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)

# Function to split dataset
def split_dataset(image_folder, train_ratio=0.9, val_ratio=0.1, test_ratio=0.0, random_seed=None):
    """Split the identity folders under ``image_folder`` into train/val/test lists.

    The split is done per identity (sub-folder), so no person appears in more
    than one split.

    Args:
        image_folder: Root folder holding one sub-folder per identity.
        train_ratio: Fraction of identities for training (count is truncated to int).
        val_ratio: Fraction of identities for validation (count is truncated to int).
        test_ratio: Fraction of identities for testing. ``0`` yields an empty
            test list. When the three ratios sum to 1 the test split also
            receives identities left over by integer truncation.
        random_seed: If given, seeds both ``random`` and ``torch`` so the
            shuffle (and later random pair sampling) is reproducible.

    Returns:
        ``(train_dirs, val_dirs, test_dirs)`` — three lists of folder names.
        Identities not covered by the requested ratios are not assigned to any split.
    """
    if random_seed is not None:  # enable setting a random seed for reproducible splitting
        random.seed(random_seed)
        torch.manual_seed(random_seed)

    # os.listdir order is arbitrary: sort first so a fixed seed really gives the same split,
    # and keep only directories so stray files are not treated as identities.
    people_dirs = sorted(
        entry for entry in os.listdir(image_folder)
        if os.path.isdir(os.path.join(image_folder, entry))
    )
    random.shuffle(people_dirs)

    n_people = len(people_dirs)
    train_end = int(train_ratio * n_people)
    val_end = train_end + int(val_ratio * n_people)
    if test_ratio > 0 and abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9:
        # Ratios cover the whole dataset: hand truncation leftovers to the test split.
        test_end = n_people
    else:
        test_end = val_end + int(test_ratio * n_people)

    train_dirs = people_dirs[:train_end]
    val_dirs = people_dirs[train_end:val_end]
    test_dirs = people_dirs[val_end:test_end]

    return train_dirs, val_dirs, test_dirs

# Start the wandb run first: the config update and all later wandb.log() calls attach to it
wandb.init(project='face-dataset-project')

# Hyperparameters and setup
batch_size = 128
learning_rate = 0.01
epochs = 50
# Use the GPU when one is visible to torch, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Log hyperparameters to wandb
wandb.config.update({
    "batch_size": batch_size,
    "learning_rate": learning_rate,
    "epochs": epochs,
    "device": str(device)
})

print(f'Batch size: {batch_size}')
print(f'LR: {learning_rate}')
print(f'Epochs: {epochs}')
print(f'Device: {device}')

# Preprocessing only (no augmentation is applied): resize to the 112 x 112 input
# that SiameseNetwork's first dense layer is sized for, then convert to a tensor
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor()
])

# Load dataset: identities (sub-folders) are split with the default ratios of
# split_dataset (0.9 train / 0.1 val / 0.0 test); the fixed seed seeds random and torch
image_folder = 'generated_images_10Kids_cropped'  # Update with the path to your dataset
train_dirs, val_dirs, test_dirs = split_dataset(image_folder, random_seed=69)

# Pair generation happens eagerly in the FaceDataset constructor
train_dataset = FaceDataset(image_folder, train_dirs, transform=transform)
val_dataset = FaceDataset(image_folder, val_dirs, transform=transform)
test_dataset = FaceDataset(image_folder, test_dirs, transform=transform)

# Only the training pairs are shuffled; evaluation order is kept fixed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model = SiameseNetwork().to(device)
# Print the layer table for a pair of 1 x 112 x 112 inputs
summary(model, [(1, 112, 112), (1, 112, 112)])
# BCELoss expects probabilities, which matches the sigmoid output of SiameseNetwork.forward
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training script with validation
def train(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    """Train ``model`` and, after every epoch, validate, log to wandb and save a checkpoint.

    Args:
        model: Network called as ``model(img1, img2)``; its output is flattened
            to one score per sample before the loss is computed.
        train_loader: Iterable with ``len()`` yielding ``(img1, img2, label)`` batches.
        val_loader: Loader passed to ``evaluate`` after each epoch.
        criterion: Loss taking ``(scores, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Side effects:
        Prints one summary line per epoch, logs ``epoch``/``train_loss``/
        ``val_loss``/``val_accuracy`` to wandb and writes
        ``networks/network_epoch{epoch}.pth`` (``epoch`` is 0-based in the file name).
    """
    # Create the checkpoint folder up front so a missing directory cannot throw away
    # a complete epoch of training when torch.save is reached.
    os.makedirs('networks', exist_ok=True)

    # Feed batches to whatever device the model lives on instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_batches = len(train_loader)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        with tqdm(total=num_batches, desc=f"Epoch {epoch+1}/{epochs}", unit="batch") as pbar:
            for img1, img2, label in train_loader:
                img1, img2, label = img1.to(target_device), img2.to(target_device), label.to(target_device)
                optimizer.zero_grad()
                # reshape(-1) keeps a length-1 vector for a final batch of size 1;
                # a bare squeeze() would yield a 0-d tensor that no longer matches the label shape.
                outputs = model(img1, img2).reshape(-1)
                loss = criterion(outputs, label)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                pbar.set_postfix(loss=running_loss / (pbar.n + 1))
                pbar.update(1)

        # An empty training loader has no defined mean loss.
        train_loss = running_loss / num_batches if num_batches else float('nan')
        val_loss, val_accuracy = evaluate(model, val_loader, criterion)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")

        # Log metrics to wandb
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy
        })

        # Save the model
        torch.save(model.state_dict(), f'networks/network_epoch{epoch}.pth')

# Evaluation function
def evaluate(model, data_loader, criterion):
    """Compute mean batch loss and accuracy of ``model`` on ``data_loader``.

    Args:
        model: Network called as ``model(img1, img2)`` returning one score per
            sample (any shape that flattens to the batch size, e.g. ``(B, 1)``).
        data_loader: Iterable of ``(img1, img2, label)`` batches.
        criterion: Loss taking ``(scores, labels)``.

    Returns:
        ``(mean_loss, accuracy)``: the average of the per-batch losses and the
        fraction of samples whose score thresholded at 0.5 equals the label.
        Both are ``nan`` when ``data_loader`` yields no samples (e.g. an empty
        test split) instead of raising ``ZeroDivisionError``.

    The model is switched to eval mode and left in that mode.
    """
    model.eval()
    # Feed batches to whatever device the model lives on instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    with torch.no_grad():
        for img1, img2, label in data_loader:
            img1, img2, label = img1.to(target_device), img2.to(target_device), label.to(target_device)
            # reshape(-1) keeps a length-1 vector for a batch of size 1;
            # a bare squeeze() would yield a 0-d tensor that no longer matches the label shape.
            outputs = model(img1, img2).reshape(-1)
            loss = criterion(outputs, label)
            running_loss += loss.item()
            predicted = (outputs > 0.5).float()
            correct += (predicted == label).sum().item()
            total += label.size(0)
            num_batches += 1

    if total == 0:
        # Nothing to evaluate: metrics are undefined rather than a crash.
        return float('nan'), float('nan')
    accuracy = correct / total
    return running_loss / num_batches, accuracy

# Train the model. The try/finally guarantees the wandb run is closed even when the
# (multi-hour) training is interrupted or raises, so no run is left dangling.
try:
    train(model, train_loader, val_loader, criterion, optimizer, epochs=epochs)

    # Evaluate on test set — only if the split actually produced test pairs
    # (the default test_ratio of 0.0 can leave the test loader empty).
    if len(test_loader) > 0:
        test_loss, test_accuracy = evaluate(model, test_loader, criterion)
        print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

        # Log final test metrics to wandb
        wandb.log({
            "test_loss": test_loss,
            "test_accuracy": test_accuracy
        })
    else:
        print('Test set is empty; skipping test evaluation.')
finally:
    # Finish wandb run
    wandb.finish()


Batch size: 128
LR: 0.01
Epochs: 50
Device: cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 8, 112, 112]              80
            Conv2d-2           [-1, 16, 52, 52]           3,216
            Conv2d-3           [-1, 32, 24, 24]           4,640
            Linear-4                   [-1, 41]         188,969
            Linear-5                   [-1, 32]           1,344
            Linear-6                   [-1, 16]             528
            Conv2d-7          [-1, 8, 112, 112]              80
            Conv2d-8           [-1, 16, 52, 52]           3,216
            Conv2d-9           [-1, 32, 24, 24]           4,640
           Linear-10                   [-1, 41]         188,969
           Linear-11                   [-1, 32]           1,344
           Linear-12                   [-1, 16]             528
           Linear-13                    [-1, 1]       

Epoch 1/50: 100%|██████████| 26718/26718 [1:12:11<00:00,  6.17batch/s, loss=0.462]


Epoch 1/50, Train Loss: 0.46197469149782355, Val Loss: 0.4507094030480226, Val Accuracy: 0.788528947368421


Epoch 2/50: 100%|██████████| 26718/26718 [1:12:10<00:00,  6.17batch/s, loss=0.418]


Epoch 2/50, Train Loss: 0.4181603285836704, Val Loss: 0.4281388880699401, Val Accuracy: 0.8050394736842105


Epoch 3/50: 100%|██████████| 26718/26718 [1:16:21<00:00,  5.83batch/s, loss=0.404]


Epoch 3/50, Train Loss: 0.40374975677882763, Val Loss: 0.3886322734602054, Val Accuracy: 0.8289578947368421


Epoch 4/50: 100%|██████████| 26718/26718 [1:12:17<00:00,  6.16batch/s, loss=0.396]


Epoch 4/50, Train Loss: 0.3955266611019387, Val Loss: 0.37963944192475124, Val Accuracy: 0.8323552631578948


Epoch 5/50: 100%|██████████| 26718/26718 [1:12:17<00:00,  6.16batch/s, loss=0.391]


Epoch 5/50, Train Loss: 0.391199621001808, Val Loss: 0.37447915582815056, Val Accuracy: 0.8373105263157895


Epoch 6/50: 100%|██████████| 26718/26718 [1:12:27<00:00,  6.15batch/s, loss=0.383]


Epoch 6/50, Train Loss: 0.3825654202695869, Val Loss: 0.38875300623162823, Val Accuracy: 0.8305526315789473


Epoch 7/50: 100%|██████████| 26718/26718 [1:13:59<00:00,  6.02batch/s, loss=0.38] 


Epoch 7/50, Train Loss: 0.379543391272545, Val Loss: 0.3645467275609951, Val Accuracy: 0.8393815789473684


Epoch 8/50: 100%|██████████| 26718/26718 [1:16:10<00:00,  5.85batch/s, loss=0.378]


Epoch 8/50, Train Loss: 0.3779658335330413, Val Loss: 0.3664481172575899, Val Accuracy: 0.8396394736842105


Epoch 9/50: 100%|██████████| 26718/26718 [1:27:49<00:00,  5.07batch/s, loss=0.377]


Epoch 9/50, Train Loss: 0.3774044023087321, Val Loss: 0.4339585000565655, Val Accuracy: 0.8052973684210526


Epoch 10/50: 100%|██████████| 26718/26718 [1:24:02<00:00,  5.30batch/s, loss=0.376]


Epoch 10/50, Train Loss: 0.3762639691819375, Val Loss: 0.3880745762248319, Val Accuracy: 0.82695


Epoch 11/50:  50%|████▉     | 13342/26718 [47:29<1:06:35,  3.35batch/s, loss=0.376]

In [None]:
#!pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121

Looking in indexes: https://download.pytorch.org/whl/nightly/cu121, https://pypi.ngc.nvidia.com


In [None]:
# Report the CUDA setup. The device name is only queried when a GPU is present,
# because torch.cuda.get_device_name(0) raises on CPU-only machines.
cuda_available = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()
print("CUDA available:", cuda_available)
print("Number of GPUs:", gpu_count)
print("GPU Name:", torch.cuda.get_device_name(0) if cuda_available and gpu_count > 0 else "n/a")

CUDA available: True
Number of GPUs: 1
GPU Name: NVIDIA GeForce RTX 3060
