In [1]:
# !pip install wandb -qU
# import wandb
# wandb.login()

In [None]:
# Colab-only setup: mount Google Drive so files stored there are reachable
# from the runtime under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Extract the dataset archive from Drive into the local `my_data` directory.
!unzip /content/drive/MyDrive/generated_images_10Kids_cropped.zip -d my_data

In [None]:

!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
from tqdm import tqdm
from torchsummary import summary

In [2]:
# Siamese Network
class TinySiameseNetwork(nn.Module):
    """Minimal Siamese network for single-channel images.

    Both images go through the same convolutional stack and a small fully
    connected head that ends in a sigmoid, giving one score per image. The
    network output is the absolute difference of the two scores.

    Args:
        input_size: Spatial size of the input images, either one int for
            square images or a ``(height, width)`` pair. The convolutions
            keep the spatial size (3x3, stride 1, padding 1) and each of the
            five 2x2 max-pools halves it (rounding down), so the flattened
            feature vector has ``2 * (height // 32) * (width // 32)`` values.
            The default of 896 reproduces the original ``2 * 28 * 28`` layer.

    Raises:
        ValueError: If the input is too small to survive the five pooling
            stages (either side smaller than 32 pixels).
    """

    def __init__(self, input_size=896):
        super(TinySiameseNetwork, self).__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Five MaxPool2d(2, 2) layers: each floors the size to half, which is
        # the same as a single floor division by 2**5.
        num_pools = 5
        feat_h = height // (2 ** num_pools)
        feat_w = width // (2 ** num_pools)
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: each side must be at "
                f"least {2 ** num_pools} pixels"
            )

        # Layer order (and therefore state_dict keys) is unchanged.
        self.conv_net = nn.Sequential(
            nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(2, 4, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(8, 4, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(4, 2, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(2 * feat_h * feat_w, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
            nn.Sigmoid()
        )

    def forward_once(self, x):
        """Map one batch of images ``(N, 1, H, W)`` to scores of shape ``(N, 1)``."""
        output = self.conv_net(x)
        # Flatten everything except the batch dimension for the linear head.
        output = output.view(output.size()[0], -1)
        output = self.fc(output)
        return output

    def forward(self, img1, img2):
        """Return ``|score(img1) - score(img2)|`` with shape ``(N, 1)``."""
        output1 = self.forward_once(img1)
        output2 = self.forward_once(img2)
        return torch.abs(output1 - output2)


class SiameseNetwork(nn.Module):
    """Siamese similarity network for single-channel images.

    Each image is embedded by a shared stack of three convolutions followed by
    three fully connected layers. The element-wise absolute difference of the
    two embeddings is mapped by a final linear layer and a sigmoid to one
    value in (0, 1) per pair.

    The first linear layer expects ``32 * 12 * 12`` features, which is what
    the convolution/pooling stack produces for 112x112 inputs.
    """

    def __init__(self):
        super().__init__()

        # Shared convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5, padding=0)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=0)

        # Dense embedding head, then the single-output comparison layer.
        flattened_features = 32 * 12 * 12
        self.fc1 = nn.Linear(flattened_features, 41)
        self.fc1komma5 = nn.Linear(41, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward_one(self, x):
        """Embed one batch of images into 16-dimensional, ReLU-activated vectors."""
        # For 112x112 inputs the spatial size goes
        # 112 -> 56 -> (52) 26 -> (24) 12 across the three conv+pool stages.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(F.relu(conv(x)), 2)

        # Collapse channels and spatial dims, keeping the batch dimension.
        x = x.view(x.shape[0], -1)

        for dense in (self.fc1, self.fc1komma5, self.fc2):
            x = F.relu(dense(x))
        return x

    def forward(self, input1, input2):
        """Return the sigmoid score for each pair, with shape ``(N, 1)``."""
        embedding1 = self.forward_one(input1)
        embedding2 = self.forward_one(input2)
        gap = (embedding1 - embedding2).abs()
        return torch.sigmoid(self.fc3(gap))


In [3]:
import random

class FaceDataset(Dataset):
    """Image pairs labelled 1 (same person) or 0 (different people).

    ``image_folder`` must contain one sub-directory per person, each holding
    that person's image files. For every selected person, every unordered pair
    of their images becomes a positive sample, and each positive sample is
    followed by one negative sample pairing the first image with a random image
    of another selected person.

    Args:
        image_folder: Root directory with one sub-directory per person.
        indices: Positions (into the alphabetically sorted list of person
            directories) of the people that belong to this dataset. Negative
            samples are drawn only from these people, so splits stay disjoint.
        transform: Optional callable applied to both images of a pair.

    Notes:
        * Directory listings are sorted and hidden entries (names starting
          with ``.``) are ignored, so a given ``indices`` list selects the same
          people on every machine.
        * If no other selected person has images (for example a split with a
          single identity), negatives cannot be drawn. The dataset then holds
          positive pairs only and a ``RuntimeWarning`` is emitted, instead of
          looping forever.
        * Negative sampling uses the global ``random`` module; seed it for
          reproducible pairs.
    """

    def __init__(self, image_folder, indices, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        self.image_pairs = []
        self.labels = []
        self.indices = indices
        self._prepare_data()

    @staticmethod
    def _list_visible(path, keep):
        """Return the sorted, non-hidden entries of ``path`` accepted by ``keep``."""
        return sorted(
            name for name in os.listdir(path)
            if not name.startswith('.') and keep(os.path.join(path, name))
        )

    def _prepare_data(self):
        """Populate ``self.image_pairs`` and ``self.labels``."""
        import warnings  # local import keeps the class self-contained

        people_dirs = self._list_visible(self.image_folder, os.path.isdir)
        selected_people = [people_dirs[idx] for idx in self.indices]

        # List each selected person's images once instead of once per pair.
        images_by_person = {}
        for person_dir in selected_people:
            if person_dir in images_by_person:
                continue
            person_path = os.path.join(self.image_folder, person_dir)
            images_by_person[person_dir] = [
                os.path.join(person_path, name)
                for name in self._list_visible(person_path, os.path.isfile)
            ]

        # Only people that actually have images can supply a negative sample.
        negative_pool = [p for p in selected_people if images_by_person[p]]
        distinct_negative_people = set(negative_pool)
        warned = False

        for person_dir in selected_people:
            images = images_by_person[person_dir]

            # At least one *other* person with images must exist, otherwise the
            # rejection loop below could never terminate.
            others_needed = 2 if person_dir in distinct_negative_people else 1
            can_sample_negative = len(distinct_negative_people) >= others_needed
            if not can_sample_negative and len(images) > 1 and not warned:
                warnings.warn(
                    "FaceDataset: no other person with images is available in "
                    "this split, so no negative pairs were generated",
                    RuntimeWarning,
                )
                warned = True

            for first in range(len(images)):
                for second in range(first + 1, len(images)):
                    self.image_pairs.append((images[first], images[second]))
                    self.labels.append(1)

                    if not can_sample_negative:
                        continue

                    # Add a negative sample from a different person.
                    neg_person = person_dir
                    while neg_person == person_dir:
                        neg_person = random.choice(negative_pool)
                    neg_image = random.choice(images_by_person[neg_person])
                    self.image_pairs.append((images[first], neg_image))
                    self.labels.append(0)

    def __len__(self):
        return len(self.image_pairs)

    def __getitem__(self, idx):
        """Return ``(img1, img2, label)`` with the label as a float32 tensor."""
        img1_path, img2_path = self.image_pairs[idx]
        label = self.labels[idx]

        # Context managers close the underlying files as soon as the grayscale
        # copies have been created.
        with Image.open(img1_path) as handle:
            img1 = handle.convert('L')
        with Image.open(img2_path) as handle:
            img2 = handle.convert('L')

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)

# Split dataset into training, validation, and test sets
def split_dataset(total_len, train_ratio=0.75, val_ratio=0.15, test_ratio=0.10, random_seed=None):
    """Shuffle ``range(total_len)`` and cut it into train/val/test index lists.

    Args:
        total_len: Number of items to split.
        train_ratio: Fraction of items for training; the count is truncated
            with ``int``.
        val_ratio: Fraction of items for validation; the count is truncated
            with ``int``.
        test_ratio: Accepted for symmetry but never read; the test split is
            whatever remains after the train and validation cuts.
        random_seed: When not ``None``, seeds both Python's ``random`` module
            and torch's global generator before shuffling.

    Returns:
        Tuple ``(train_indices, val_indices, test_indices)`` of lists.
    """
    if random_seed is not None:
        for seed_fn in (random.seed, torch.manual_seed):
            seed_fn(random_seed)

    shuffled = [*range(total_len)]
    random.shuffle(shuffled)

    n_train = int(train_ratio * total_len)
    n_val = int(val_ratio * total_len)
    val_stop = n_train + n_val

    return shuffled[:n_train], shuffled[n_train:val_stop], shuffled[val_stop:]

# Evaluation function
def evaluate(model, data_loader, criterion, device=None):
    """Compute the mean batch loss and the accuracy of ``model`` on ``data_loader``.

    Args:
        model: Callable as ``model(img1, img2)``, returning one probability per
            pair (for example shape ``(N, 1)``).
        data_loader: Iterable of ``(img1, img2, label)`` batches.
        criterion: Loss called as ``criterion(outputs, label)``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function does not depend on a global ``device`` variable.

    Returns:
        ``(mean_loss, accuracy)``. ``mean_loss`` is the average of the
        per-batch losses and ``accuracy`` is the fraction of pairs whose output
        thresholded at 0.5 equals the label. Both are ``nan`` when the loader
        yields no samples.

    Side effects:
        Puts ``model`` into eval mode.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    with torch.no_grad():
        for img1, img2, label in data_loader:
            img1, img2, label = img1.to(device), img2.to(device), label.to(device)
            # reshape(-1) keeps a 1-D result even for a batch of one; a bare
            # squeeze() would yield a 0-d tensor whose shape no longer matches
            # the label, which BCELoss rejects.
            outputs = model(img1, img2).reshape(-1)
            loss = criterion(outputs, label)
            running_loss += loss.item()
            predicted = (outputs > 0.5).float()
            correct += (predicted == label).sum().item()
            total += label.size(0)
            num_batches += 1

    if total == 0:
        # Nothing to evaluate: report "undefined" instead of dividing by zero.
        return float('nan'), float('nan')

    accuracy = correct / total
    return running_loss / num_batches, accuracy

# Training script with validation
def train(model, train_loader, val_loader, criterion, optimizer, epochs=10, device=None, save_dir='networks'):
    """Train ``model``, validate after every epoch and save a checkpoint per epoch.

    Args:
        model: Callable as ``model(img1, img2)``, returning one probability per
            pair.
        train_loader: Loader of ``(img1, img2, label)`` training batches; must
            support ``len``.
        val_loader: Loader passed to ``evaluate`` after each epoch.
        criterion: Loss called as ``criterion(outputs, label)``.
        optimizer: Optimizer over the model's parameters.
        epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).
        save_dir: Directory for the per-epoch checkpoints; created if missing.

    Side effects:
        Prints one summary line per epoch and writes
        ``<save_dir>/network_epoch<k>.pth`` (``k`` is zero-based) after each
        epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of work.
    os.makedirs(save_dir, exist_ok=True)

    num_batches = len(train_loader)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        with tqdm(total=num_batches, desc=f"Epoch {epoch+1}/{epochs}", unit="batch") as pbar:
            for img1, img2, label in train_loader:
                img1, img2, label = img1.to(device), img2.to(device), label.to(device)
                optimizer.zero_grad()
                # reshape(-1) keeps the output 1-D even for a final batch of
                # one; squeeze() would produce a 0-d tensor that BCELoss
                # rejects against the (1,)-shaped label.
                outputs = model(img1, img2).reshape(-1)
                loss = criterion(outputs, label)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                # pbar.n is the number of finished batches before this update.
                pbar.set_postfix(loss=running_loss / (pbar.n + 1))
                pbar.update(1)

        train_loss = running_loss / num_batches if num_batches else float('nan')
        val_loss, val_accuracy = evaluate(model, val_loader, criterion)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")

        # Save the model
        torch.save(model.state_dict(), os.path.join(save_dir, f'network_epoch{epoch}.pth'))

# Hyperparameters and setup
batch_size = 200
learning_rate = 0.01
epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f'Batch size: {batch_size}')
print(f'LR: {learning_rate}')
print(f'Epochs: {epochs}')
print(f'Device: {device}')

# Resize to the 112x112 input the network expects and convert to a tensor
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor()
])

# Load dataset
# image_folder = '/content/my_data/generated_images_10Kids_cropped'  # Update with the path to your dataset
image_folder = 'generated_images_10Kids_cropped/generated_images_10Kids_cropped'  # Update with the path to your dataset

# Debug-sized subset of identities: int(0.001 * 10000) == 10 people.
num_people = int(0.001*10000)

# Negative pairs are drawn from *other* people of the same split, so every
# split needs at least two identities. The former 75/15/10 split of 10 people
# produced 7/1/2 and left validation with a single identity; 60/20/20 gives
# 6/2/2.
train_indices, val_indices, test_indices = split_dataset(
    num_people, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2, random_seed=69
)
for split_name, split_indices in (("train", train_indices), ("val", val_indices), ("test", test_indices)):
    if len(split_indices) < 2:
        raise ValueError(
            f"The {split_name} split contains {len(split_indices)} identities; at least 2 "
            "are required to build negative pairs. Use more people or adjust the ratios."
        )

train_dataset = FaceDataset(image_folder, indices=train_indices, transform=transform)
val_dataset = FaceDataset(image_folder, indices=val_indices, transform=transform)
test_dataset = FaceDataset(image_folder, indices=test_indices, transform=transform)

# Model, loss, and optimizer
model = SiameseNetwork().to(device)
summary(model, [(1, 112, 112), (1, 112, 112)])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# The dataset is ordered person by person with alternating labels, so the
# training batches must be shuffled; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# train() writes one checkpoint per epoch into this directory.
os.makedirs('networks', exist_ok=True)

# Train the model
train(model, train_loader, val_loader, criterion, optimizer, epochs=epochs)

# Evaluate on test set
test_loss, test_accuracy = evaluate(model, test_loader, criterion)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


Batch size: 200
LR: 0.01
Epochs: 100
Device: cpu


KeyboardInterrupt: 