### Installing Dependencies

In [1]:
# %pip install timm
# %pip install onnx

TO DO
- [X] Train/Test Split
- [X] Normalization
- [X] Data Augmentation
- [ ] Hyperparameter Tuning
- [ ] Figure out ONNX Verification, Inference
- [ ] Export to TensorFlow.js?

In [None]:
"""
Outliers?
https://kevinmusgrave.github.io/pytorch-metric-learning/
https://github.com/KevinMusgrave/pytorch-metric-learning/blob/master/examples/README.md
https://colab.research.google.com/github/KevinMusgrave/pytorch-metric-learning/blob/master/examples/notebooks/SubCenterArcFaceMNIST.ipynb#scrollTo=GJ_L0TrTDnEA
---> Get_Outliers()
"""

## Training ConvNext-Atto

In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms

from torch.utils.tensorboard import SummaryWriter

from pytorch_metric_learning import losses, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

import timm

import onnx
import onnxruntime

from PIL import UnidentifiedImageError

from pathlib import Path




In [4]:
log = False

In [5]:
if log:
    writer = SummaryWriter()

batch_size = 32
epochs = 500
learning_rate = 1e-3
loss_lr = 1e-4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')

num_classes = 20 # ~100*12
embedding_size = 128

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

if log:
    writer.add_scalar('Hyperparameters/Batch_size', batch_size, 0)
    writer.add_scalar('Hyperparameters/Epochs', epochs, 0)
    writer.add_scalar('Hyperparameters/Learning_rate', learning_rate, 0)
    writer.add_scalar('Hyperparameters/Loss_lr', loss_lr, 0)
    writer.add_scalar('Hyperparameters/Num_classes', num_classes, 0)
    writer.add_scalar('Hyperparameters/Embedding_size', embedding_size, 0)

In [6]:
# Shouldn't really throw an error, but just in case
class RobustImageFolder(datasets.ImageFolder):
    def __getitem__(self, index):
        path, target = self.samples[index]
        try:
            sample = self.loader(path)
        except UnidentifiedImageError:
            print(f"\033[91mSkipping Corrupt Image:\033[0m {Path(path)}")            
            # return None, None
            return self.__getitem__(index + 1)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, target

In [7]:
train_dataset = RobustImageFolder('../faces/split/train', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = RobustImageFolder('../faces/split/val', transform=transform)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [8]:
class ConvNeXtArcFace(nn.Module):
    def __init__(self, model_name, embedding_size, pretrained=False):
        super(ConvNeXtArcFace, self).__init__()
        self.convnext = timm.create_model(model_name, pretrained=pretrained)
        self.convnext.reset_classifier(num_classes=0, global_pool='avg')
        self.embedding_layer = nn.Linear(self.convnext.num_features, embedding_size)
        
    def forward(self, x):
        x = self.convnext(x)
        embeddings = self.embedding_layer(x)
        return embeddings

In [9]:
model_name = 'convnextv2_atto'
model = ConvNeXtArcFace(model_name, embedding_size)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

criterion = losses.ArcFaceLoss(num_classes=num_classes, embedding_size=embedding_size, margin=4).to(device)
loss_optimizer = optim.Adam(criterion.parameters(), lr=loss_lr)

start_epoch = 1

def load_checkpoint(filepath, model, optimizer, loss_optimizer, criterion):
    checkpoint = torch.load(filepath)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    criterion.load_state_dict(checkpoint['criterion_state_dict'])
    loss_optimizer.load_state_dict(checkpoint['loss_optimizer_state_dict'])
    epoch = checkpoint['epoch'] + 1
    loss = checkpoint['loss']
    return model, optimizer, loss_optimizer, criterion, epoch, loss

In [10]:
checkpoint = None # "epoch_5.pth"
if checkpoint:
    model, optimizer, loss_optimizer, criterion, start_epoch, loss = load_checkpoint(
        f"checkpoints/{checkpoint}", model, optimizer, loss_optimizer, criterion
        )

In [11]:
def get_all_embeddings(dataset, model):
    tester = testers.BaseTester()
    return tester.get_all_embeddings(dataset, model)

accuracy_calculator = AccuracyCalculator(include=("precision_at_1",), k=1)

In [12]:
ckpt = [1, 3, 5, 10, 15, 25, 40, 60, 80, 90, 110, 130, 150, 175]
for epoch in range(start_epoch, epochs+1):
    model.train()
    running_loss = 0.0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        print(f"Epoch: {epoch}, Batch: {batch_idx + 1}/{len(train_loader)}")
        inputs = inputs.to(device)        
        targets = targets.to(device)
        inputs = inputs.float()

        optimizer.zero_grad()
        loss_optimizer.zero_grad()

        embeddings = model(inputs)
        loss = criterion(embeddings, targets)
        if log:    
            writer.add_scalar('Loss/train', loss.item(), (epoch-1) * len(train_loader) + batch_idx + 1)  
          
        loss.backward()
        optimizer.step()
        loss_optimizer.step()
        
        running_loss += loss.item()


    train_embeddings, train_labels = get_all_embeddings(train_dataset, model)
    val_embeddings, val_labels = get_all_embeddings(val_dataset, model)

    train_labels = train_labels.squeeze(1)
    val_labels = val_labels.squeeze(1)

    accuracies = accuracy_calculator.get_accuracy(
            train_embeddings, train_labels, train_embeddings, train_labels, False
        )
    training_accuracy = accuracies['precision_at_1']
    if log:
        writer.add_scalar('Accuracy/Training', training_accuracy, epoch)
    print(f"\033[92mTrain Set Accuracy = {training_accuracy}\033[0m")

    accuracies = accuracy_calculator.get_accuracy(
            val_embeddings, val_labels, train_embeddings, train_labels, False
        )
    validation_accuracy = accuracies['precision_at_1']
    if log:
        writer.add_scalar('Accuracy/Validation', validation_accuracy, epoch)
    print(f"\033[92mTest Set Accuracy = {validation_accuracy}\033[0m")


    if (epoch) in ckpt:
        torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss_optimizer_state_dict': loss_optimizer.state_dict(),
                    'criterion_state_dict': criterion.state_dict(),
                    'loss': running_loss,
                    }, f"checkpoints/epoch_{epoch}.pth")
        if log:
            writer.flush()


    print(f"\033[91mEpoch [{epoch}/{epochs}], Loss: {loss.item()}\033[0m")

if log:
    writer.close()

Epoch: 1, Batch: 1/26
Epoch: 1, Batch: 2/26
Epoch: 1, Batch: 3/26
Epoch: 1, Batch: 4/26
Epoch: 1, Batch: 5/26
Epoch: 1, Batch: 6/26
Epoch: 1, Batch: 7/26
Epoch: 1, Batch: 8/26
Epoch: 1, Batch: 9/26
Epoch: 1, Batch: 10/26
Epoch: 1, Batch: 11/26
Epoch: 1, Batch: 12/26
Epoch: 1, Batch: 13/26
Epoch: 1, Batch: 14/26
Epoch: 1, Batch: 15/26
Epoch: 1, Batch: 16/26
Epoch: 1, Batch: 17/26
Epoch: 1, Batch: 18/26
Epoch: 1, Batch: 19/26
Epoch: 1, Batch: 20/26
Epoch: 1, Batch: 21/26
Epoch: 1, Batch: 22/26
Epoch: 1, Batch: 23/26
Epoch: 1, Batch: 24/26
Epoch: 1, Batch: 25/26
Epoch: 1, Batch: 26/26


  0%|          | 0/1 [00:05<?, ?it/s]


RuntimeError: DataLoader worker (pid(s) 29008, 28768) exited unexpectedly

In [None]:
torch.save(model.state_dict(), 'convnext_atto_arcface.pth')

### Testing Inference

In [None]:
model = ConvNeXtArcFace(model_name, embedding_size)
model = model.to(device)
model.load_state_dict(torch.load('convnext_atto_arcface.pth'))

<All keys matched successfully>

In [None]:
test_dataset = RobustImageFolder('../faces/split/test', transform=transform)
test_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
def get_all_embeddings(dataset, model):
    tester = testers.BaseTester()
    return tester.get_all_embeddings(dataset, model)

accuracy_calculator = AccuracyCalculator(include=("precision_at_1",), k=1)

In [None]:
train_embeddings, train_labels = get_all_embeddings(train_dataset, model)
test_embeddings, test_labels = get_all_embeddings(test_dataset, model)

train_labels = train_labels.squeeze(1)
test_labels = test_labels.squeeze(1)

  0%|          | 0/26 [00:05<?, ?it/s]


RuntimeError: DataLoader worker (pid(s) 26324, 13400) exited unexpectedly

In [None]:
print("Computing accuracy...")
# accuracies = accuracy_calculator.get_accuracy(
        # test_embeddings, test_labels, train_embeddings, train_labels, False
    # )
# print("Test set accuracy = {}".format(accuracies["precision_at_1"]))

Computing accuracy...


NameError: name 'accuracy_calculator' is not defined

In [None]:

# embeddings, labels = [], []
# model.eval()
# with torch.no_grad():
#     for inputs, targets in test_loader:
#         inputs = inputs.to(device)
#         targets = targets.to(device)
#         inputs = inputs.float()
#         embedding = model(inputs)
#         embeddings.append(embedding)
#         labels.append(targets)

# embeddings = torch.cat(embeddings).cpu()
# labels = torch.cat(labels).cpu()

# accuracy = accuracy_calculator.get_accuracy(embeddings, embeddings, labels, labels, False)
# print(accuracy)

TypeError: sum() received an invalid combination of arguments - got (axis=int, out=NoneType, ), but expected one of:
 * (*, torch.dtype dtype)
      didn't match because some of the keywords were incorrect: axis, out
 * (tuple of ints dim, bool keepdim, *, torch.dtype dtype)
 * (tuple of names dim, bool keepdim, *, torch.dtype dtype)


### Saving Model

In [None]:
# dummy_input = torch.randn(1, 3, 224, 224, requires_grad=True).to(device)
# dummy_output = model(dummy_input)
# torch.onnx.export(model, dummy_input, "convnext_atto_arcface.onnx", export_params=True)

In [None]:
# onnx_model = onnx.load("convnext_atto_arcface.onnx")
# onnx.checker.check_model(onnx_model) # 

In [None]:
# ort_session = onnxruntime.InferenceSession("convnext_atto_arcface.onnx", providers=["CUDAExecutionProvider"])

# def to_numpy(tensor):
#     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(dummy_input)}
# ort_outs = ort_session.run(None, ort_inputs)

# # np.testing.assert_allclose(to_numpy(dummy_output), ort_outs[0], rtol=1e-03, atol=1e-05)
# # print("Exported model has been tested with ONNXRuntime, and the result looks good!")