<a href="https://colab.research.google.com/github/kovzanok/dls-final-task/blob/main/2_ArcFace.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Зависимости и загрузка данных

In [None]:
!gdown 1bHLaSZ2frNjyK2hLTXTuPFlFRxfJDxwb

In [None]:
!unzip /content/file.zip -d /content/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/stage_3_dataset/112660.jpg  
  inflating: /content/content/stage_3_dataset/129052.jpg  
  inflating: /content/content/stage_3_dataset/151389.jpg  
  inflating: /content/content/stage_3_dataset/118111.jpg  
  inflating: /content/content/stage_3_dataset/000174.jpg  
  inflating: /content/content/stage_3_dataset/013829.jpg  
  inflating: /content/content/stage_3_dataset/107408.jpg  
  inflating: /content/content/stage_3_dataset/112157.jpg  
  inflating: /content/content/stage_3_dataset/119239.jpg  
  inflating: /content/content/stage_3_dataset/120061.jpg  
  inflating: /content/content/stage_3_dataset/147590.jpg  
  inflating: /content/content/stage_3_dataset/037708.jpg  
  inflating: /content/content/stage_3_dataset/072505.jpg  
  inflating: /content/content/stage_3_dataset/100668.jpg  
  inflating: /content/content/stage_3_dataset/118256.jpg  
  inflating: /content/content/stage_3_dataset/1217

In [None]:
from torch import nn
from torch.nn import functional as F
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from PIL import Image
import torchvision.transforms as transforms
import torchvision.utils as vutils
import torchvision.models as models
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import torchvision.transforms.functional as TF
import cv2
from sklearn.preprocessing import LabelEncoder


import random
import os
import math

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [None]:
# Seed the torch, NumPy and stdlib `random` generators with the same fixed value.
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Датасет

Загружаем датасет со связью имени изображения и id человека

In [None]:
!gdown 1pmjLR8zU17IQTVWYZrzLU_-XR33f1RtJ

In [None]:
# Whitespace-separated file with no header row; the first column (image file
# name) becomes the index. `sep=r'\s+'` is the documented equivalent of the
# deprecated `delim_whitespace=True`.
df = pd.read_csv('/content/identity_CelebA.txt', sep=r'\s+', header=None, index_col=0)

  df = pd.read_csv('/content/drive/MyDrive/identity_CelebA.txt', delim_whitespace=True, header=None,index_col=0)


In [None]:
# Give the single data column and the index readable names. Rebinding `df`
# instead of using `inplace=True` keeps the step free of in-place mutation.
df = df.rename(columns={1: 'id'})
df.index.name = 'image_name'

In [None]:
df.head()

Unnamed: 0_level_0,id
image_name,Unnamed: 1_level_1
000001.jpg,2880
000002.jpg,2937
000003.jpg,8692
000004.jpg,5805
000005.jpg,9295


In [None]:
def get_filenames_os(folder_path):
    """Return the names of the regular files located directly in ``folder_path``.

    Sub-directories are skipped and the listing is not recursive. Only the
    file names (not full paths) are returned.

    The result is sorted because ``os.listdir`` returns entries in arbitrary,
    platform-dependent order; a stable order keeps everything derived from
    this list (dataset indexing, seeded splits) reproducible.

    Args:
        folder_path: Path of the directory to list.

    Returns:
        Sorted list of file names.
    """
    return sorted(
        name
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )

Получаем имена файлов из датасета, прошедшего выравнивание и кроп моделью из прошлого задания. Если бы датасет CelebA использовался полностью, эта операция была бы не нужна, но нам нужны id только тех людей, фото которых мы обработали в прошлом задании.

In [None]:
# File names found in the folder with the aligned and cropped images.
filenames = get_filenames_os('/content/content/stage_3_dataset')

In [None]:
# Keep only the identity rows whose index label (image file name) is in `filenames`.
filtered_df = df.loc[filenames]


Кодируем id людей, т.к. они не упорядочены

In [None]:
# Fit a label encoder on the raw identity ids and store the resulting integer
# class labels in a new `encoded_id` column.
le = LabelEncoder()
filtered_df['encoded_id'] = le.fit_transform(filtered_df['id'])

Кастомный датасет, возвращающий изображение и закодированный id

In [None]:
class CelebADataset(Dataset):
    """Image dataset that yields ``(image, encoded_id)`` pairs.

    Args:
        img_dir: Directory that contains the image files.
        img_names: Sequence of image file names; it defines the dataset
            length and the index -> file mapping.
        image_df: DataFrame indexed by image file name with an
            ``encoded_id`` column holding the integer class label.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, img_dir, img_names, image_df, transform=None):
        super().__init__()
        self.img_dir = img_dir
        self.transform = transform
        self.image_names = img_names
        self.image_df = image_df

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``.

        The image is converted to RGB and passed through ``transform`` when
        one was supplied; the label is returned as a plain ``int``.
        """
        img_name = self.image_names[idx]
        img_path = os.path.join(self.img_dir, img_name)

        # `convert` returns a fully loaded copy, so the underlying file can be
        # closed right away instead of waiting for garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # Explicit None check: a transform object must not be skipped just
        # because it happens to evaluate as falsy.
        if self.transform is not None:
            image = self.transform(image)

        # Single-step (row, column) lookup instead of chained indexing, which
        # would first materialise the whole row as a Series.
        label = self.image_df.loc[img_name, 'encoded_id']

        return image, int(label)


В финальной версии применён ряд аугментаций, повысивших итоговую accuracy. При первом обучении из преобразований были только `Resize` и `Normalize`.

In [None]:
# Preprocessing pipeline: resize to 112x112, apply random augmentations
# (horizontal flip, brightness/contrast jitter, small rotation/translation),
# convert to a tensor and normalise each channel with mean 0.5 and std 0.5.
# NOTE(review): the random steps run on every item access, so any subset
# derived from `dataset` (including a held-out split) is augmented as well.
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomAffine(degrees=5, translate=(0.02, 0.02)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3),
])


# Dataset over every image listed in `filtered_df`; the DataFrame index supplies
# the file names and the `encoded_id` column supplies the labels.
dataset = CelebADataset(
    img_dir='/content/content/stage_3_dataset',
    transform=transform,
    image_df=filtered_df,
    img_names = filtered_df.index.to_list()
)

In [None]:
# 80/20 random split of the dataset indices.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# `dataset` applies random augmentations on every item access, so a subset of
# it would also augment the held-out images and make validation metrics noisy
# and not comparable between epochs. Re-wrap the held-out indices around a
# second view of the same data that uses deterministic preprocessing only.
eval_transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3),
])
eval_dataset = CelebADataset(
    img_dir=dataset.img_dir,
    transform=eval_transform,
    image_df=dataset.image_df,
    img_names=dataset.image_names,
)
test_dataset = Subset(eval_dataset, test_dataset.indices)

In [None]:
# Batches of 1024 samples; only the training loader shuffles its data.
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=1024)

# Модель

Предобученная модель resnet18 с заменённым последним слоем: количество нейронов на выходе равно количеству классов (около 10к для нашего датасета).

In [None]:
# Count the distinct class labels directly from the label column. Taking
# `nunique().values[0]` silently depends on which column happens to come first.
num_classes = int(filtered_df['encoded_id'].nunique())

In [None]:
num_classes

8191

In [None]:
# ResNet-18 with pretrained weights; the final fully connected layer is replaced
# by a new linear layer with one output per class.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 186MB/s]


В начале обучения разморожен был только `layer4` и `fc`. Далее после достижения "потолка" accuracy было принято решение разморозить `layer3` для его файн-тюнинга

In [None]:
# A parameter stays trainable only when its name mentions layer3, layer4 or fc;
# every other parameter is frozen.
for name, param in model.named_parameters():
    param.requires_grad = any(tag in name for tag in ('layer3', 'layer4', 'fc'))


In [None]:
# Move the model to the selected device.
model = model.to(device)

# Обучение

## Функции обучения

In [None]:
def train_epoch(model, classifier, criterion, optimizer, dataloader):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network called on each image batch. Its output is used as the
            logits directly when ``classifier`` is None, otherwise as the
            features handed to ``classifier``.
        classifier: Optional head called as ``classifier(features, labels)``
            to produce the logits.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer stepped once per batch.
        dataloader: Iterable of ``(images, labels)`` batches.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the per-batch loss averaged over the
        number of batches, and the fraction of samples whose arg-max logit
        equals the label.
    """
    use_head = classifier is not None

    model.train()
    if use_head:
        classifier.train()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for images, labels in tqdm(dataloader, leave=False):
        images, labels = images.to(device), labels.to(device)

        features = model(images)  # embeddings, or logits when there is no head
        logits = classifier(features, labels) if use_head else features

        loss = criterion(logits, labels)

        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        preds = torch.max(logits, 1).indices
        n_correct += (preds == labels).sum().item()
        n_seen += labels.size(0)

    return loss_sum / len(dataloader), n_correct / n_seen


In [None]:
@torch.no_grad()
def val_epoch(model, classifier, criterion, dataloader):
    """Evaluate ``model`` (and the optional ``classifier`` head) on ``dataloader``.

    Gradients are disabled and both modules are switched to eval mode.

    Args:
        model: Network called on each image batch. Its output is used as the
            logits directly when ``classifier`` is None, otherwise as the
            features handed to ``classifier``.
        classifier: Optional head called as ``classifier(features, labels)``.
        criterion: Loss called as ``criterion(logits, labels)``.
        dataloader: Iterable of ``(images, labels)`` batches.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the per-batch loss averaged over the
        number of batches, and the fraction of correctly predicted samples.
    """
    model.eval()
    if classifier is not None:
        classifier.eval()

    # A head called with the labels builds its logits from the ground truth,
    # so an arg-max over those logits is not a genuine prediction (a collapsed
    # model can score 100%). When the head exposes its class-weight matrix,
    # predictions come from label-free cosine similarities instead.
    # NOTE(review): assumes `classifier.weight` has shape
    # (num_classes, feature_dim), as a cosine-margin head does; confirm for
    # any other head type.
    head_weight = getattr(classifier, 'weight', None)

    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(dataloader, leave=False):
        images, labels = images.to(device), labels.to(device)

        features = model(images)

        if classifier is not None:
            logits = classifier(features, labels)
        else:
            logits = features

        # The loss keeps using the head's own logits so that it stays
        # comparable with the training loss.
        loss = criterion(logits, labels)
        running_loss += loss.item()

        if head_weight is not None:
            scores = F.linear(F.normalize(features), F.normalize(head_weight))
        else:
            scores = logits

        _, preds = torch.max(scores, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    avg_loss = running_loss / len(dataloader)
    accuracy = correct / total
    return avg_loss, accuracy


In [None]:
def train(model, criterion, optimizer, train_loader, val_loader, num_epochs=100, scheduler=None, classifier=None):
    """Train for ``num_epochs`` epochs and checkpoint on best validation accuracy.

    Each epoch runs ``train_epoch`` followed by ``val_epoch``, prints a summary
    line, steps the scheduler with the validation accuracy and saves the
    state dicts whenever the validation accuracy exceeds the best seen so far.

    Args:
        model: Network to train.
        criterion: Loss function.
        optimizer: Optimizer used by ``train_epoch``.
        train_loader: Loader with the training batches.
        val_loader: Loader with the validation batches.
        num_epochs: Number of epochs to run.
        scheduler: Optional scheduler stepped as ``scheduler.step(val_acc)``.
        classifier: Optional head forwarded to ``train_epoch``/``val_epoch``;
            it also selects which checkpoint file is written.

    Returns:
        Dict with per-epoch lists under the keys ``'train'`` (training loss),
        ``'test'`` (validation loss) and ``'accuracy'`` (validation accuracy).
    """
    history = {'train': [], 'test': [], 'accuracy': []}
    best_accuracy = 0

    # Explicit None checks, as in train_epoch/val_epoch: container modules
    # define __len__, so truthiness is not a reliable "was it passed" test.
    has_classifier = classifier is not None
    checkpoint_path = "/content/best_recognition_model_arc.pth" if has_classifier else "/content/best_recognition_model.pth"

    for epoch in tqdm(range(num_epochs)):
        train_loss, train_acc = train_epoch(model, classifier, criterion, optimizer, train_loader)
        val_loss, val_acc = val_epoch(model, classifier, criterion, val_loader)

        history['train'].append(train_loss)
        history['test'].append(val_loss)
        history['accuracy'].append(val_acc)

        print(f"Epoch {epoch + 1} — Train Loss: {train_loss:.4f} — Val Loss: {val_loss:.4f} - Accuracy on val: {val_acc}")

        if scheduler is not None:
            scheduler.step(val_acc)

        if val_acc > best_accuracy:
            best_accuracy = val_acc
            torch.save({
                'model': model.state_dict(),
                'classifier': classifier.state_dict() if has_classifier else None
            }, checkpoint_path)
            print("✅ Saved best model")

    return history

## Cross Entropy Loss

Тут уже ChatGPT подсказал, что `label_smoothing` аргумент для CE loss'а может быть полезен для борьбы с переобучением и делает границы классификации более "мягкими"

In [None]:
# Cross-entropy loss with label smoothing of 0.05.
criterion = nn.CrossEntropyLoss(label_smoothing=0.05)
# Only parameters with requires_grad=True are handed to the optimizer.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
# Halve the learning rate when the monitored metric (mode='max', i.e. higher is
# better) plateaus, with a patience of 2 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)

Классический цикл обучения с итерацией по эпохам и датасету. Сохранение лучшей модели происходит после каждой эпохи, если полученная accuracy выше предыдущего сохраненного значения

In [None]:
# `train` takes the epoch count as `num_epochs`; passing `epochs=` raises
# TypeError (unexpected keyword argument).
history = train(model, criterion, optimizer, train_loader, val_loader, scheduler=scheduler, num_epochs=5)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 1 — Train Loss: 1.4193 — Val Loss: 2.7241 - Accuracy on val: 0.6310957551826258
✅ Model saved (best so far)


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 2 — Train Loss: 1.3138 — Val Loss: 2.6671 - Accuracy on val: 0.6407699901283317
✅ Model saved (best so far)


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 3 — Train Loss: 1.2720 — Val Loss: 2.6431 - Accuracy on val: 0.6466600855544588
✅ Model saved (best so far)


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 4 — Train Loss: 1.2388 — Val Loss: 2.6308 - Accuracy on val: 0.6442250740375124


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 5 — Train Loss: 1.2090 — Val Loss: 2.6255 - Accuracy on val: 0.6507732806844356
✅ Model saved (best so far)


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 6 — Train Loss: 1.1872 — Val Loss: 2.6177 - Accuracy on val: 0.6509049029285949
✅ Model saved (best so far)


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 7 — Train Loss: 1.1683 — Val Loss: 2.6004 - Accuracy on val: 0.6552155314248108
✅ Model saved (best so far)


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 8 — Train Loss: 1.1491 — Val Loss: 2.5997 - Accuracy on val: 0.6548864758144126


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 9 — Train Loss: 1.1333 — Val Loss: 2.5751 - Accuracy on val: 0.6603817045080619
✅ Model saved (best so far)


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 10 — Train Loss: 1.1158 — Val Loss: 2.5920 - Accuracy on val: 0.6578479763079961


  0%|          | 0/950 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epoch 11 — Train Loss: 1.1026 — Val Loss: 2.5973 - Accuracy on val: 0.6573872984534387


  0%|          | 0/950 [00:00<?, ?it/s]

KeyboardInterrupt: 

Обучение на CE loss было реализовано поэтапно. Исходные данные: модель resnet18, предобученная на ImageNet, с размороженным layer4. Для обучения было взято 25% от датасета CelebA, оптимизатор Adam, без scheduler. По итогам обучения в течение +-30 эпох была получена accuracy около 0.11. Модель была сохранена для будущего дообучения. Далее будут приведены шаги по улучшению процесса обучения и их результаты (все шаги были пройдены на модели, которая сохранялась на одном шаге и загружалась на следующем):
1. Увеличил размер датасета до 75% от исходного и добавил аугментаций. После +-30 эпох accuracy достигло 0.33
2. Разморозил помимо layer4 еще и layer3, уменьшил learning_rate до `1e-4`, добавил scheduler ReduceLROnPlateau, но его настроил на `mode='min'` и передавал в метод `scheduler.step(val_loss)`. После 10 эпох accuracy 0.66
3. Scheduler изменил на `mode='max'` и `scheduler.step(accuracy)`. В итоге удалось добить accuracy до 0.84

## Arc Face Loss

In [None]:
class ArcMarginProduct(nn.Module):
    """Additive angular margin (ArcFace) classification head.

    Produces scaled cosine logits between L2-normalised features and
    L2-normalised class weights. For the ground-truth class the logit is
    computed from ``cos(theta + m)`` instead of ``cos(theta)``.

    Args:
        in_features: Dimensionality of the input embeddings.
        out_features: Number of classes.
        s: Scale applied to the logits.
        m: Angular margin in radians.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.5):
        super().__init__()
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Threshold cos(pi - m): below it theta + m exceeds pi and
        # cos(theta + m) stops decreasing as theta grows.
        self.th = math.cos(math.pi - m)
        # Linear penalty used instead of the margin beyond that threshold.
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label=None):
        """Return the scaled logits with shape ``(batch, out_features)``.

        Args:
            input: Embeddings of shape ``(batch, in_features)``.
            label: Integer class indices of shape ``(batch,)``. When None,
                plain scaled cosine logits are returned without any margin
                (inference mode).
        """
        # Cosine similarity between normalised embeddings and class weights.
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))

        if label is None:
            return cosine * self.s

        sine = torch.sqrt(1.0 - torch.clamp(cosine**2, 0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)

        # Keep the target logit monotonically decreasing in theta. Without
        # this guard, cos(theta + m) grows again once theta > pi - m, so the
        # loss would reward pushing embeddings away from their own class.
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        # One-hot mask of the target class.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1), 1.0)

        # Margin logit for the target class, plain cosine for all others.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)

        return output * self.s


In [None]:
# Pretrained ResNet-18 used as a feature extractor: its final fully connected
# layer is replaced by an identity, and the output is flattened.
backbone = models.resnet18(pretrained=True)
backbone.fc = nn.Identity()
model = nn.Sequential(backbone, nn.Flatten()).to(device)

# Margin head mapping 512-dimensional features to one logit per class.
arc_margin = ArcMarginProduct(in_features=512, out_features=num_classes).to(device)


# A backbone parameter stays trainable only when its name mentions layer4 or fc.
for name, param in model.named_parameters():
    param.requires_grad = any(tag in name for tag in ('layer4', 'fc'))

model = model.to(device)

In [None]:
# Plain cross-entropy applied to the logits produced by the margin head.
criterion = nn.CrossEntropyLoss()
# Two parameter groups (backbone and margin head) sharing one learning rate.
# All backbone parameters are passed here, including those with
# requires_grad=False.
optimizer = torch.optim.Adam([
    {'params': model.parameters()},
    {'params': arc_margin.parameters()}
], lr=1e-3)
# Halve the learning rate when the monitored metric (mode='max', i.e. higher is
# better) plateaus, with a patience of 2 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)

In [None]:
# `train` takes the epoch count as `num_epochs`; passing `epochs=` raises
# TypeError (unexpected keyword argument).
history = train(model, criterion, optimizer, train_loader, val_loader, scheduler=scheduler, classifier=arc_margin, num_epochs=5)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 1 — Train Loss: 19.3164 — Val Loss: 13.9683 - Accuracy on val: 0.0


  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 2 — Train Loss: 8.1862 — Val Loss: 6.0994 - Accuracy on val: 1.0
✅ Saved best model


  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 3 — Train Loss: 6.0000 — Val Loss: 5.9590 - Accuracy on val: 1.0


  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 4 — Train Loss: 5.9129 — Val Loss: 5.9055 - Accuracy on val: 1.0


  0%|          | 0/119 [00:00<?, ?it/s]

KeyboardInterrupt: 