In [None]:
# Move the kernel's working directory one level up.
# NOTE(review): presumably so that `config`, `dataset` and the relative `models/` paths
# used below resolve from the project root — confirm. Re-running this cell moves up again.
%cd ..

In [1]:
import os

import albumentations as A
import cv2
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from albumentations.pytorch import ToTensorV2
from pytorch_metric_learning import losses, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator
from torch.utils.data import DataLoader, Dataset
from torchvision.models import efficientnet_v2_s
from tqdm import tqdm

from config import CFG
from dataset import get_data_train_and_data_irm, split_dataset_by_photo

In [2]:
# Load the photo table and separate it into a training pool (`df_train`) and a remainder (`data_irm`).
# NOTE(review): presumably only identities with 20..28 photos end up in the training pool —
# confirm against `get_data_train_and_data_irm` in dataset.py.
df_train, data_irm = get_data_train_and_data_irm(min_number_of_photo=20, max_number_of_photo=28)

Число уникальных людей 10174. Всего фото 194716
Датасет для тренировки содержит 2143 людей
data 194716 -> train_data 60004 data_irm 131425


In [3]:
# Split the training pool into train / val / test frames.
# NOTE(review): the second argument is presumably the number of photos per person held out
# for each of val and test — confirm in dataset.py.
# Caution: `df_train` is rebound from itself here, so re-running this cell without re-running
# the loading cell splits the already-split frame again.
df_train, df_val, df_test = split_dataset_by_photo(df_train, 4)

df(60004) -> train(42860) val(8572) test(8572)


In [4]:
class CelebaDataet(Dataset):
    """Dataset yielding ``(image, label)`` pairs read from disk.

    Every row of the input frame provides a ``path`` (relative to
    ``CFG.img_folder_dst``) and a ``label``. Images are read with OpenCV,
    converted from BGR to RGB, normalised and passed through ``ToTensorV2``.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        """Store the ``path``/``label`` columns of ``df`` and build the transform."""
        # Rows are kept as a plain array of (path, label) pairs.
        self.df = df[["path", "label"]].values
        # The mean/std below are the commonly used ImageNet statistics.
        self.transform = A.Compose(
            [
                A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                ToTensorV2(),
            ]
        )

    def __len__(self) -> int:
        """Number of (path, label) rows."""
        return len(self.df)

    def __getitem__(self, index):
        """Load, colour-convert and transform the image at ``index``.

        Returns:
            ``(image, label)`` where ``image`` is the output of ``self.transform``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_path, label = self.df[index]
        full_path = os.path.join(CFG.img_folder_dst, img_path)
        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None rather than
            # raising; fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {full_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transform(image=img)["image"]
        return img, label

In [5]:
# Number of distinct labels in the training split; later passed to the loss as `num_classes`.
# NOTE(review): assumes labels are integers in 0..label_amount-1 — confirm in dataset.py.
label_amount = df_train["label"].nunique(dropna=False)
label_amount

2143

In [6]:
# One dataset per split, built in train / val / test order.
train_dataset, val_dataset, test_dataset = (
    CelebaDataet(split_frame) for split_frame in (df_train, df_val, df_test)
)

# Only the training loader shuffles; the evaluation loaders keep the row order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=26, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=26, shuffle=False)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=26, shuffle=False)

In [7]:
# Number of batches the training loader yields, i.e. optimisation steps per epoch.
len(train_dataloader)

1649

In [8]:
### Backbone: EfficientNetV2-S with the classification head removed, used as an embedding extractor ###
# Passed to the loss as `embedding_size`; must match the backbone's output width once
# the classifier is removed.
emb_size = 1280
# Fall back to the CPU so the notebook still runs on a machine without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = efficientnet_v2_s()
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
# map_location="cpu" lets a checkpoint that was saved from a GPU be restored on any machine;
# the model is moved to `device` afterwards.
model.load_state_dict(torch.load("models/efficientnet_v2_s.pth", map_location="cpu"))
# Replace the classification head with a pass-through so the forward pass returns features.
model.classifier = nn.Identity()
model = model.to(device)


### Training loop adapted from the PyTorch MNIST example: https://github.com/pytorch/examples/blob/master/mnist/main.py ###
def train(model, loss_func, device, train_loader, optimizer, loss_optimizer, epoch):
    """Run one pass over ``train_loader``, updating the model and the loss parameters.

    Args:
        model: network mapping a batch of inputs to embeddings.
        loss_func: callable ``(embeddings, labels) -> scalar loss``.
        device: device the batches are moved to before the forward pass.
        train_loader: sized iterable of ``(data, labels)`` batches.
        optimizer: optimizer stepped after every batch (first).
        loss_optimizer: second optimizer stepped after every batch.
        epoch: epoch number, used only in the progress message.

    Returns:
        None. The loss is printed every 300 iterations.
    """
    model.train()
    both_optimizers = (optimizer, loss_optimizer)
    for step, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
        inputs, targets = batch
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Reset the gradients accumulated by the previous batch.
        for opt in both_optimizers:
            opt.zero_grad()

        batch_loss = loss_func(model(inputs), targets)
        batch_loss.backward()

        for opt in both_optimizers:
            opt.step()

        if step % 300 == 0:
            print("Epoch {} Iteration {}: Loss = {}".format(epoch, step, batch_loss))


### thin wrapper around the pytorch-metric-learning tester ###
def get_all_embeddings(dataset, model):
    """Return whatever a fresh ``testers.BaseTester`` computes for ``dataset`` with ``model``.

    Callers in this notebook unpack the result as ``(embeddings, labels)``.
    """
    return testers.BaseTester().get_all_embeddings(dataset, model)


### compute accuracy using AccuracyCalculator from pytorch-metric-learning ###
def test(train_set, test_set, model, accuracy_calculator):
    train_embeddings, train_labels = get_all_embeddings(train_set, model)
    test_embeddings, test_labels = get_all_embeddings(test_set, model)
    train_labels = train_labels.squeeze(1)
    test_labels = test_labels.squeeze(1)
    print("Computing accuracy")
    accuracies = accuracy_calculator.get_accuracy(test_embeddings, test_labels, train_embeddings, train_labels, False)
    print("Test set accuracy (Precision@1) = {}".format(accuracies["precision_at_1"]))


### Optimisation setup ###
num_epochs = 4

# The backbone and the loss's own parameters are updated by two separate Adam instances.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

### pytorch-metric-learning objects ###
loss_func = losses.SubCenterArcFaceLoss(embedding_size=emb_size, num_classes=label_amount).to(device)
loss_optimizer = optim.Adam(params=loss_func.parameters(), lr=1e-4)
# Only Precision@1 is computed.
accuracy_calculator = AccuracyCalculator(k=1, include=("precision_at_1",))

In [None]:
# Baseline evaluation: Precision@1 of validation queries against training references.
# Each call re-embeds both datasets from scratch.
test(train_dataset, val_dataset, model, accuracy_calculator)

100%|██████████| 1340/1340 [04:06<00:00,  5.45it/s]
100%|██████████| 268/268 [00:44<00:00,  6.05it/s]


Computing accuracy


  x.storage().data_ptr() + x.storage_offset() * 4)


Test set accuracy (Precision@1) = 0.13602426504899673


In [10]:
# Train for `num_epochs` epochs (numbered from 1), then evaluate once on the validation split.
for epoch in range(1, num_epochs + 1):
    train(
        model=model,
        loss_func=loss_func,
        device=device,
        train_loader=train_dataloader,
        optimizer=optimizer,
        loss_optimizer=loss_optimizer,
        epoch=epoch,
    )
test(train_set=train_dataset, test_set=val_dataset, model=model, accuracy_calculator=accuracy_calculator)

  0%|          | 1/1649 [00:02<1:09:58,  2.55s/it]

Epoch 1 Iteration 0: Loss = 39.258689880371094


 18%|█▊        | 301/1649 [01:43<11:31,  1.95it/s]

Epoch 1 Iteration 300: Loss = 39.0865592956543


 36%|███▋      | 601/1649 [03:29<06:27,  2.71it/s]

Epoch 1 Iteration 600: Loss = 39.00695037841797


 55%|█████▍    | 901/1649 [05:35<06:28,  1.92it/s]

Epoch 1 Iteration 900: Loss = 38.46174240112305


 73%|███████▎  | 1201/1649 [08:08<03:44,  1.99it/s]

Epoch 1 Iteration 1200: Loss = 38.541290283203125


 91%|█████████ | 1501/1649 [10:27<01:12,  2.05it/s]

Epoch 1 Iteration 1500: Loss = 38.10492706298828


  0%|          | 1/1649 [00:00<15:03,  1.82it/s]   

Epoch 2 Iteration 0: Loss = 37.44302749633789


 18%|█▊        | 301/1649 [02:12<10:53,  2.06it/s]

Epoch 2 Iteration 300: Loss = 37.412288665771484


 36%|███▋      | 601/1649 [04:23<08:12,  2.13it/s]

Epoch 2 Iteration 600: Loss = 36.49704360961914


 55%|█████▍    | 901/1649 [06:33<05:45,  2.16it/s]

Epoch 2 Iteration 900: Loss = 37.08305740356445


 73%|███████▎  | 1201/1649 [08:45<03:38,  2.05it/s]

Epoch 2 Iteration 1200: Loss = 36.897743225097656


 91%|█████████ | 1501/1649 [10:56<01:08,  2.18it/s]

Epoch 2 Iteration 1500: Loss = 36.53855514526367


  0%|          | 1/1649 [00:00<15:29,  1.77it/s]   

Epoch 3 Iteration 0: Loss = 34.98868179321289


 18%|█▊        | 301/1649 [02:12<10:37,  2.12it/s]

Epoch 3 Iteration 300: Loss = 34.777976989746094


 36%|███▋      | 601/1649 [04:23<08:12,  2.13it/s]

Epoch 3 Iteration 600: Loss = 34.61711502075195


 55%|█████▍    | 901/1649 [06:37<05:46,  2.16it/s]

Epoch 3 Iteration 900: Loss = 33.203521728515625


 73%|███████▎  | 1201/1649 [08:49<03:35,  2.08it/s]

Epoch 3 Iteration 1200: Loss = 33.50142288208008


 91%|█████████ | 1501/1649 [11:08<01:08,  2.16it/s]

Epoch 3 Iteration 1500: Loss = 33.13824462890625


  0%|          | 1/1649 [00:00<14:33,  1.89it/s]   

Epoch 4 Iteration 0: Loss = 31.644588470458984


 18%|█▊        | 301/1649 [02:11<10:07,  2.22it/s]

Epoch 4 Iteration 300: Loss = 31.558082580566406


 36%|███▋      | 601/1649 [04:31<08:38,  2.02it/s]

Epoch 4 Iteration 600: Loss = 30.552515029907227


 55%|█████▍    | 901/1649 [06:44<05:54,  2.11it/s]

Epoch 4 Iteration 900: Loss = 30.762760162353516


 73%|███████▎  | 1201/1649 [08:58<03:27,  2.15it/s]

Epoch 4 Iteration 1200: Loss = 29.881664276123047


 91%|█████████ | 1501/1649 [11:09<01:07,  2.20it/s]

Epoch 4 Iteration 1500: Loss = 28.324228286743164


100%|██████████| 1340/1340 [03:58<00:00,  5.63it/s]
100%|██████████| 268/268 [00:52<00:00,  5.07it/s]
  x.storage().data_ptr() + x.storage_offset() * 4)


Computing accuracy
Test set accuracy (Precision@1) = 0.7866308912739151


In [11]:
# Not bad for 4 epochs: Precision@1 went from 0.13 to 0.79.
test(train_dataset, val_dataset, model, accuracy_calculator)

100%|██████████| 1340/1340 [04:00<00:00,  5.58it/s]
100%|██████████| 268/268 [00:47<00:00,  5.64it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.7866308912739151


In [12]:
# Persist the fine-tuned backbone weights. Only `model` is saved here; the parameters held by
# `loss_func` are not part of this checkpoint.
torch.save(model.state_dict(), "models/efficientnet_v2_s_arcface_4_epoch.pth")