In [1]:
import os

import cv2
import numpy as np
import onnx
import onnxruntime
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from sklearn.metrics.pairwise import cosine_similarity
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.utils import save_image
from tqdm import tqdm

In [2]:
# ort_session = ort.InferenceSession(r'models/arcface.onnx', providers=['CUDAExecutionProvider'])

In [3]:
class ArcFaceONNX:
    """Thin ONNX Runtime wrapper that turns face crops into ArcFace embeddings.

    On construction the ONNX graph is loaded once to decide how inputs must be
    normalised, an inference session is created (unless one is supplied), and
    the model's input/output names and shapes are cached on the instance.

    Attributes:
        input_mean, input_std: Normalisation constants applied to pixel values.
        input_size: ``(input_shape[3], input_shape[2])`` as reported by the
            session; this is the size handed to OpenCV when building the blob.
        input_name, output_names: Tensor names used when running the session.
        output_shape: Shape reported by the session for the single output.
    """

    def __init__(self, model_file=None, session=None):
        """Load the model and cache its I/O metadata.

        Args:
            model_file: Path to the ``.onnx`` file. Required.
            session: Optional pre-built ``onnxruntime.InferenceSession``; when
                omitted a new session is created for ``model_file``.

        Raises:
            AssertionError: If ``model_file`` is None or the model does not
                expose exactly one output.
        """
        assert model_file is not None
        self.model_file = model_file
        self.session = session
        self.taskname = 'recognition'

        # Look at the names of the first few graph nodes. When both a
        # subtraction and a multiplication are found, inputs are passed through
        # unnormalised (mean 0, std 1); otherwise they are scaled with 127.5.
        find_sub = False
        find_mul = False
        graph = onnx.load(self.model_file).graph
        for node in graph.node[:8]:
            if node.name.startswith('Sub') or node.name.startswith('_minus'):
                find_sub = True
            if node.name.startswith('Mul') or node.name.startswith('_mul'):
                find_mul = True
        if find_sub and find_mul:
            # mxnet arcface model
            input_mean = 0.0
            input_std = 1.0
        else:
            input_mean = 127.5
            input_std = 127.5
        self.input_mean = input_mean
        self.input_std = input_std

        if self.session is None:
            # Prefer CUDA, but list the CPU provider as a fallback so the
            # session can still be created on a machine without a usable GPU.
            self.session = onnxruntime.InferenceSession(
                self.model_file,
                providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
            )

        input_cfg = self.session.get_inputs()[0]
        input_shape = input_cfg.shape
        # Axes 2 and 3 are reversed into the order OpenCV expects for `size`
        # (presumably the model input is NCHW, giving (width, height)).
        self.input_size = tuple(input_shape[2:4][::-1])
        self.input_shape = input_shape
        self.input_name = input_cfg.name

        outputs = self.session.get_outputs()
        self.output_names = [out.name for out in outputs]
        assert len(self.output_names) == 1
        self.output_shape = outputs[0].shape

    def prepare(self, ctx_id, **kwargs):
        """Select the execution provider for an existing session.

        Args:
            ctx_id: Device id. A negative value requests CPU execution; any
                other value leaves the session's providers untouched.
            **kwargs: Accepted for interface compatibility and ignored.
        """
        if ctx_id < 0:
            # A negative context id means "no GPU": switch to the CPU provider.
            # (The previous code re-selected CUDA here, which made the call a
            # no-op at best and a failure on CPU-only machines.)
            self.session.set_providers(['CPUExecutionProvider'])

    # def get(self, img, face):
    #     aimg = face_align.norm_crop(img, landmark=face.kps, image_size=self.input_size[0])
    #     face.embedding = self.get_feat(aimg).flatten()
    #     return face.embedding

    def compute_sim(self, feat1, feat2):
        """Return the cosine similarity of two embeddings.

        Both inputs are flattened first, so ``(1, D)`` and ``(D,)`` arrays are
        interchangeable. There is no guard for zero-norm inputs.
        """
        feat1 = feat1.ravel()
        feat2 = feat2.ravel()
        sim = np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2))
        return sim

    def get_feat(self, imgs):
        """Embed one image or a list of images.

        Args:
            imgs: A single image array or a list of them. A non-list argument
                is wrapped into a one-element list.

        Returns:
            The first (and only) output of the session for the whole batch.
        """
        if not isinstance(imgs, list):
            imgs = [imgs]
        input_size = self.input_size

        # blobFromImages resizes to `input_size`, subtracts the mean, applies
        # the 1/std scale factor and swaps the first and last channel
        # (swapRB=True).
        # NOTE(review): the swap suggests images are expected in BGR order, as
        # produced by cv2.imread - confirm against the callers.
        blob = cv2.dnn.blobFromImages(imgs, 1.0 / self.input_std, input_size,
                                      (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
        net_out = self.session.run(self.output_names, {self.input_name: blob})[0]
        return net_out

    def forward(self, batch_data):
        """Run the model on an already laid-out batch.

        Only mean/std normalisation is applied here; no resizing or channel
        handling takes place, so ``batch_data`` must already match the model
        input layout.
        """
        blob = (batch_data - self.input_mean) / self.input_std
        net_out = self.session.run(self.output_names, {self.input_name: blob})[0]
        return net_out

In [4]:
# Load the two ArcFace ONNX models whose embedding spaces are compared below.
# (Judging by the variable and file names these are r100 and r50 backbones.)
embedder_r100 = ArcFaceONNX(r'models/arcface.onnx')
embedder_r50 = ArcFaceONNX(r'models/w600k_r50.onnx')

In [5]:
# Inspect the session options object of the r50 embedder's ONNX Runtime session.
embedder_r50.session.get_session_options()

<onnxruntime.capi.onnxruntime_pybind11_state.SessionOptions at 0x1f5328f2d70>

In [6]:
# List the execution providers this onnxruntime installation can use.
onnxruntime.get_available_providers()

['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']

In [7]:
# One sub-folder per identity; each image is converted to a NumPy array by the transform.
# NOTE(review): the root is a hard-coded absolute Windows path - adjust it (or make it
# configurable) before running on another machine.
dataset = ImageFolder(root=r'D:\data\biometrics_hack\casia_webface', transform=np.asarray)
# Sequential (unshuffled) loader over the whole dataset.
dataloader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=6)

In [8]:
# Smoke test: embed the first dataset image with both models.
# dataset[0][0] has already been passed through transform=np.asarray, so the
# extra np.asarray here does not change it.
# NOTE(review): get_feat swaps the first and last channel (swapRB=True); the
# images here presumably arrive as RGB from ImageFolder's loader - confirm the
# channel order the models actually receive.
emb_r100 = embedder_r100.get_feat(np.asarray(dataset[0][0]))
emb_r50 = embedder_r50.get_feat(np.asarray(dataset[0][0]))

In [9]:
# Classes (identities) discovered by ImageFolder
classes = dataset.classes  # list of class names
class_to_idx = dataset.class_to_idx  # mapping: class name -> class index

# Group sample indices by class label
class_indices = {}
for idx, (_path, label) in enumerate(dataset.samples):
    class_indices.setdefault(label, []).append(idx)

# Hold out 100 randomly chosen identities for the test split.
# The draw uses a seeded generator over a sorted class list so that the same
# identities are held out on every run; an unseeded draw would silently change
# the split each time the cell is executed.
# NOTE: embeddings exported before this seed was introduced come from a
# different (unrecorded) split and have to be regenerated to match.
num_test_classes = 100
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)
all_classes = sorted(class_indices)
test_classes = split_rng.choice(all_classes, size=num_test_classes, replace=False)
held_out_classes = set(test_classes.tolist())
train_classes = [cls for cls in all_classes if cls not in held_out_classes]

# Sample indices belonging to the train and test identities
train_indices = [idx for cls in train_classes for idx in class_indices[cls]]
test_indices = [idx for cls in test_classes for idx in class_indices[cls]]

# Build the train and test subsets and their (unshuffled) loaders
train_dataset = torch.utils.data.Subset(dataset, train_indices)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=6)
test_dataset = torch.utils.data.Subset(dataset, test_indices)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=6)

In [None]:
# Display the test DataLoader object as a quick check that it has been created.
test_loader

In [10]:
def get_embeddings_batch(model, imgs_batch):
    """Embed every image of a batch with ``model.get_feat``, one image at a time.

    Args:
        model: Object exposing ``get_feat(img)``.
        imgs_batch: Iterable of images. Each item may be a NumPy array or a
            CPU torch tensor (which is what a DataLoader's default collate
            produces from NumPy samples).

    Returns:
        ``np.stack`` of the per-image ``get_feat`` results, in input order.
        When ``get_feat`` returns a ``(1, D)`` array per image, the result has
        shape ``(batch, 1, D)``.

    Raises:
        ValueError: From ``np.stack`` when ``imgs_batch`` is empty.
    """
    embeddings = []
    for img in imgs_batch:
        # Normalise to ndarray first: torch tensors have no .astype and their
        # .transpose takes two dims, so the conversions below would fail on them.
        img = np.asarray(img)
        # Bring the image to (H, W, 3) uint8
        if img.ndim == 2:  # greyscale: replicate the single channel three times
            img = np.stack([img] * 3, axis=-1)
        elif img.shape[0] == 3:
            img = img.transpose(1, 2, 0)  # (C, H, W) -> (H, W, C)
        elif img.shape[2] != 3:
            img = img[:, :, :3]  # keep only the first three channels
        img = img.astype(np.uint8)
        # NOTE(review): no channel reordering happens here; confirm the colour
        # order of the incoming images matches what model.get_feat expects.
        emb = model.get_feat(img)
        embeddings.append(emb)
    return np.stack(embeddings)


def prepare_and_save_embeddings(data_loader, embedder_r50, embedder_r100, csv_filename):
    """Embed every batch with both models and dump the result to a CSV file.

    Args:
        data_loader: Iterable yielding ``(imgs, labels)`` pairs, where
            ``labels`` exposes ``.numpy()``.
        embedder_r50: Model used for the ``emb_r50`` column.
        embedder_r100: Model used for the ``emb_r100`` column.
        csv_filename: Destination path of the CSV file.

    The CSV has one row per sample with the columns ``label``, ``emb_r50`` and
    ``emb_r100``; the embedding cells hold nested Python lists, which pandas
    writes out as their string representation.
    """
    records = []
    for batch_idx, (imgs, labels) in enumerate(data_loader):
        label_array = labels.numpy()

        # Embeddings of the current batch from each model
        feats_r50 = get_embeddings_batch(embedder_r50, imgs)
        feats_r100 = get_embeddings_batch(embedder_r100, imgs)

        # One record per sample of the batch
        records.extend(
            {
                'label': int(label_array[row]),
                'emb_r50': feats_r50[row].tolist(),
                'emb_r100': feats_r100[row].tolist(),
            }
            for row in range(len(label_array))
        )

        # Lightweight progress report every 100 batches
        if (batch_idx + 1) % 100 == 0:
            print(f"Processed {batch_idx + 1} batches")

    # Build the table in one go and persist it
    pd.DataFrame(records).to_csv(csv_filename, index=False)
    print(f"Embeddings saved to {csv_filename}")

In [20]:
import json

import numpy as np
import pandas as pd
from tqdm import tqdm

tqdm.pandas()


def _parse_embedding(text):
    """Turn one CSV cell (a stringified nested list of floats) into a 1-D array.

    The cell is parsed with ``json.loads`` instead of ``eval``: it accepts the
    same ``[[...]]`` list-of-floats syntax but cannot execute code that might
    be embedded in the file. The leading axis of length one is dropped by
    taking element 0.
    """
    return np.array(json.loads(text))[0]


# Load the CSV with the exported embeddings
df = pd.read_csv('train_embeddings.csv')

# Convert the embedding columns from strings to arrays
df['emb_r50'] = df['emb_r50'].progress_apply(_parse_embedding)
df['emb_r100'] = df['emb_r100'].progress_apply(_parse_embedding)

# Store in a binary format (parquet) so that later loads are fast.
# NOTE(review): only the train file is converted here, while a later cell also
# reads 'test_embeddings.parquet' - that file has to be produced the same way.
df.to_parquet('train_embeddings.parquet')

100%|██████████| 448575/448575 [07:28<00:00, 1001.22it/s]
100%|██████████| 448575/448575 [07:27<00:00, 1002.92it/s]


In [30]:
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
class EmbeddingDataset(Dataset):
    """Dataset of paired embeddings loaded from a parquet file.

    The file must provide the columns ``label``, ``emb_r50`` and ``emb_r100``.
    Each item is ``(emb_r50, emb_r100, label)`` with both embeddings returned
    as ``torch.float32`` tensors.
    """

    def __init__(self, parquet_file):
        """Read ``parquet_file`` and cache its columns for fast item access."""
        # Load the preprocessed data
        self.data = pd.read_parquet(parquet_file)
        self.labels = self.data['label'].values
        # Pull the embedding columns out once. Indexing these arrays is far
        # cheaper than `self.data.iloc[idx][col]`, which materialises a whole
        # row as a Series on every access (and was done twice per sample).
        self._emb_r50 = self.data['emb_r50'].to_numpy()
        self._emb_r100 = self.data['emb_r100'].to_numpy()

    def __len__(self):
        """Number of samples (rows of the parquet file)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(emb_r50, emb_r100, label)`` for row ``idx``."""
        # Convert the stored embeddings to float32 tensors
        emb_r50 = torch.tensor(self._emb_r50[idx], dtype=torch.float32)
        emb_r100 = torch.tensor(self._emb_r100[idx], dtype=torch.float32)
        label = self.labels[idx]
        return emb_r50, emb_r100, label

In [24]:
# Batch size used by the loaders that wrap these datasets.
batch_size = 256

# Build the datasets from the preprocessed parquet files
train_dataset = EmbeddingDataset('train_embeddings.parquet')
test_dataset = EmbeddingDataset('test_embeddings.parquet')

In [25]:
# Create the DataLoaders: training batches are shuffled, test batches are not.
# num_workers=0 keeps loading in the main process.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

In [38]:
class EmbeddingMapper(nn.Module):
    """Fully connected network that maps one embedding space onto another.

    Three hidden stages (widths 1024, 2048, 1024), each made of
    Linear -> ReLU -> BatchNorm1d, are followed by a plain linear projection
    to ``output_dim``. The stages are registered as ``lin1`` .. ``lin4``.
    """

    @staticmethod
    def _stage(in_features, out_features):
        """Build one hidden stage: Linear -> ReLU -> BatchNorm1d."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.ReLU(),
            nn.BatchNorm1d(out_features),
        )

    def __init__(self, input_dim=512, output_dim=512):
        """Create the layers.

        Args:
            input_dim: Size of the incoming embedding.
            output_dim: Size of the produced embedding.
        """
        super().__init__()
        self.lin1 = self._stage(input_dim, 1024)
        self.lin2 = self._stage(1024, 2048)
        self.lin3 = self._stage(2048, 1024)
        self.lin4 = nn.Linear(1024, output_dim)

    def forward(self, x):
        """Pass ``x`` through the four stages in order and return the result."""
        for stage in (self.lin1, self.lin2, self.lin3, self.lin4):
            x = stage(x)
        return x

In [39]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Mapper from 512-d inputs to 512-d outputs, trained with a mean-squared-error loss.
model = EmbeddingMapper(input_dim=512, output_dim=512).to(device)
criterion = nn.MSELoss()

# Set weight_decay for L2 regularisation
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

In [40]:
def cosine_similarity_torch(x1, x2):
    """Cosine similarity between matching rows of ``x1`` and ``x2``.

    The similarity is taken along dimension 1 with ``eps=1e-6``, so two
    ``(batch, dim)`` inputs give one value per row.
    """
    return nn.functional.cosine_similarity(x1, x2, dim=1, eps=1e-6)

num_epochs = 4


def evaluate_mapper(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Network mapping input embeddings to target embeddings.
        loader: Iterable of ``(inputs, targets, label)`` batches.
        criterion: Loss returning the mean over a batch.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, mean_cosine_similarity)``, both averaged per sample.
    """
    model.eval()
    loss_sum = 0.0
    cosine_sum = 0.0
    sample_count = 0
    with torch.no_grad():
        for inputs, targets, _ in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)

            batch = inputs.size(0)
            # criterion yields a batch mean; weight it by the batch size so the
            # final division gives a per-sample average even if the last batch
            # is smaller.
            loss_sum += criterion(outputs, targets).item() * batch
            cosine_sum += cosine_similarity_torch(outputs, targets).sum().item()
            sample_count += batch
    return loss_sum / sample_count, cosine_sum / sample_count


def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean train loss."""
    model.train()
    running_loss = 0.0
    for inputs, targets, _ in tqdm(loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(loader.dataset)


# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first epoch finishes.
os.makedirs('checkpoints', exist_ok=True)

# Baseline: quality of the untrained mapper on the test set
test_loss, avg_cosine_sim = evaluate_mapper(model, test_loader, criterion, device)
print(f'Zero shot cosine similarity: {avg_cosine_sim} ')

for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')
    epoch_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)

    # Evaluate on the test set after every epoch
    test_loss, avg_cosine_sim = evaluate_mapper(model, test_loader, criterion, device)

    # Checkpoint file names use the zero-based epoch index
    torch.save(model.state_dict(), f'checkpoints/refactor_2_embedding_mapper{epoch}.pth')

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, '
          f'Test Loss: {test_loss:.4f}, Cosine Similarity: {avg_cosine_sim:.4f}')

Zero shot cosine similarity: -0.0028013435777535867 
Epoch 1/4


100%|██████████| 1753/1753 [00:51<00:00, 33.78it/s]


Epoch 1/4, Train Loss: 0.3303, Test Loss: 0.2942, Cosine Similarity: 0.8638
Epoch 2/4


100%|██████████| 1753/1753 [00:51<00:00, 34.12it/s]


Epoch 2/4, Train Loss: 0.2713, Test Loss: 0.2780, Cosine Similarity: 0.8719
Epoch 3/4


100%|██████████| 1753/1753 [00:52<00:00, 33.57it/s]


Epoch 3/4, Train Loss: 0.2601, Test Loss: 0.2722, Cosine Similarity: 0.8747
Epoch 4/4


100%|██████████| 1753/1753 [01:08<00:00, 25.46it/s]


Epoch 4/4, Train Loss: 0.2545, Test Loss: 0.2676, Cosine Similarity: 0.8770


In [41]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The model and loss from the previous stage are reused on purpose, so their
# creation stays commented out here.
# model = EmbeddingMapper(input_dim=512, output_dim=512).to(device)
# criterion = nn.MSELoss()

# New Adam optimizer for the second stage with a smaller learning rate (1e-5);
# weight_decay provides L2 regularisation.
optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)

In [42]:
# Second training stage: continue training the existing `model` with the
# optimizer defined in the previous cell.
num_epochs = 10

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first epoch finishes.
os.makedirs('checkpoints', exist_ok=True)

# Starting point of this stage on the test set. The printed label is kept as
# in the first stage, although the model has already been trained at this point.
model.eval()
with torch.no_grad():
    test_loss = 0.0
    total_cosine_sim = 0.0
    total_samples = 0

    for inputs, targets, _ in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # criterion yields a batch mean; weight by batch size for a per-sample average
        test_loss += loss.item() * inputs.size(0)

        cosine_sim = cosine_similarity_torch(outputs, targets)
        total_cosine_sim += cosine_sim.sum().item()
        total_samples += inputs.size(0)

    test_loss = test_loss / len(test_loader.dataset)
    avg_cosine_sim = total_cosine_sim / total_samples
    print(f'Zero shot cosine similarity: {avg_cosine_sim} ')

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    print(f'Epoch {epoch+1}/{num_epochs}')
    for inputs, targets, _ in tqdm(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)

    # Evaluate on the test set after every epoch
    model.eval()
    with torch.no_grad():
        test_loss = 0.0
        total_cosine_sim = 0.0
        total_samples = 0

        for inputs, targets, _ in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item() * inputs.size(0)

            cosine_sim = cosine_similarity_torch(outputs, targets)
            total_cosine_sim += cosine_sim.sum().item()
            total_samples += inputs.size(0)

        test_loss = test_loss / len(test_loader.dataset)
        avg_cosine_sim = total_cosine_sim / total_samples

    # Checkpoint file names use the zero-based epoch index
    torch.save(model.state_dict(), f'checkpoints/refactor_2_stage2_embedding_mapper{epoch}.pth')

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, '
          f'Test Loss: {test_loss:.4f}, Cosine Similarity: {avg_cosine_sim:.4f}')

Zero shot cosine similarity: 0.8770138735102411 
Epoch 1/10


100%|██████████| 1753/1753 [00:51<00:00, 33.98it/s]


Epoch 1/10, Train Loss: 0.2236, Test Loss: 0.2333, Cosine Similarity: 0.8938
Epoch 2/10


100%|██████████| 1753/1753 [00:51<00:00, 33.85it/s]


Epoch 2/10, Train Loss: 0.2168, Test Loss: 0.2316, Cosine Similarity: 0.8946
Epoch 3/10


100%|██████████| 1753/1753 [00:51<00:00, 34.06it/s]


Epoch 3/10, Train Loss: 0.2158, Test Loss: 0.2308, Cosine Similarity: 0.8949
Epoch 4/10


100%|██████████| 1753/1753 [00:51<00:00, 34.09it/s]


Epoch 4/10, Train Loss: 0.2151, Test Loss: 0.2302, Cosine Similarity: 0.8952
Epoch 5/10


100%|██████████| 1753/1753 [00:51<00:00, 33.90it/s]


Epoch 5/10, Train Loss: 0.2145, Test Loss: 0.2296, Cosine Similarity: 0.8955
Epoch 6/10


100%|██████████| 1753/1753 [00:51<00:00, 33.90it/s]


Epoch 6/10, Train Loss: 0.2141, Test Loss: 0.2292, Cosine Similarity: 0.8957
Epoch 7/10


100%|██████████| 1753/1753 [00:52<00:00, 33.41it/s]


Epoch 7/10, Train Loss: 0.2137, Test Loss: 0.2287, Cosine Similarity: 0.8959
Epoch 8/10


100%|██████████| 1753/1753 [00:51<00:00, 33.98it/s]


Epoch 8/10, Train Loss: 0.2133, Test Loss: 0.2287, Cosine Similarity: 0.8959
Epoch 9/10


100%|██████████| 1753/1753 [00:51<00:00, 34.19it/s]


Epoch 9/10, Train Loss: 0.2130, Test Loss: 0.2283, Cosine Similarity: 0.8961
Epoch 10/10


100%|██████████| 1753/1753 [00:58<00:00, 30.08it/s]


Epoch 10/10, Train Loss: 0.2127, Test Loss: 0.2281, Cosine Similarity: 0.8962


In [1]:
# Collect the per-sample cosine similarity between mapped and target
# embeddings over the whole test set.
# NOTE: this cell needs `model`, `test_loader`, `device` and
# `cosine_similarity_torch` from the cells above; the recorded NameError comes
# from running it in a fresh kernel.
model.eval()
similarity_batches = []
total_cosine_sim = 0.0
total_samples = 0
with torch.no_grad():
    for inputs, targets, _ in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        cosine_sim = cosine_similarity_torch(outputs, targets)
        # Move to host memory before converting: NumPy cannot read CUDA tensors.
        similarity_batches.append(cosine_sim.detach().cpu().numpy())
        total_cosine_sim += cosine_sim.sum().item()
        total_samples += inputs.size(0)

# One flat array with a value per test sample. Keeping a list of per-batch
# arrays would make a histogram treat every batch as a separate data set.
similarities = np.concatenate(similarity_batches)


NameError: name 'model' is not defined

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Histogram of the per-sample cosine similarities on the test set.
# `similarities` is flattened first so that this works both for a list of
# per-batch arrays and for an already flat array.
similarity_values = np.concatenate([np.atleast_1d(batch) for batch in similarities])

fig, ax = plt.subplots()
ax.hist(similarity_values, bins=50)
ax.set(
    title='Cosine similarity between mapped and target embeddings (test set)',
    xlabel='cosine similarity',
    ylabel='number of samples',
);

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.