In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.optim import Adam
from torch.utils.data import DataLoader
import math
import pandas as pd
import os
from torch.utils.data import Dataset
import cv2
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from torchvision import transforms
import torch.nn.functional as F

In [2]:
# Locations of the identity annotation file and of the aligned face crops.
idf_path = "D:\\dlsPart2\\identity_CelebA3.txt"
img_folder = "D:\\dlsPart2\\Aligned_Images3"

# The annotation file has no header: one "<filename> <person_id>" pair per line.
identity_df = pd.read_csv(
    idf_path,
    sep=' ',
    header=None,
    names=['filename', 'person_id'],
)

# Keep only the annotation rows whose image is actually present in the folder.
files = set(os.listdir(img_folder))
is_on_disk = identity_df['filename'].isin(files)
filtered_df = identity_df[is_on_disk]

# Summary of the usable subset: size, number of identities, images per identity.
num_people = filtered_df['person_id'].nunique()
images_per_person = filtered_df['person_id'].value_counts().describe().to_dict()

stats = {
    'total_images': len(filtered_df),
    'unique_people': num_people,
    'images_per_person': images_per_person
}
print(stats)

{'total_images': 20000, 'unique_people': 956, 'images_per_person': {'count': 956.0, 'mean': 20.92050209205021, 'std': 1.5626391151459447, 'min': 3.0, '25%': 20.0, '50%': 21.0, '75%': 22.0, 'max': 27.0}}


In [3]:
# Per-identity train/validation split.
# Written because the network initially struggled to train: a person's images
# could end up in the training set while being absent from the validation set.
def split_by_person(df, test_size=0.2, random_state=42):
    """Split ``df`` into train/validation parts separately for every person.

    Each identity with at least two rows is split on its own with
    ``train_test_split``, so every such identity ends up in both parts.
    Identities with a single row are dropped from both parts.

    Args:
        df: DataFrame with a ``person_id`` column.
        test_size: forwarded to ``train_test_split`` for every identity.
        random_state: forwarded to ``train_test_split`` for every identity.

    Returns:
        ``(train_df, val_df)`` - two DataFrames with a fresh 0..n-1 index.

    Raises:
        ValueError: if no identity has at least two rows.
    """
    train_dfs = []
    val_dfs = []

    # sort=False keeps the identities in order of first appearance, i.e. the
    # same order that iterating over df['person_id'].unique() would give.
    for _, person_data in df.groupby('person_id', sort=False):
        if len(person_data) < 2:  # a single image cannot be split
            continue
        train_person, val_person = train_test_split(
            person_data, test_size=test_size, random_state=random_state
        )
        train_dfs.append(train_person)
        val_dfs.append(val_person)

    if not train_dfs:
        raise ValueError(
            "split_by_person: no person_id has at least 2 rows, nothing to split"
        )

    train_df = pd.concat(train_dfs, ignore_index=True)
    val_df = pd.concat(val_dfs, ignore_index=True)

    return train_df, val_df

train_df, val_df = split_by_person(filtered_df, test_size=0.20, random_state=42)

# One shared person_id -> class index mapping for every row that is actually
# used, so train and validation labels refer to the same classes.
all_used_ids = pd.concat([train_df, val_df])['person_id'].unique()
id_to_label_map = {}
for class_index, id_val in enumerate(all_used_ids):
    id_to_label_map[id_val] = class_index

num_classes_actual = len(id_to_label_map)
print(f"Фактическое количество классов для обучения: {num_classes_actual}")

# Sanity check: both splits must contain exactly the same identities.
train_classes = set(train_df['person_id'].unique())
val_classes = set(val_df['person_id'].unique())
only_in_val = val_classes - train_classes
only_in_train = train_classes - val_classes
print(f"Number of unique classes in train: {len(train_classes)}")
print(f"Number of unique classes in val: {len(val_classes)}")
print(f"Classes in val but not in train: {len(only_in_val)}")
print(f"Classes in train but not in val: {len(only_in_train)}")

# Class balance check: distribution of images per identity in each split.
train_class_counts = train_df['person_id'].value_counts()
val_class_counts = val_df['person_id'].value_counts()
print("\nTrain dataset class distribution:")
print(train_class_counts.describe())
print("\nValidation dataset class distribution:")
print(val_class_counts.describe())

Фактическое количество классов для обучения: 956
Number of unique classes in train: 956
Number of unique classes in val: 956
Classes in val but not in train: 0
Classes in train but not in val: 0

Train dataset class distribution:
count    956.000000
mean      16.375523
std        1.177533
min        2.000000
25%       16.000000
50%       16.000000
75%       17.000000
max       21.000000
Name: count, dtype: float64

Validation dataset class distribution:
count    956.000000
mean       4.544979
std        0.522846
min        1.000000
25%        4.000000
50%        5.000000
75%        5.000000
max        6.000000
Name: count, dtype: float64


In [4]:
class CelebDataset(Dataset):
    """Face-identity dataset that reads images from a folder with OpenCV.

    Args:
        img_folder: directory that contains the image files.
        identity_data: DataFrame with ``filename`` and ``person_id`` columns.
        id_to_label: mapping from ``person_id`` to the class index.
        training: when True, random augmentations are applied in
            ``__getitem__``; otherwise only tensor conversion and normalization.

    Raises:
        ValueError: if a ``person_id`` of a kept row is missing from
            ``id_to_label``.
    """

    def __init__(self, img_folder, identity_data, id_to_label, training=True):
        self.img_folder = img_folder
        self.training = training
        self.id_to_label = id_to_label

        # Defensive filter: keep only rows whose file exists in img_folder.
        available_files = set(os.listdir(img_folder))
        self.identity_df = identity_data[identity_data['filename'].isin(available_files)]

        # Series.map() silently yields NaN for ids absent from the mapping, which
        # would turn every label into a float and only fail later inside the
        # loss. Fail here instead, naming the offending ids.
        mapped_labels = self.identity_df['person_id'].map(self.id_to_label)
        unmapped = mapped_labels.isna()
        if unmapped.any():
            missing_ids = self.identity_df.loc[unmapped, 'person_id'].unique().tolist()
            raise ValueError(
                f"person_id values without an entry in id_to_label: {missing_ids}"
            )

        self.img_paths = self.identity_df['filename'].tolist()
        self.labels = mapped_labels.tolist()

        # Same normalization constants for both pipelines.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

        self.train_transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomHorizontalFlip(p=0.5),  # horizontal mirroring
            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),  # shifts and rescaling
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # colour changes
            transforms.ToTensor(),
            normalize
        ])

        self.val_transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            normalize
        ])

    def __len__(self):
        """Return the number of usable images."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(transformed_image, label)``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_path = os.path.join(self.img_folder, self.img_paths[idx])
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Ошибка загрузки: {img_path}")

        # OpenCV decodes to BGR; convert to RGB before the transform pipeline.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pipeline = self.train_transforms if self.training else self.val_transforms
        image = pipeline(image)

        label = self.labels[idx]
        return image, label

In [5]:
# Same data locations as in the loading cell above.
idf_path = "D:\\dlsPart2\\identity_CelebA3.txt"
img_folder = "D:\\dlsPart2\\Aligned_Images3"

# Both datasets share one person_id -> label mapping; only the training one augments.
train_dataset = CelebDataset(
    img_folder,
    train_df,
    id_to_label_map,
    training=True,
)
val_dataset = CelebDataset(
    img_folder,
    val_df,
    id_to_label_map,
    training=False,
)

In [None]:
class ArcFaceLayer(nn.Module):
    """ArcFace (additive angular margin) classification head.

    Produces ``s * cos(theta)`` logits between the L2-normalized embedding and
    the L2-normalized class weights; for the target class the angle is
    increased by the margin ``m`` before taking the cosine.

    Args:
        in_features: embedding dimensionality.
        out_features: number of classes.
        s: scale applied to the logits.
        m: angular margin in radians.
        eps: lower bound for ``1 - cos^2`` under the square root; keeps the
            gradient finite when the target cosine is exactly +-1.
    """

    def __init__(self, in_features, out_features, s=15.0, m=0.3, eps=1e-12):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.eps = eps
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        # Constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # For theta + m > pi (cos(theta) <= th) cos(theta + m) stops being
        # monotonic, so a linear penalty cos(theta) - mm is used there instead.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, embedding, label=None):
        """Compute scaled logits.

        Args:
            embedding: tensor of shape ``(batch, in_features)``.
            label: class indices of shape ``(batch,)``. When ``None`` no margin
                is applied and plain ``s * cos(theta)`` logits are returned
                (label-free inference).

        Returns:
            Tensor of shape ``(batch, out_features)``.
        """
        cosine = F.linear(F.normalize(embedding), F.normalize(self.weight))
        if label is None:
            return cosine * self.s

        target = label.view(-1, 1).long()
        cos_theta_yi = torch.gather(cosine, 1, target).view(-1)
        cos_theta_yi = torch.clamp(cos_theta_yi, -1.0, 1.0)

        # d/dx sqrt(x) is infinite at x = 0, which produced NaN gradients when
        # the target cosine hit +-1 exactly; clamping the radicand avoids that.
        sin_theta_yi = torch.sqrt(
            torch.clamp(1.0 - torch.pow(cos_theta_yi, 2), min=self.eps)
        )

        cos_theta_plus_m = cos_theta_yi * self.cos_m - sin_theta_yi * self.sin_m

        phi = torch.where(
            cos_theta_yi > self.th, cos_theta_plus_m, cos_theta_yi - self.mm
        )

        # Replace only the target-class entry of every row with its margin logit.
        output = cosine.scatter(1, target, phi.view(-1, 1))
        return output * self.s

In [7]:
class ResNetWithEmbeddings(nn.Module):
    """ResNet-50 backbone followed by a dropout + linear embedding head.

    The backbone is created with ``pretrained=True``. All of its parameters are
    frozen except those of ``layer3`` and ``layer4``; the final ``fc`` layer is
    dropped and replaced by the embedding head.

    Args:
        embedding_size: dimensionality of the produced embedding.
    """

    def __init__(self, embedding_size=512):
        super().__init__()
        backbone = models.resnet50(pretrained=True)

        # Freeze the whole backbone, then re-enable the two deepest stages.
        backbone.requires_grad_(False)
        for stage in (backbone.layer3, backbone.layer4):
            stage.requires_grad_(True)

        # Every child module except the last one (the classifier).
        backbone_children = list(backbone.children())
        self.features = nn.Sequential(*backbone_children[:-1])

        head_dropout = nn.Dropout(p=0.7)
        head_linear = nn.Linear(backbone.fc.in_features, embedding_size)
        self.embedding_layer = nn.Sequential(head_dropout, head_linear)

    def forward(self, x):
        """Return the embedding for a batch of images ``x``."""
        feature_maps = self.features(x)
        # Collapse everything after the batch dimension into one vector.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.embedding_layer(flat)

In [None]:
def _read_rgb_image(img_path):
    """Read ``img_path`` with OpenCV and return the image as an RGB array.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    image = cv2.imread(img_path)
    if image is None:
        raise FileNotFoundError(f"Ошибка загрузки: {img_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def train_arcface(model, arcface_layer, train_dataset, val_dataset, epochs=50, batch_size=32, lr=0.001, device='cuda', savepath="model.pth", pre_train=None, tta_steps=2):
    """Train ``model`` + ``arcface_layer`` with SGD and validate with TTA.

    Args:
        model: embedding network exposing ``features`` and ``embedding_layer``
            sub-modules (they get separate learning rates).
        arcface_layer: margin head called as ``arcface_layer(embeddings, labels)``.
        train_dataset: dataset yielding ``(image, label)``.
        val_dataset: dataset yielding ``(image, label)``. When ``tta_steps > 1``
            it must also expose ``img_folder`` and ``img_paths`` (in item
            order), because augmented views are re-read from disk.
        epochs: number of epochs.
        batch_size: batch size of both loaders.
        lr: learning rate of the heads; the backbone uses ``lr / 2``.
        device: device for the modules and batches.
        savepath: where the checkpoint with the best validation accuracy goes.
        pre_train: optional checkpoint with ``model_state_dict`` and
            ``arcface_state_dict`` to start from.
        tta_steps: total number of views averaged per validation image: the
            clean image plus ``tta_steps - 1`` randomly augmented copies.
            Values below 2 disable test-time augmentation.

    Returns:
        ``(model, history)``. ``model`` holds the weights of the last epoch
        (the best ones are only in ``savepath``); ``history`` holds per-epoch
        loss/accuracy lists plus ``best_val_acc`` / ``best_val_loss``.

    Raises:
        FileNotFoundError: if a validation image cannot be re-read for TTA.

    Note:
        Validation logits are computed with the labels passed to
        ``arcface_layer``, i.e. with the margin applied to the true class, so
        validation loss/accuracy are measured on the same scale as training.
    """
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    # shuffle=False is required: TTA maps batch positions back to dataset indices.
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    optimizer = torch.optim.SGD([
        {'params': model.features.parameters(), 'lr': lr/2},
        {'params': model.embedding_layer.parameters(), 'lr': lr},
        {'params': arcface_layer.parameters(), 'lr': lr}
    ], momentum=0.9, weight_decay=5e-4)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    arcface_layer.to(device)

    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'best_val_acc': 0.0,
        'best_val_loss': float('inf')
    }

    if pre_train is not None:
        # The checkpoint only contains state dicts, so the restricted
        # (non-pickle-executing) loader is sufficient.
        checkpoint = torch.load(pre_train, map_location=device, weights_only=True)
        model.load_state_dict(checkpoint['model_state_dict'])
        arcface_layer.load_state_dict(checkpoint['arcface_state_dict'])
        print(f"Загружены предобученные веса из {pre_train}")

    tta_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    # The clean image is always one of the views; the rest are augmented.
    n_aug_views = max(int(tta_steps) - 1, 0)

    for epoch in range(epochs):

        # ---- training ----
        model.train()
        arcface_layer.train()
        running_loss, correct, total = 0.0, 0, 0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}')

        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            embeddings = model(images)
            outputs = arcface_layer(embeddings, labels)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            running_loss += loss.item() * images.size(0)
            progress_bar.set_postfix({
                'loss': running_loss / total,
                'acc': f"{100 * correct / total:.2f}%",
                'lr': optimizer.param_groups[0]['lr']
            })

        epoch_train_loss = running_loss / len(train_dataset)
        epoch_train_acc = 100 * correct / total

        history['train_loss'].append(epoch_train_loss)
        history['train_acc'].append(epoch_train_acc)

        # ---- validation (with test-time augmentation) ----
        model.eval()
        arcface_layer.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        with torch.no_grad():
            for batch_idx, (images, labels) in enumerate(val_loader):
                images, labels = images.to(device), labels.to(device)

                # View 1: the clean image as produced by the dataset.
                view_outputs = [arcface_layer(model(images), labels)]

                if n_aug_views > 0:
                    # Decode the batch's source files once, augment per view.
                    start_idx = batch_idx * batch_size
                    end_idx = min(start_idx + batch_size, len(val_dataset))
                    raw_images = [
                        _read_rgb_image(os.path.join(val_dataset.img_folder, img_path))
                        for img_path in val_dataset.img_paths[start_idx:end_idx]
                    ]
                    for _ in range(n_aug_views):
                        augmented_images = torch.stack(
                            [tta_transform(raw) for raw in raw_images]
                        ).to(device)
                        view_outputs.append(arcface_layer(model(augmented_images), labels))

                avg_outputs = torch.mean(torch.stack(view_outputs), dim=0)

                # Loss and accuracy are both measured on the averaged logits.
                avg_loss = criterion(avg_outputs, labels)
                val_loss += avg_loss.item() * images.size(0)
                _, predicted = torch.max(avg_outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        epoch_val_loss = val_loss / len(val_dataset)
        epoch_val_acc = 100 * val_correct / val_total

        history['val_loss'].append(epoch_val_loss)
        history['val_acc'].append(epoch_val_acc)

        scheduler.step(epoch_val_loss)

        # Checkpoint on every new best validation accuracy.
        if epoch_val_acc > history['best_val_acc']:
            history['best_val_acc'] = epoch_val_acc
            history['best_val_loss'] = epoch_val_loss
            torch.save({
                'model_state_dict': model.state_dict(),
                'arcface_state_dict': arcface_layer.state_dict(),
            }, savepath)
            print(f"Модель сохранена с val_acc: {epoch_val_acc:.2f}%")

        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {epoch_train_loss:.4f} | Val Loss: {epoch_val_loss:.4f} | "
              f"Train Acc: {epoch_train_acc:.2f}% | Val Acc: {epoch_val_acc:.2f}%")

    return model, history

In [None]:
# Embedding backbone and ArcFace head; the head's input width must equal the
# backbone's embedding size.
model = ResNetWithEmbeddings(embedding_size=512)
arcface_layer = ArcFaceLayer(
    in_features=512,
    out_features=num_classes_actual,
    s=45.0,
    m=0.1,
)

# Continue from a previously saved checkpoint and write the new best one
# under a different name.
train_kwargs = {
    'model': model,
    'arcface_layer': arcface_layer,
    'train_dataset': train_dataset,
    'val_dataset': val_dataset,
    'epochs': 80,
    'batch_size': 128,
    'lr': 0.0005,
    'device': 'cuda',
    'savepath': 'best_ARC_face_resnet50VBEST2.pth',
    'pre_train': 'best_ARC_face_resnet50VBEST.pth',
}
trained_model, history = train_arcface(**train_kwargs)

  checkpoint = torch.load(pre_train, map_location=device)


Загружены предобученные веса из best_ARC_face_resnet50V26.pth


Epoch 1/80: 100%|██████████| 123/123 [03:50<00:00,  1.88s/it, loss=0.487, acc=89.01%, lr=0.00025]


Модель сохранена с val_acc: 70.31%
Epoch 1/80 | Train Loss: 0.4874 | Val Loss: 2.9326 | Train Acc: 89.01% | Val Acc: 70.31%


Epoch 2/80: 100%|██████████| 123/123 [02:18<00:00,  1.12s/it, loss=0.497, acc=88.50%, lr=0.00025]


Модель сохранена с val_acc: 70.47%
Epoch 2/80 | Train Loss: 0.4972 | Val Loss: 2.9179 | Train Acc: 88.50% | Val Acc: 70.47%


Epoch 3/80: 100%|██████████| 123/123 [02:14<00:00,  1.10s/it, loss=0.497, acc=88.64%, lr=0.00025]


Модель сохранена с val_acc: 70.61%
Epoch 3/80 | Train Loss: 0.4971 | Val Loss: 2.9221 | Train Acc: 88.64% | Val Acc: 70.61%


Epoch 4/80: 100%|██████████| 123/123 [03:45<00:00,  1.83s/it, loss=0.487, acc=88.89%, lr=0.00025]


Модель сохранена с val_acc: 70.89%
Epoch 4/80 | Train Loss: 0.4868 | Val Loss: 2.8831 | Train Acc: 88.89% | Val Acc: 70.89%


Epoch 5/80: 100%|██████████| 123/123 [02:18<00:00,  1.12s/it, loss=0.484, acc=88.95%, lr=0.00025]


Epoch 5/80 | Train Loss: 0.4839 | Val Loss: 2.9002 | Train Acc: 88.95% | Val Acc: 70.68%


Epoch 6/80: 100%|██████████| 123/123 [02:16<00:00,  1.11s/it, loss=0.496, acc=89.11%, lr=0.00025]


Epoch 6/80 | Train Loss: 0.4958 | Val Loss: 2.9179 | Train Acc: 89.11% | Val Acc: 70.33%


Epoch 7/80: 100%|██████████| 123/123 [02:22<00:00,  1.16s/it, loss=0.496, acc=89.02%, lr=0.00025]


Epoch 7/80 | Train Loss: 0.4962 | Val Loss: 2.9227 | Train Acc: 89.02% | Val Acc: 70.17%


Epoch 8/80: 100%|██████████| 123/123 [02:17<00:00,  1.12s/it, loss=0.49, acc=89.03%, lr=0.00025] 


Epoch 8/80 | Train Loss: 0.4895 | Val Loss: 2.9350 | Train Acc: 89.03% | Val Acc: 70.79%


Epoch 9/80: 100%|██████████| 123/123 [02:22<00:00,  1.16s/it, loss=0.468, acc=89.68%, lr=0.000125]


Epoch 9/80 | Train Loss: 0.4679 | Val Loss: 2.9438 | Train Acc: 89.68% | Val Acc: 70.40%


Epoch 10/80: 100%|██████████| 123/123 [02:19<00:00,  1.13s/it, loss=0.462, acc=89.24%, lr=0.000125]


Epoch 10/80 | Train Loss: 0.4622 | Val Loss: 2.8922 | Train Acc: 89.24% | Val Acc: 70.70%


Epoch 11/80: 100%|██████████| 123/123 [02:19<00:00,  1.13s/it, loss=0.456, acc=89.50%, lr=0.000125]


Epoch 11/80 | Train Loss: 0.4563 | Val Loss: 2.8829 | Train Acc: 89.50% | Val Acc: 70.61%


Epoch 12/80: 100%|██████████| 123/123 [02:18<00:00,  1.13s/it, loss=0.463, acc=89.43%, lr=0.000125]


Модель сохранена с val_acc: 70.98%
Epoch 12/80 | Train Loss: 0.4627 | Val Loss: 2.9194 | Train Acc: 89.43% | Val Acc: 70.98%


Epoch 13/80: 100%|██████████| 123/123 [02:16<00:00,  1.11s/it, loss=0.456, acc=89.64%, lr=6.25e-5]


Epoch 13/80 | Train Loss: 0.4556 | Val Loss: 2.8969 | Train Acc: 89.64% | Val Acc: 70.56%


Epoch 14/80: 100%|██████████| 123/123 [02:16<00:00,  1.11s/it, loss=0.454, acc=89.49%, lr=6.25e-5]


Epoch 14/80 | Train Loss: 0.4537 | Val Loss: 2.8918 | Train Acc: 89.49% | Val Acc: 70.82%


Epoch 15/80: 100%|██████████| 123/123 [02:17<00:00,  1.12s/it, loss=0.462, acc=89.35%, lr=6.25e-5]


Epoch 15/80 | Train Loss: 0.4624 | Val Loss: 2.9017 | Train Acc: 89.35% | Val Acc: 70.54%


Epoch 16/80: 100%|██████████| 123/123 [02:16<00:00,  1.11s/it, loss=0.442, acc=90.02%, lr=6.25e-5]


Epoch 16/80 | Train Loss: 0.4420 | Val Loss: 2.8900 | Train Acc: 90.02% | Val Acc: 70.40%


Epoch 17/80: 100%|██████████| 123/123 [02:17<00:00,  1.11s/it, loss=0.455, acc=89.70%, lr=3.13e-5]


Epoch 17/80 | Train Loss: 0.4546 | Val Loss: 2.8974 | Train Acc: 89.70% | Val Acc: 70.45%


Epoch 18/80: 100%|██████████| 123/123 [02:17<00:00,  1.12s/it, loss=0.444, acc=89.73%, lr=3.13e-5]


Epoch 18/80 | Train Loss: 0.4442 | Val Loss: 2.8929 | Train Acc: 89.73% | Val Acc: 70.36%


Epoch 19/80: 100%|██████████| 123/123 [02:17<00:00,  1.12s/it, loss=0.461, acc=89.14%, lr=3.13e-5]


Epoch 19/80 | Train Loss: 0.4613 | Val Loss: 2.8859 | Train Acc: 89.14% | Val Acc: 70.86%


Epoch 20/80:  71%|███████   | 87/123 [01:38<00:40,  1.13s/it, loss=0.449, acc=89.65%, lr=3.13e-5]


KeyboardInterrupt: 