In [1]:
%cd ..

/mnt/e/projects/face_recognition


In [2]:
import os

import albumentations as A
import cv2
import pandas as pd
import torch
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, Dataset
from torchvision.models import efficientnet_v2_s
from sklearn.model_selection import train_test_split

from config import CFG

In [3]:
def get_dataset_and_filter_by_label_trashold(label_trashold: int = 10):
    """
    удалим из датасета, тех людей, у которых мало фото - label_trashold.
    Увеличивая label_trashold, мы умешьнаем размер датасета.

    """
    df_identity = pd.read_csv(CFG.identity_path, sep=" ", header=None).sort_values(by=0).reset_index(drop=True)
    df_identity.columns = ["image", "label"]
    cropped_imgs = os.listdir(CFG.img_folder_dst)
    data = pd.DataFrame({"image": cropped_imgs})
    data = data.join(df_identity.set_index("image"), on="image", how="left")
    data_tmp = data.groupby(["label"]).agg({"label": "count"}).rename(columns={"label": "label_count"})
    print(f"Число уникальных людей {len(data_tmp)}, каждый человек имеет, как минимум {label_trashold} фоток.")

    del_label = data_tmp.loc[data_tmp.label_count <= label_trashold, :].index.values
    mask = data["label"].isin(set(del_label))
    data = data[~mask].reset_index(drop=True)
    print(f"Всего фоток {len(data)}")
    df_labels = data["label"].unique()
    map_lables = {l: i for i, l in enumerate(df_labels)}
    data["label"] = data["label"].map(lambda x: map_lables[x])

    return data


def split_by_person(train_size: int = 600, val_zize: int = 200, test_size: int = 200):
    """
    # Сплит по людям.
    train_size, val_zize, test_size - number of unique person
    """
    data = get_dataset_and_filter_by_label_trashold()
    data_tmp = data.groupby(["label"]).agg({"label": "count"}).rename(columns={"label": "label_count"})

    mask_train = data["label"].isin(data_tmp.index[:train_size])
    df_train = data[mask_train].sort_values(by="label").reset_index(drop=True)

    mask_val = data["label"].isin(data_tmp.index[train_size : train_size + val_zize])
    df_val = data[mask_val].sort_values(by="label").reset_index(drop=True)

    mask_test = data["label"].isin(data_tmp.index[train_size + val_zize : train_size + val_zize + test_size])
    df_test = data[mask_test].sort_values(by="label").reset_index(drop=True)

    df: pd.DataFrame
    for df in [df_train, df_val, df_test]:
        df_labels = df["label"].unique()
        map_lables = {l: i for i, l in enumerate(df_labels)}
        df["label"] = df["label"].map(lambda x: map_lables[x])

    return df_train, df_val, df_test


def split_dataset_by_photo(df, label_col, num_val_samples_per_class):
    validation_data = []
    train_data = []
    test_data = []

    for label in df[label_col].unique():
        label_data = df[df[label_col] == label]

        val_samples = label_data.sample(num_val_samples_per_class)
        validation_data.append(val_samples)
        label_data = label_data.drop(val_samples.index)

        test_samples = label_data.sample(num_val_samples_per_class)
        test_data.append(test_samples)
        label_data = label_data.drop(test_samples.index)

        train_data.append(label_data)

    train_df = pd.concat(train_data).reset_index(drop=True)
    validation_df = pd.concat(validation_data).reset_index(drop=True)
    test_df = pd.concat(test_data).reset_index(drop=True)
    print(f"df({len(df)}) -> train({len(train_df)}) val({len(validation_df)}) test({len(test_df)})")

    return train_df, validation_df, test_df


# Split the dataset

# df_train, df_val, df_test = split_by_person()
df_train, df_val, df_test = split_dataset_by_photo(get_dataset_and_filter_by_label_trashold(28), "label", 4)

Число уникальных людей 10174, каждый человек имеет 28 фоток.
Всего фоток 59959
df(59959) -> train(43767) val(8096) test(8096)


In [4]:
class CelebaDataet(Dataset):
    def __init__(self, df: pd.DataFrame) -> None:
        # image, label
        self.df = df.values
        self.transform = A.Compose(
            [
                # A.Resize(height=128, width=128),
                A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                ToTensorV2(),
            ]
        )

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        img_path, label = self.df[index]
        img = cv2.imread(os.path.join(CFG.img_folder_dst, img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transform(image=img)["image"]
        return img, label

In [5]:
train_dataset = CelebaDataet(df_train)
val_dataset = CelebaDataet(df_val)
test_dataset = CelebaDataet(df_test)

train_dataloader = DataLoader(train_dataset, batch_size=24, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=24, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=24, shuffle=False)

In [6]:
labels_amount = len(df_train["label"].unique())
labels_amount

2024

In [7]:
model = efficientnet_v2_s()
model.load_state_dict(torch.load("models/efficientnet_v2_s.pth"))
model.classifier = nn.Sequential(nn.Dropout(p=0.2), nn.Linear(in_features=1280, out_features=labels_amount))
None

In [8]:
from copy import deepcopy
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def train(model, train_loader, val_loader, opt, epochs=10):
    train_losses, val_losses, val_full_acc, train_full_acc = [], [], [], []
    best_acc = 0.0
    loss_fn = nn.CrossEntropyLoss()
    best_model_weights = deepcopy(model.state_dict())

    for epoch in range(epochs):
        print("========= Epoch %d/%d =========" % (epoch + 1, epochs))

        # TRAIN
        model.train()
        current_train_loss = 0
        current_train_correct = 0

        for inputs, labels in tqdm(train_loader):
            X_batch = inputs.to(device)
            Y_batch = labels.to(device)

            opt.zero_grad()

            # forward
            Y_pred = model(X_batch)
            preds = torch.argmax(Y_pred, 1)
            loss = loss_fn(Y_pred, Y_batch)
            loss.backward()
            opt.step()

            current_train_loss += loss.item() * X_batch.size(0)
            current_train_correct += torch.sum(preds == Y_batch)

        opt.step()

        train_loss = current_train_loss / len(train_dataset)
        train_losses.append(train_loss)
        train_acc = current_train_correct / len(train_dataset)
        train_full_acc.append(train_acc)
        print("train loss =", train_loss)
        print("train acc = {:.2f}%".format(train_acc.item() * 100))

        # VALIDATION
        model.eval()
        current_val_loss = 0
        current_val_correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                X_val = inputs.to(device)
                Y_val = labels.to(device)

                outputs = model(X_val)
                val_loss = loss_fn(outputs, Y_val)
                preds = torch.argmax(outputs, 1)
                current_val_correct += torch.sum(preds == Y_val)
                current_val_loss += val_loss.item() * X_val.size(0)

        val_acc = current_val_correct / len(val_dataset)
        val_loss = current_val_loss / len(val_dataset)

        print("val loss =", val_loss)
        print("val acc = {:.2f}%".format(val_acc.item() * 100))
        val_losses.append(val_loss)
        val_full_acc.append(val_acc)

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_weights = deepcopy(model.state_dict())
            print("Save new model!")

    return best_model_weights, train_losses, val_losses, val_full_acc, train_full_acc

In [9]:
EPOCHS = 5
model.to(device)

opt = torch.optim.AdamW(model.parameters())
best_model_weights, train_losses, val_losses, val_full_acc, train_full_acc = train(
    model, train_dataloader, val_dataloader, opt, epochs=EPOCHS
)



100%|██████████| 1824/1824 [12:42<00:00,  2.39it/s]


train loss = 6.717970895933904
train acc = 1.70%
val loss = 5.546547515825792
val acc = 5.63%
Save new model!


100%|██████████| 1824/1824 [10:19<00:00,  2.94it/s]


train loss = 4.150379753773024
train acc = 20.02%
val loss = 3.1126482963797604
val acc = 35.84%
Save new model!


100%|██████████| 1824/1824 [12:09<00:00,  2.50it/s]


train loss = 2.099376675688597
train acc = 54.51%
val loss = 1.8500721984701194
val acc = 60.67%
Save new model!


 61%|██████    | 1107/1824 [08:49<05:56,  2.01it/s]