# Компьютерное зрение и области смежные с ним

In [75]:
import pandas as pd
from sklearn.model_selection import train_test_split
from PIL import Image
import cv2
from base64 import b64decode
import numpy as np
import torch
from torchvision import transforms
from torchvision.models import get_model

class Dataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, transform=None):
        self._x = dataframe['image'].values
        self._y = dataframe[['age', 'gender', 'race']].values.astype('float32')
        self._transform = transform

    def __len__(self):
        return len(self._x)

    def __getitem__(self, idx):
        x = self._x[idx]
        x = Image.fromarray(cv2.imdecode(np.frombuffer(b64decode(x.encode('utf-8')), dtype=np.uint8), cv2.IMREAD_COLOR))
        if self._transform:
            x = self._transform(x)
        y = self._y[idx]
        return x, y

dt = pd.read_csv('assets/utkface/data.csv')
train_dt, val_dt = train_test_split(dt)

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.1, saturation=0.1),
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset, val_dataset = Dataset(train_dt, transform=train_transform), Dataset(val_dt, transform=val_transform)

train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0
)
val_dataloader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=0
)

In [76]:
class FaceAttributesModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        
        self.blocks = [
            torch.nn.Sequential(
                torch.nn.Conv2d(kernel_size=3, in_channels=3, out_channels=32),
                torch.nn.BatchNorm2d(num_features=32),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2)
            ),
            torch.nn.Sequential(
                torch.nn.Conv2d(kernel_size=3, in_channels=32, out_channels=64),
                torch.nn.BatchNorm2d(num_features=64),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2)
            ),
            torch.nn.Sequential(
                torch.nn.Conv2d(kernel_size=3, in_channels=64, out_channels=128),
                torch.nn.BatchNorm2d(num_features=128),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2)
            )
        ]

        self.age_head = torch.nn.Sequential(
            torch.nn.Conv2d(kernel_size=1, in_channels=128, out_channels=64),
            torch.nn.ReLU(),
            torch.nn.AdaptiveAvgPool2d(3),
            torch.nn.Flatten(start_dim=1),
            torch.nn.Linear(in_features=64 * 3 * 3, out_features=1)
        )

        self.gender_head = torch.nn.Sequential(
            torch.nn.Conv2d(kernel_size=1, in_channels=128, out_channels=64),
            torch.nn.ReLU(),
            torch.nn.AdaptiveAvgPool2d(3),
            torch.nn.Flatten(start_dim=1),
            torch.nn.Linear(in_features=64 * 3 * 3, out_features=2)
        )

        self.race_head = torch.nn.Sequential(
            torch.nn.Conv2d(kernel_size=1, in_channels=128, out_channels=64),
            torch.nn.ReLU(),
            torch.nn.AdaptiveAvgPool2d(3),
            torch.nn.Flatten(start_dim=1),
            torch.nn.Linear(in_features=64 * 3 * 3, out_features=5)
        )

    def forward(self, x):
        for block in self.blocks:
            x = block(x)
        age = self.age_head(x)
        gender = self.gender_head(x)
        race = self.race_head(x)
        return age, gender, race


class FaceAttributesModel1(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = get_model('mobilenet_v3_small', weights='DEFAULT')
        self.backbone.classifier = torch.nn.Identity()
        self.age_head = torch.nn.Linear(in_features=576, out_features=1)

        self.gender_head = torch.nn.Linear(in_features=576, out_features=2)

        self.race_head = torch.nn.Linear(in_features=576, out_features=5)

    def forward(self, x):
        x = self.backbone(x)
        age = self.age_head(x)
        gender = self.gender_head(x)
        race = self.race_head(x)
        return age, gender, race

In [78]:
model = FaceAttributesModel1()
loss_f_age = torch.nn.MSELoss()
loss_f_gender = torch.nn.CrossEntropyLoss()
loss_f_race = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

num_epochs = 25

for epoch in range(num_epochs):
    print('=========== TRAIN ===========')
    model.train()
    lr_scheduler.step()
    sum_loss_value = 0.0
    for idx, batch in enumerate(train_dataloader):
        optimizer.zero_grad()
        data, targets = batch
        age, gender, race = model(data)

        loss_value_age = loss_f_age(age, targets[:, [0]])

        loss_value_gender = loss_f_gender(gender, targets[:, 1].long())

        loss_value_race = loss_f_race(race, targets[:, 2].long())
        
        loss_value = loss_value_age * 1.0 + loss_value_gender * 100.0 + loss_value_race * 40.0
        loss_value.backward()
        optimizer.step()
        
        sum_loss_value += loss_value.item()

        if idx % 50 == 1:
            print(f'loss [{idx / len(train_dataloader) * 100} %]: {sum_loss_value / idx}')

        if idx == 500:
            break
        
    print(f'loss: {sum_loss_value / len(train_dataloader)}')
        
    print('=========== VAL ===========')
    model.eval()
    metric_value_age, metric_value_gender, metric_value_race, n = 0.0, 0.0, 0.0, 0
    for batch in val_dataloader:
        data, targets = batch
        with torch.no_grad():
            age, gender, race  = model(data)
            metric_value_age += torch.nn.functional.l1_loss(age, targets[:, [0]], reduction='sum')
            metric_value_gender += (gender.argmax(1) == targets[:, 1]).sum()
            metric_value_race += (race.argmax(1) == targets[:, 2]).sum()
            n += len(data)
    print(f'AGE MAE ERROR: {metric_value_age / n}')
    print(f'GENDER ACCURACY ERROR: {metric_value_gender / n}')
    print(f'RACE ACCURACY ERROR: {metric_value_race / n}')

loss [0.0449842555105713 %]: 1865.388427734375
loss [2.2941970310391366 %]: 631.0397219190411
loss [4.543409806567701 %]: 532.6556615546198
loss [6.792622582096267 %]: 482.86437422392385
loss [9.041835357624832 %]: 462.7685754880383
loss [11.291048133153396 %]: 448.46520084213927
loss [13.540260908681962 %]: 437.11374834130373
loss [15.789473684210526 %]: 421.6942397332259
loss [18.038686459739093 %]: 410.66528525792455
loss [20.287899235267655 %]: 399.2611316757033
loss: 87.9034741084639
AGE MAE ERROR: 25.8533878326416
GENDER ACCURACY ERROR: 0.6276362538337708
RACE ACCURACY ERROR: 0.23148304224014282
loss [0.0449842555105713 %]: 964.6327209472656


KeyboardInterrupt: 