In [2]:
# deepface provides DeepFace.extract_faces, used for face detection in the inference loop.
!pip -q install deepface
# NOTE(review): presumably needed for deepface's 'yolov8' detector backend — confirm.
!pip -q install ultralytics

In [1]:
import pandas as pd
import numpy as np

import cv2
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

from tqdm import tqdm

import torch
from PIL import Image
from torchvision.transforms import v2
from torchvision import transforms

import os

from sklearn import preprocessing



In [2]:
# Detector names passed to DeepFace's `detector_backend` argument; the
# inference loop selects one of them by position in this list.
backends = [
    'opencv', 'ssd', 'dlib',
    'mtcnn', 'retinaface', 'mediapipe',
    'yolov8', 'yunet', 'fastmtcnn',
]

# Use the first CUDA device when PyTorch can see one, otherwise stay on the CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [3]:
# Root directory that the `file_name` entries of labels.csv are relative to.
path = "/kaggle/input/fac-data-p1/data/mnt/md0/projects/sami-hackathon/private/data/"

# Training annotations: one row per labelled face.
data = pd.read_csv("/kaggle/input/fac-data-p1/labels.csv")
cols = data.columns

# Image file name of every row, and the raw attribute values stored from the
# fifth column onwards.
files_list = data['file_name'].values
y_raw = data[cols[4:]].values

In [6]:
import ast

# Distinct values of every attribute column (fifth column of labels.csv onwards),
# in order of first appearance. The order of each list fixes the column order of
# the one-hot encodings built below.
labels_set = {col: data[col].unique() for col in cols[4:]}

# Replace the first-appearance order of the age groups with an explicit
# youngest-to-oldest order.
labels_set['age'] = ['Baby', 'Kid', 'Teenager', '20-30s', '40-50s', 'Senior']
print(labels_set)

# One-hot encode every attribute: labels[col] is a dense array with one row per
# CSV row and one column per category, in the order given by labels_set[col].
labels = dict()
for col in cols[4:]:
    column_values = data[col].values.reshape(-1, 1)
    enc = preprocessing.OneHotEncoder(categories=[labels_set[col]]).fit(column_values)
    labels[col] = enc.transform(column_values).toarray()
    print(enc.categories_)

temp = data[['file_name', 'bbox']].values
file_names = temp[:, 0]
bboxs = temp[:, 1]

# Each bbox cell is the text of a Python list. It comes from an external CSV, so
# it is parsed with ast.literal_eval (literals only) rather than eval(), which
# would execute arbitrary expressions embedded in the file.
bbxs = [ast.literal_eval(raw_bbox) for raw_bbox in bboxs]
bboxes = np.array(bbxs, int)
    

{'age': ['Baby', 'Kid', 'Teenager', '20-30s', '40-50s', 'Senior'], 'race': array(['Caucasian', 'Mongoloid', 'Negroid'], dtype=object), 'masked': array(['unmasked', 'masked'], dtype=object), 'skintone': array(['mid-light', 'light', 'mid-dark', 'dark'], dtype=object), 'emotion': array(['Neutral', 'Happiness', 'Anger', 'Surprise', 'Fear', 'Sadness',
       'Disgust'], dtype=object), 'gender': array(['Male', 'Female'], dtype=object)}
[array(['Baby', 'Kid', 'Teenager', '20-30s', '40-50s', 'Senior'],
      dtype=object)]
[array(['Caucasian', 'Mongoloid', 'Negroid'], dtype=object)]
[array(['unmasked', 'masked'], dtype=object)]
[array(['mid-light', 'light', 'mid-dark', 'dark'], dtype=object)]
[array(['Neutral', 'Happiness', 'Anger', 'Surprise', 'Fear', 'Sadness',
       'Disgust'], dtype=object)]
[array(['Male', 'Female'], dtype=object)]


In [50]:
# Override the age and emotion label lists with alphabetically sorted versions.
# The inference loop indexes these lists with the argmax of the age / emotion
# model outputs, so position i must be the name of class i of those models.
# NOTE(review): presumably alphabetical order is the class-index order the
# age/emotion checkpoints were trained with — confirm against their training code.
# This replaces the youngest-to-oldest age order that was in effect when the
# one-hot `labels` arrays were built.
labels_set['age'] = ['20-30s', '40-50s', 'Baby', 'Kid', 'Senior', 'Teenager']
labels_set['emotion']= ['Anger', 'Disgust', 'Fear', 'Happiness', 'Neutral', 'Sadness', 'Surprise']
# The same mapping written out as class index -> label, kept for reference:
#labels_set['age'] = {0: '20-30s', 1: '40-50s', 3: 'Kid', 4: 'Senior', 2: 'Baby', 5: 'Teenager'}
#labels_set['emotion'] = {4: 'Neutral', 3: 'Happiness', 0: 'Anger', 6: 'Surprise', 2: 'Fear', 5: 'Sadness', 1: 'Disgust'}

In [10]:

class Dataset(torch.utils.data.Dataset):
    """Face-crop dataset yielding ``(transformed_image, labels_dict)`` pairs.

    Args:
        list_files: sequence of image file names, relative to ``root``.
        bboxes: per-sample boxes indexed as ``[x, y, w, h]``, or ``None`` to
            use the whole image without cropping.
        labels: mapping with the keys 'age', 'race', 'masked', 'skintone',
            'emotion' and 'gender'; every value is indexable by sample index.
        root: directory the file names are resolved against.
        transform: optional callable applied to the (cropped) PIL image. When
            omitted, the module-level ``mytransform`` is looked up at call
            time, which is what this class did before the argument existed.
    """

    # Attribute names copied from ``labels`` into every sample's label dict.
    _ATTRIBUTES = ("age", "race", "masked", "skintone", "emotion", "gender")

    def __init__(self, list_files, bboxes, labels, root, transform=None):
        self.labels = labels
        self.list_files = list_files
        self.bboxes = bboxes
        self.root = root
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the number of file names."""
        return len(self.list_files)

    def __getitem__(self, index):
        """Load sample ``index``: open, optionally crop, transform, attach labels."""
        name = self.list_files[index]

        X = Image.open(self.root + '/' + name).convert('RGB')
        if self.bboxes is not None:
            box = self.bboxes[index]
            left, top, width, height = box[0], box[1], box[2], box[3]
            # PIL's crop takes (left, upper, right, lower), so turn the
            # width/height into the opposite corner.
            X = X.crop((left, top, left + width, top + height))

        # Read from the labels handed to the constructor. The previous version
        # used a module-level `labels` here, silently ignoring the argument.
        y = {key: self.labels[key][index] for key in self._ATTRIBUTES}

        transform = self.transform if self.transform is not None else mytransform
        X = transform(X)

        return X, y


# Model

In [23]:
import torch.nn.functional as F

class BasicConv2d(nn.Module):
    """Bias-free 2-D convolution followed by batch norm (eps=1e-3) and ReLU.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride,
            padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU (the ReLU overwrites the BN output)."""
        normalized = self.bn(self.conv(x))
        return F.relu(normalized, inplace=True)

class InceptionBlock(nn.Module):
    """Four parallel branches whose outputs are concatenated along channels.

    Every branch preserves the spatial size, so the result has
    ``out_1x1 + out_3x3 + out_5x5 + out_pool`` channels.

    Args:
        in_channels: channels of the input feature map.
        out_1x1: output channels of the plain 1x1 branch.
        red_3x3: channels after the 1x1 reduction in front of the 3x3 conv.
        out_3x3: output channels of the 3x3 branch.
        red_5x5: channels after the 1x1 reduction in front of the 5x5 conv.
        out_5x5: output channels of the 5x5 branch.
        out_pool: output channels of the max-pool + 1x1 branch.
    """

    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_pool):
        super().__init__()

        # Branch 1: 1x1 convolution only.
        self.branch1 = BasicConv2d(in_channels, out_1x1, kernel_size=1)

        # Branch 2: 1x1 reduction, then a 3x3 convolution padded to keep size.
        reduce_3x3 = BasicConv2d(in_channels, red_3x3, kernel_size=1, padding=0)
        conv_3x3 = BasicConv2d(red_3x3, out_3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then a 5x5 convolution padded to keep size.
        reduce_5x5 = BasicConv2d(in_channels, red_5x5, kernel_size=1)
        conv_5x5 = BasicConv2d(red_5x5, out_5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: stride-1 3x3 max pool, then a 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        project = BasicConv2d(in_channels, out_pool, kernel_size=1)
        self.branch4 = nn.Sequential(pool, project)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        outputs = [
            self.branch1(x),
            self.branch2(x),
            self.branch3(x),
            self.branch4(x),
        ]
        return torch.cat(outputs, 1)

    
class SpatialAttention(nn.Module):
    """Single-channel spatial attention map with values in (0, 1).

    The input is reduced over the channel axis by mean and by max; the two
    resulting maps are stacked and passed through one bias-free convolution
    and a sigmoid.

    Args:
        kernel_size: size of the square convolution kernel; padding is
            ``kernel_size // 2``.
    """

    def __init__(self, kernel_size=7):
        super().__init__()

        half = kernel_size // 2
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=half, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the attention map computed from the channel statistics of ``x``."""
        channel_mean = torch.mean(x, dim=1, keepdim=True)
        channel_max = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([channel_mean, channel_max], dim=1)
        return self.sigmoid(self.conv1(stacked))
    

class SkintoneRaceModel(nn.Module):
    """Two independent convolutional towers over the same input image.

    One tower predicts skin tone (4 classes), the other race (3 classes). The
    towers share no convolution weights; only the global-average-pool and
    dropout modules are reused. ``forward`` returns softmax probabilities for
    both heads.
    """

    def __init__(self):
        super().__init__()

        # Skin-tone tower: four conv stages, ending at 384 channels.
        self.conv5x5x48_1 = BasicConv2d(3, 48, kernel_size=(5, 5), stride=2, padding=2)
        self.conv3x3x96_1 = BasicConv2d(48, 96, kernel_size=(3, 3), stride=1, padding=1)
        self.conv3x3x192_1 = BasicConv2d(96, 192, kernel_size=(3, 3), stride=1, padding=1)
        self.conv3x3x384 = BasicConv2d(192, 384, kernel_size=(3, 3), stride=1, padding=1)

        # Race tower: three conv stages, one inception block, one more conv.
        # Its first convolution pads by 1, unlike the skin-tone tower's 2.
        self.conv5x5x48 = BasicConv2d(3, 48, kernel_size=(5, 5), stride=2, padding=1)
        self.conv3x3x96 = BasicConv2d(48, 96, kernel_size=(3, 3), stride=1, padding=1)
        self.conv3x3x192 = BasicConv2d(96, 192, kernel_size=(3, 3), stride=1, padding=1)

        # The inception block emits 128 + 256 + 256 + 128 = 768 channels.
        self.inception1 = InceptionBlock(192, 128, 128, 256, 128, 256, 128)
        self.conv3x3x512 = BasicConv2d(128 + 256 + 256 + 128, 512, kernel_size=(3, 3), stride=1, padding=1)

        self.gap = torch.nn.AdaptiveAvgPool2d(1)

        self.fc_race = nn.Linear(512, 3)
        self.fc_skintone = nn.Linear(384, 4)
        self.dropout = nn.Dropout(0.3)

    @staticmethod
    def _halve(features):
        """2x2 max pooling with stride 2."""
        return nn.functional.max_pool2d(features, (2, 2), 2)

    def _classify(self, features, classifier):
        """Global-average-pool, dropout, linear layer, softmax over classes."""
        batch = features.shape[0]
        pooled = self.gap(features).view(batch, -1)
        logits = classifier(self.dropout(pooled))
        return nn.functional.softmax(logits, dim=1)

    def forward(self, x):
        """Return ``{"race": probs, "skintone": probs}`` for the input batch."""
        # Skin tone: every stage is followed by a 2x2 max pool.
        skin = x
        for stage in (self.conv5x5x48_1, self.conv3x3x96_1,
                      self.conv3x3x192_1, self.conv3x3x384):
            skin = self._halve(stage(skin))
        skin_probs = self._classify(skin, self.fc_skintone)

        # Race: same pattern, with the inception block as the fourth stage.
        race = x
        for stage in (self.conv5x5x48, self.conv3x3x96, self.conv3x3x192,
                      self.inception1, self.conv3x3x512):
            race = self._halve(stage(race))
        race_probs = self._classify(race, self.fc_race)

        return {"race": race_probs, "skintone": skin_probs}
    
    
class MaskedModel(nn.Module):
    """Masked / unmasked classifier with a spatial-attention weighted pool.

    Args:
        att_out: when true, ``forward`` also returns the spatial attention map
            alongside the prediction dict.
    """

    def __init__(self,  att_out = False):
        super().__init__()
        self.att_out = att_out

        self.conv5x5x3 = BasicConv2d(3, 3, kernel_size=(5, 5), stride=1, padding=2)
        self.conv3x3x96 = BasicConv2d(3, 96, kernel_size=(3, 3), stride=1, padding=1)
        self.conv3x3x192 = BasicConv2d(96, 192, kernel_size=(3, 3), stride=1, padding=1)
        self.conv3x3x256 = BasicConv2d(192, 256, kernel_size=(3, 3), stride=1, padding=1)
        self.inception1 = InceptionBlock(256, 192, 128, 384, 96, 384, 192)

        self.spatial_module = SpatialAttention()

        self.gap = torch.nn.AdaptiveAvgPool2d(1)
        # The inception block emits 192 + 384 + 384 + 192 = 1152 channels.
        self.fc = nn.Linear(192 + 384 + 384 + 192, 2)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return ``{"masked": probs}``, plus the attention map if ``att_out``."""
        # Five stages, each followed by a 2x2 max pool: a 3 x 224 x 224 input
        # leaves this loop as 1152 x 7 x 7.
        for stage in (self.conv5x5x3, self.conv3x3x96, self.conv3x3x192,
                      self.conv3x3x256, self.inception1):
            x = nn.functional.max_pool2d(stage(x), (2, 2), 2)

        _, channels, dim2, dim3 = x.shape

        # One attention weight per spatial position, broadcast over channels.
        sp_att = self.spatial_module(x).view(-1, 1, dim2, dim3)
        x = x * sp_att.expand(-1, channels, dim2, dim3)

        x = self.gap(x).view(-1, channels)
        x = self.fc(self.dropout(x))
        x = nn.functional.softmax(x, dim=1)

        prediction = {"masked": x}
        if self.att_out:
            return prediction, sp_att
        return prediction
    

class GenderModel(nn.Module):
    """Two-class gender classifier: four conv stages and one inception block.

    ``forward`` returns softmax probabilities under the key ``"gender"``.
    """

    def __init__(self):
        super().__init__()
        self.conv5x5x3 = BasicConv2d(3, 3, kernel_size=(5, 5), stride=1, padding=2)
        self.conv3x3x96 = BasicConv2d(3, 96, kernel_size=(3, 3), stride=1, padding=1)
        self.conv3x3x192 = BasicConv2d(96, 192, kernel_size=(3, 3), stride=1, padding=1)
        self.conv3x3x384 = BasicConv2d(192, 384, kernel_size=(3, 3), stride=1, padding=1)
        self.inception1 = InceptionBlock(384, 192, 128, 384, 128, 384, 192)

        self.gap = torch.nn.AdaptiveAvgPool2d(1)

        # The inception block emits 192 + 384 + 384 + 192 = 1152 channels.
        self.fc = nn.Linear(192 + 384 + 384 + 192, 2)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return ``{"gender": probs}`` for the input batch."""
        # Five stages, each followed by a 2x2 max pool: a 3 x 224 x 224 input
        # leaves this loop as 1152 x 7 x 7.
        for stage in (self.conv5x5x3, self.conv3x3x96, self.conv3x3x192,
                      self.conv3x3x384, self.inception1):
            x = nn.functional.max_pool2d(stage(x), (2, 2), 2)

        batch, channels, _, _ = x.shape
        x = self.gap(x).view(batch, channels)
        x = self.fc(self.dropout(x))

        return {"gender": nn.functional.softmax(x, dim=1)}

In [46]:
class WrapModel(nn.Module):
    """Bundle the five attribute models behind one ``forward`` call.

    The skin-tone/race, masked and gender networks are built here and filled
    from saved state dicts. The age and emotion models are loaded as whole
    objects with ``torch.load``. Every sub-model is moved to the notebook-level
    ``device`` and put in eval mode.

    ``forward`` returns one dict with the keys ``gender``, ``race``,
    ``skintone``, ``masked``, ``emotion`` and ``age``.
    """

    def __init__(self):
        super().__init__()
        self.model_skin_race = SkintoneRaceModel()
        self.model_masked = MaskedModel()
        self.model_gender = GenderModel()

        # map_location lets checkpoints saved on a GPU load on a CPU-only
        # machine, matching the CPU fallback of `device`.
        # NOTE(review): these two files are unpickled as complete objects, so
        # only load trusted files; newer PyTorch releases may also require
        # weights_only=False for whole-model pickles — confirm for the
        # installed version.
        self.model_age = torch.load('/kaggle/input/fac-model-vit/model_age.pth', map_location=device)
        self.model_emotion = torch.load('/kaggle/input/fac-model-vit/model_emotion.pth', map_location=device)

        self.model_skin_race.load_state_dict(
            torch.load('/kaggle/input/pixta-model2/skintone_race_model_epoch_49', map_location=device))
        self.model_masked.load_state_dict(
            torch.load('/kaggle/input/model-pixta/masked_model_epoch_49', map_location=device))
        self.model_gender.load_state_dict(
            torch.load('/kaggle/input/pixta-model2/gender_model_epoch_49', map_location=device))

        for sub_model in (self.model_skin_race, self.model_masked, self.model_gender,
                          self.model_age, self.model_emotion):
            sub_model.to(device)
            sub_model.eval()

    def forward(self, x):
        """Run every sub-model once on ``x`` and merge the predictions.

        The three CNNs already return dicts; the raw outputs of the emotion
        and age models are stored under ``'emotion'`` and ``'age'``.
        """
        gender = self.model_gender(x)
        skin_race = self.model_skin_race(x)
        masked = self.model_masked(x)
        emotion = self.model_emotion(x)
        # The age model used to be evaluated twice per call with the first
        # result discarded; one pass gives the same output.
        age = self.model_age(x)

        return {**gender, **skin_race, **masked, 'emotion': emotion, 'age': age}
    

# Run

In [47]:
# Build the combined model and switch it to inference mode; WrapModel loads
# all checkpoints in its constructor.
model = WrapModel().eval()
# Last expression of the cell, so the module structure is displayed.
model.to(device)

WrapModel(
  (model_skin_race): SkintoneRaceModel(
    (conv5x5x48_1): BasicConv2d(
      (conv): Conv2d(3, 48, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
      (bn): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv3x3x96_1): BasicConv2d(
      (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv3x3x192_1): BasicConv2d(
      (conv): Conv2d(96, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv3x3x384): BasicConv2d(
      (conv): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv5x5x48): BasicConv2d(
      (conv): Conv2d(3

In [12]:
import tensorflow as tf
# Turn on memory growth for every GPU TensorFlow reports.
# NOTE(review): presumably this keeps TensorFlow (used by deepface) from
# reserving the GPU memory the PyTorch models also need — confirm.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [54]:
from deepface import DeepFace
from tqdm import tqdm


# Public test set: the reference answer file and the image directory.
csv_file = '/kaggle/input/fac-public-data-test-1/answer.csv'
image_dir = '/kaggle/input/fac-public-data-test-1/public_test/public_test'
df_old = pd.read_csv(csv_file)

# Empty result table; the inference loop adds one row per detected face.
df = pd.DataFrame(
    columns=[
        "file_name", "bbox", "image_id",
        "race", "age", "emotion",
        "gender", "skintone", "masked",
    ],
    index=None,
)

In [55]:
import json
# Mapping from image file name to the image id written to the result table.
json_file_path = '/kaggle/input/fac-public-data-test-1/file_name_to_image_id.json'
with open(json_file_path) as json_file:
    data_name_to_id = json.loads(json_file.read())

# Every entry of the test image directory, in the order the OS returns them.
all_files = os.listdir(image_dir)

In [56]:
# Show the label lists that the inference loop indexes with each model's argmax.
labels_set

{'age': ['20-30s', '40-50s', 'Baby', 'Kid', 'Senior', 'Teenager'],
 'race': array(['Caucasian', 'Mongoloid', 'Negroid'], dtype=object),
 'masked': array(['unmasked', 'masked'], dtype=object),
 'skintone': array(['mid-light', 'light', 'mid-dark', 'dark'], dtype=object),
 'emotion': ['Anger',
  'Disgust',
  'Fear',
  'Happiness',
  'Neutral',
  'Sadness',
  'Surprise'],
 'gender': array(['Male', 'Female'], dtype=object)}

In [57]:
# Detect faces in every test image, classify each face with `model`, and collect
# one result row per face.
#
# Rows are gathered in a plain list and turned into a DataFrame once at the end.
# Growing `df` with pd.concat inside the loop copies the whole frame on every
# row (quadratic cost), and appended duplicate rows if the cell was re-run.
records = []

for image_name in tqdm(all_files, desc="Processing images"):
    with torch.no_grad():
        image_path = os.path.join(image_dir, image_name)

        # enforce_detection=False: do not raise when the detector finds no face.
        faces = DeepFace.extract_faces(
            img_path=image_path,
            target_size=(224, 224),
            detector_backend=backends[6],
            enforce_detection=False,
        )

        if isinstance(faces, list):
            for detection in faces:
                area = detection['facial_area']
                x, y, w, h = area['x'], area['y'], area['w'], area['h']

                # H x W x C array -> C x H x W float tensor with a batch axis.
                # NOTE(review): the copy() presumably makes the array
                # contiguous so torch can convert it — keep it.
                face = torch.Tensor(detection['face'].copy()).permute(2, 0, 1)
                results = model(face.unsqueeze(0).to(device))

                idx_skin = results['skintone'].argmax().item()
                idx_race = results['race'].argmax().item()
                idx_masked = results['masked'].argmax().item()
                idx_gender = results['gender'].argmax().item()
                idx_emotion = results['emotion'][0].argmax().item()
                idx_age = results['age'][0].argmax().item()

                records.append({
                    'file_name': image_name,
                    'bbox': str([x, y, w, h]),
                    # `image_id` rather than `id`, which shadowed the builtin.
                    'image_id': data_name_to_id[image_name],
                    'race': labels_set['race'][idx_race],
                    'age': labels_set['age'][idx_age],
                    'emotion': labels_set['emotion'][idx_emotion],
                    'gender': labels_set['gender'][idx_gender],
                    'skintone': labels_set['skintone'][idx_skin],
                    'masked': labels_set['masked'][idx_masked],
                })
        else:
            # No usable detection result: emit a placeholder row for the image.
            records.append({
                'file_name': image_name,
                'bbox': '[0, 0, 10, 10]',
                'image_id': data_name_to_id[image_name],
                'race': 'None',
                'age': 'None',
                'emotion': 'None',
                'gender': 'None',
                'skintone': 'None',
                'masked': 'None',
            })
        torch.cuda.empty_cache()

# Build the result table in one step, keeping the column order of `df`.
df = pd.DataFrame(records, columns=df.columns)

Processing images: 100%|██████████| 2184/2184 [06:36<00:00,  5.51it/s]


In [68]:
# Write the predictions to the Kaggle working directory. The path is defined
# once so the printed message names the file that is actually written; it used
# to report 'answer1.csv' while saving to '/kaggle/working/answer.csv'.
updated_csv_file = '/kaggle/working/answer.csv'
df.to_csv(updated_csv_file, index=False)
print(f"CSV file is saved as {updated_csv_file}.")

CSV file is saved as answer1.csv.


In [61]:
# Display the prediction table built by the inference loop.
df

Unnamed: 0,file_name,bbox,image_id,race,age,emotion,gender,skintone,masked
0,69408896.jpg,"[601, 351, 206, 333]",991,Mongoloid,20-30s,Happiness,Female,light,unmasked
1,69934968.jpg,"[853, 247, 248, 309]",1010,Caucasian,20-30s,Anger,Female,light,masked
2,3410477.jpg,"[630, 259, 477, 721]",214,Mongoloid,Senior,Fear,Male,light,unmasked
3,44989956.jpg,"[1365, 263, 170, 229]",376,Caucasian,20-30s,Happiness,Female,light,unmasked
4,79053495.jpg,"[397, 110, 303, 417]",1316,Mongoloid,20-30s,Neutral,Female,light,masked
...,...,...,...,...,...,...,...,...,...
2253,image_7495.jpg,"[181, 254, 605, 686]",2092,Caucasian,Kid,Happiness,Female,mid-light,unmasked
2254,image_4584.jpg,"[234, 211, 583, 703]",1950,Caucasian,20-30s,Neutral,Female,light,unmasked
2255,12050001.jpg,"[493, 206, 338, 479]",61,Caucasian,40-50s,Neutral,Male,light,unmasked
2256,67251347.jpg,"[1064, 208, 272, 387]",910,Mongoloid,20-30s,Happiness,Male,mid-light,unmasked


In [65]:
# Read the saved predictions back to check the written file.
# NOTE(review): the relative path assumes the working directory is
# /kaggle/working, where the CSV was written — confirm.
pd.read_csv('answer.csv')

Unnamed: 0,file_name,bbox,image_id,race,age,emotion,gender,skintone,masked
0,69408896.jpg,"[601, 351, 206, 333]",991,Mongoloid,20-30s,Happiness,Female,light,unmasked
1,69934968.jpg,"[853, 247, 248, 309]",1010,Caucasian,20-30s,Anger,Female,light,masked
2,3410477.jpg,"[630, 259, 477, 721]",214,Mongoloid,Senior,Fear,Male,light,unmasked
3,44989956.jpg,"[1365, 263, 170, 229]",376,Caucasian,20-30s,Happiness,Female,light,unmasked
4,79053495.jpg,"[397, 110, 303, 417]",1316,Mongoloid,20-30s,Neutral,Female,light,masked
...,...,...,...,...,...,...,...,...,...
2253,image_7495.jpg,"[181, 254, 605, 686]",2092,Caucasian,Kid,Happiness,Female,mid-light,unmasked
2254,image_4584.jpg,"[234, 211, 583, 703]",1950,Caucasian,20-30s,Neutral,Female,light,unmasked
2255,12050001.jpg,"[493, 206, 338, 479]",61,Caucasian,40-50s,Neutral,Male,light,unmasked
2256,67251347.jpg,"[1064, 208, 272, 387]",910,Mongoloid,20-30s,Happiness,Male,mid-light,unmasked
