In [1]:
# !pip install onnxruntime-gpu insightface

Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease [1581 B]
Get:2 https://packages.cloud.google.com/apt gcsfuse-focal InRelease [1227 B]   
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  Packages [1673 kB]
Get:4 https://packages.cloud.google.com/apt cloud-sdk InRelease [1618 B]       
Get:5 http://security.ubuntu.com/ubuntu focal-security InRelease [128 kB]      
Hit:6 http://archive.ubuntu.com/ubuntu focal InRelease                         
Get:7 https://packages.cloud.google.com/apt google-fast-socket InRelease [1071 B]
Get:8 https://packages.cloud.google.com/apt gcsfuse-focal/main amd64 Packages [26.3 kB]
Get:9 http://archive.ubuntu.com/ubuntu focal-updates InRelease [128 kB]        
Get:10 https://packages.cloud.google.com/apt cloud-sdk/main all Packages [1517 kB]
Get:11 https://packages.cloud.google.com/apt cloud-sdk/main amd64 Packages [3232 kB]
Get:12 http://security.ubuntu.com/ubuntu focal-security/univ

In [2]:
import os
import json

from PIL import Image
from tqdm.notebook import tqdm
import gc
import numpy as np
import pandas as pd
import time
import copy
from collections import Counter
import ast

# from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import GradScaler, autocast
from torch.utils.tensorboard import SummaryWriter
import torch.nn.init as init

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import KFold

from timm.scheduler.scheduler import Scheduler
from timm.loss import LabelSmoothingCrossEntropy
import timm

from torchvision import transforms as T
import torchvision

import cv2
# import insightface
# from insightface.app import FaceAnalysis
# from insightface.data import get_image as ins_get_image
import matplotlib.pyplot as plt

# Dataset Preparation for ABAW

In [2]:
def data_builder(label_path, train = True):
    # read annotations
    df = pd.read_csv(label_path, names = ['image', 'val', 'arousal', 'expr', 'au1', 'au2', 'au3', 'au4', 'au5', 'au6', 'au7', 'au8', 'au9', 'au10', 'au11', 'au12'] )
    # Remove the first row
    df = df.drop(index=0)

    # reset index
    df = df.reset_index(drop=True)

    df[['folder', 'image']] = df['image'].str.split('/', expand=True)

    grouped = df.groupby(['folder'], as_index= False).apply(lambda x: x.sort_values('image'))
    grouped['expr'] = grouped['expr'].astype(int)
    grouped = grouped.reset_index(drop=True)

    # generating data dictionary
    data_dict = {}
    emo = ['Neutral', 'Anger', 'Disgust', 'Fear', 'Happiness', 'Sadness','Surprise', 'Other']
    seq = 0
    current = -1
    for index, row in grouped.iterrows():
        if row['expr'] != current:
            current = row['expr']
            if train:
                seq += 1

        if row['expr'] == -1:
            continue

        if row['folder'] in data_dict:
            data_dict[row['folder']].append(
                (os.path.join(row['folder'], row['image']), row['expr'])
            )
        else:
            data_dict[row['folder']] = [(os.path.join(row['folder'], row['image']), row['expr']), ]
            
    return data_dict

def most_common_element(lst):
    if not lst:
        return None  # Return None if the list is empty
    counter = Counter(lst)
    most_common = counter.most_common(1)[0]  # Get the most common element
    return most_common[0]

def sequence_extractor(data_dict, data_path, min_stride = 5, sequence_length = 5, train = True):
    train_seqs = []
    train_labels = []
    for folder in data_dict.keys():
        s_len = len(data_dict[folder])
        max_start_index = s_len - (sequence_length - 1) * min_stride
        for i in range(0, max_start_index, sequence_length * min_stride):
            sublist = [os.path.join(data_path, data_dict[folder][i + j * min_stride][0]) for j in range(5)]
            train_seqs.append(sublist)
            t_lab = [data_dict[folder][i + j * min_stride][1] for j in range(5)]

            train_labels.append(most_common_element(t_lab))
    return train_seqs, train_labels


In [4]:
# len(data_dict['107']['Other0'])
val_seq = []
val_labels = []
data_path = '/kaggle/input/abaw-7-dataset/cropped_aligned'


def most_common_element(lst):
    if not lst:
        return None  # Return None if the list is empty
    counter = Counter(lst)
    most_common = counter.most_common(1)[0]  # Get the most common element
    return most_common[0]

stride = 4
for key in data_dict.keys():
    for exp in data_dict[key].keys():
        n = len(data_dict[key][exp])
        # Calculate the maximum starting index that allows creating a sublist of length 5
        max_start_index = n - (5 - 1) * stride
        for i in range(0, max_start_index, 5 * stride):
            sublist = [os.path.join(data_path, data_dict[key][exp][i + j * stride][0]) for j in range(5)]
            val_seq.append(sublist)
            t_lab = [data_dict[key][exp][i + j * stride][1] for j in range(5)]
            
            val_labels.append(most_common_element(t_lab))


len(val_seq)
# data_dict['107']['Other0'][0]

NameError: name 'data_dict' is not defined

In [3]:
class ABAWFeatureDataset(Dataset):
    def __init__(self, features, labels, transform = None):
        self.features = features
        self.labels = labels
        self.length = len(labels)
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        frames = [Image.open(i).convert('RGB') for i in self.features[idx]]
        if self.transform:
            frames = [self.transform(frame) for frame in frames]
        frames = torch.stack(frames)        # Stack frames to form a 4D tensor (T, C, H, W)
        frames = frames.permute(1, 0, 2, 3)  # Change order to (C, T, H, W)
        label = self.labels[idx]
        return frames, label

In [36]:
def get_analysis_train_dataloader(data_path, label_path, batch_size, num_epochs = 20, epochs = 0):

    dataset_train = RAFDBDataset(choose="train",
        data_path=data_path,
        label_path=label_path,
        app = None,
        transform = None,
        img_size = 224,
        num_epochs = num_epochs,
        epochs = epochs
                                 
    )

    data_loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=batch_size,
        drop_last=True,
    )
    return data_loader_train, dataset_train.length, dataset_train


def get_analysis_val_dataloader(data_path, label_path, batch_size, num_epochs = 20, epochs = 0):

    dataset_val = RAFDBDataset(choose="test",
        data_path=data_path,
        label_path=label_path,
        app = None,        
        transform = None,
        img_size = 224,
        num_epochs = num_epochs,
        epochs = epochs
    )

    data_loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=batch_size,
        shuffle=False,
        drop_last=False
    )
    return data_loader_val, dataset_val.length, dataset_val

In [4]:
landmarks_cache = {}

# Open the file and read all lines first to calculate total length for progress bar
with open('/kaggle/input/raf-db-trial/keypoints.txt', 'r') as file:
    lines = file.readlines()

# Iterate over lines with progress bar
for line in tqdm(lines, desc="Processing landmarks", unit="lines"):
    # Split at the first occurrence of the colon
    file_name, coordinates = line.split(":", 1)

    # Remove whitespace around the file name
    file_name = file_name.strip()

    # Convert the string representation of the list of tuples to an actual list of tuples
    coordinates = ast.literal_eval(coordinates.strip())

    # Add to the dictionary
    landmarks_cache[file_name] = coordinates

Processing landmarks:   0%|          | 0/15308 [00:00<?, ?lines/s]

In [4]:
for key in landmarks_cache.keys():
    break
key

'train_05914.jpg'

In [6]:
type(landmarks_cache['train_05914.jpg'][0])

tuple

In [27]:
class RAFDBDataset(Dataset):
    def __init__(self, choose, data_path, label_path, app, transform=None, img_size=224, num_epochs = 20, epochs = 0):
        self.image_paths = []
        self.labels = []
        self.data_path = data_path
        self.label_path = label_path
        self.app = app
        self.landmarks_cache = landmarks_cache  # Cache to store landmark coordinates
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.num_epoch = num_epochs
        self.epochs = epochs
        self.choose = choose

        if transform:
            self.transform = transform
        else:
            self.transform = T.Compose([
                T.Resize(256),
                T.CenterCrop(224),
                T.ToTensor(),
                T.Normalize(timm.data.IMAGENET_DEFAULT_MEAN, timm.data.IMAGENET_DEFAULT_STD)
            ])

        self.train = True if choose == "train" else False

        if choose == "train" or choose == "test":
            with open(self.label_path, "r") as f:
                data = f.readlines()

            for i in range(0, len(data)):
                line = data[i].strip('\n').split(" ")

                image_name = line[0]
                sample_temp = image_name.split("_")[0]

                if self.train and sample_temp == "train":
                    image_path = os.path.join(self.data_path, image_name)
                    self.image_paths.append(image_path)
                    self.labels.append(int(line[1]) - 1)

                elif not self.train and sample_temp == "test":
                    image_path = os.path.join(self.data_path, image_name)
                    self.image_paths.append(image_path)
                    self.labels.append(int(line[1]) - 1)
                    
        self.length = len(self.labels)
        self.labels = np.asarray(self.labels)

    def draw_squares_on_landmarks(self, img, landmarks, n):
        dimg = img.copy()
        half_n = n // 2
        for kp in landmarks:
            # Ensure coordinates are integers
            top_left = (int(kp[0] - half_n), int(kp[1] - half_n))
            bottom_right = (int(kp[0] + half_n), int(kp[1] + half_n))
            cv2.rectangle(dimg, top_left, bottom_right, (0, 0, 0), -1)  # -1 fills the rectangle
        return dimg

    def get_landmarks(self, img_path):
        if img_path not in self.landmarks_cache:
            return None
            img = cv2.imread(img_path)
            nimg = cv2.resize(img, (224,224))
            k = 100
            padded_image = cv2.copyMakeBorder(
                nimg,
                k,
                k,
                k,
                k,
                cv2.BORDER_CONSTANT,  # Border type
                value=[0, 0, 0]       # Padding color (black in this case)
            )
#             padded_image = padded_image.to(self.device)  # Ensure the image is on the right device
            out = self.app.get(padded_image)
            
            if len(out) == 0:  # Check if any faces were detected
                return None
            
            landmarks = out[0].landmark_2d_106.astype(np.int64)
            self.landmarks_cache[img_path] = landmarks
        return self.landmarks_cache[img_path]

    def augment(self, img_path):
        img = cv2.imread(img_path)
        nimg = cv2.resize(img, (224, 224))
        landmarks = self.get_landmarks(img_path.split('/')[-1])
        
        if landmarks is None:  # Skip if no faces are detected
            return nimg, nimg
        
        k = 100
        padded_image = cv2.copyMakeBorder(
            nimg,
            k,
            k,
            k,
            k,
            cv2.BORDER_CONSTANT,  # Border type
            value=[0, 0, 0]       # Padding color (black in this case)
        )
        
        # Use landmarks to draw squares on the padded image
        aug = self.draw_squares_on_landmarks(padded_image, landmarks, n=max(0,self.num_epoch - self.epochs - 5))
        height, width = padded_image.shape[:2]
        cropped_image = aug[k:height-k, k:width-k]
        return nimg, cropped_image

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        if self.choose == 
        img1, img2 = self.augment(img_path)
        
        if self.transform:
            img1 = self.transform(Image.fromarray(img1))
            img2 = self.transform(Image.fromarray(img2))
            
        img1 = img1.unsqueeze(1)
        img2 = img2.unsqueeze(1)
        label = torch.tensor(self.labels[idx])
        return img1, img2, label

    def __len__(self):
        return self.length

In [10]:
data_path = '/kaggle/input/raf-db-trial/92_86/92/dataset/RAF'
label_path = '/kaggle/input/raf-db-trial/92_86/92/dataset/list_patition_label.txt'
batch_size = 16

# data_loader_train, train_len, d = get_analysis_train_dataloader(data_path, label_path, batch_size, num_epochs = 20, epochs = 0)

In [16]:
t = d.augment('/kaggle/input/raf-db-trial/92_86/92/dataset/RAF/test_0001.jpg')

In [20]:
type(t[1])

numpy.ndarray

# Code below is for ABAW, code above is for RAFDB

In [5]:
train_label_path = '/kaggle/input/abaw-7-dataset/training_set_annotations.txt'
val_label_path = '/kaggle/input/abaw-7-dataset/validation_set_annotations.txt'
data_path = '/kaggle/input/abaw-7-dataset/cropped_aligned'

batch_size = 8
stride = 5
sequence_length = 10

train_seqs, train_labels = sequence_extractor(
    data_dict = data_builder(train_label_path, train = False),
    data_path = data_path,
    min_stride = stride,
    sequence_length = sequence_length
)

val_seqs, val_labels = sequence_extractor(
    data_dict = data_builder(val_label_path, train = False),
    data_path = data_path,
    min_stride = stride,
    sequence_length = sequence_length,
    train = False
)

train_dataset = ABAWFeatureDataset(train_seqs, train_labels, transform = T.Compose([
        T.RandomHorizontalFlip(p=0.5),  # Flip the frame with a probability of 0.5
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),  # Converts the image to a tensor and normalizes to [0, 1]
        T.Normalize(timm.data.IMAGENET_DEFAULT_MEAN, timm.data.IMAGENET_DEFAULT_STD),  # Normalization using ImageNet mean and std
    ])
)
val_dataset = ABAWFeatureDataset(val_seqs, val_labels, transform = T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(timm.data.IMAGENET_DEFAULT_MEAN, timm.data.IMAGENET_DEFAULT_STD)
    ])
)

data_loader_train = DataLoader(train_dataset, batch_size=batch_size, shuffle = True)
data_loader_val = DataLoader(val_dataset, batch_size=batch_size, shuffle = True)

dataloaders = {
    "train": data_loader_train,
    "test": data_loader_val
}

dataset_sizes = {
    "train": len(train_dataset),
    "test": len(val_dataset)
}

dataset_sizes

  df = pd.read_csv(label_path, names = ['image', 'val', 'arousal', 'expr', 'au1', 'au2', 'au3', 'au4', 'au5', 'au6', 'au7', 'au8', 'au9', 'au10', 'au11', 'au12'] )
  grouped = df.groupby(['folder'], as_index= False).apply(lambda x: x.sort_values('image'))


ValueError: range() arg 3 must not be zero

In [24]:
model = torchvision.models.video.swin3d_b(weights="KINETICS400_IMAGENET22K_V1")

# model = torchvision.models.video.swin3d_b(weights=None)

# state_dict = torch.load('/kaggle/working/trained_model/freezing_epoch_10.pth', map_location=torch.device('cpu'))

# Step 3: Apply the loaded weights to the model
# model.load_state_dict(state_dict)

num_ftrs = model.head.in_features # Get the input features of the current head

# Step 2: Create a new head
new_head = nn.Sequential(
 nn.Linear(num_ftrs, 7) # Final layer with 3 output units (for 3 classes)
)

model.head = new_head

# Freeze all layers first
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the last few layers
for name, param in model.named_parameters():
    if any(layer_name in name for layer_name in ["features.6", "norm", "avgpool", "head"]):
        param.requires_grad = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = model.to(device)

# app = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
# app.prepare(ctx_id=0, det_size=(640, 640))
# app.to(device)

run = 9

# labels_np = np.array(train_labels)
# class_counts = np.bincount(labels_np)          # Generate class counts
# class_weights = 1.0 / class_counts             # Calculate inverse of class counts
# class_weights = class_weights / class_weights.sum()            # Normalize the weights to sum to 1 (optional)
# class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)

# criterion = LabelSmoothingCrossEntropy()
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = optim.AdamW(model.head.parameters(), lr=0.001)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.97)
writer = SummaryWriter(f'run{run}')

In [23]:
os.remove('/kaggle/working/run9/events.out.tfevents.1725358595.da10f4d75707.34.0')

In [None]:
for name, param in model.named_parameters():
    print(name)

In [32]:
def train_model(model, criterion, optimizer, scheduler, dataloaders, dataset_sizes, device, writer, run, num_epochs=20, app=None):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print("-" * 10)
        
        data_loader_train, train_len, dataset_train = get_analysis_train_dataloader(data_path, label_path, batch_size, num_epochs = num_epochs, epochs = epoch)
        data_loader_val, val_len, dataset_val = get_analysis_val_dataloader(data_path, label_path, batch_size, num_epochs = num_epochs, epochs = epoch)

        dataloaders = {
            "train": data_loader_train,
            "test": data_loader_val
        }

        dataset_sizes = {
            "train": train_len,
            "test": val_len
        }
        
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            
            running_loss = 0.0
            running_corrects = 0.0
            all_preds = []
            all_labels = []
            
            for input1, input2, labels in tqdm(dataloaders[phase]):
                inputs = torch.cat((input1, input2), dim = 0)
                labels = torch.cat((labels, labels), dim = 0)

                inputs = inputs.to(device)
                labels = labels.to(device)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
            
#             if phase == 'train' and (epoch+1) % 5 == 0:
#                 scheduler.step()
#                 torch.save(model.state_dict(), f'/kaggle/working/trained_model/phase6_freezing_epoch_{epoch}.pth')
            
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / (dataset_sizes[phase]*2)
            # Calculate F1 score, precision, and recall
            epoch_f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
            epoch_precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
            epoch_recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
            
            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} F1: {epoch_f1:.4f} Precision: {epoch_precision:.4f} Recall: {epoch_recall:.4f}")
            
            # Log the metrics
            writer.add_scalar(f'{phase}{run}/Loss', epoch_loss, epoch)
            writer.add_scalar(f'{phase}{run}/Accuracy', epoch_acc, epoch)
            writer.add_scalar(f'{phase}{run}/F1', epoch_f1, epoch)
            writer.add_scalar(f'{phase}{run}/Precision', epoch_precision, epoch)
            writer.add_scalar(f'{phase}{run}/Recall', epoch_recall, epoch)
            
            # Log the learning rate as a scalar
            current_lr = optimizer.param_groups[0]['lr']
            writer.add_scalar(f'{phase}6/Learning_Rate', current_lr, epoch)
        
        print()
    
    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')

    torch.save(model.state_dict(), f'/kaggle/working/trained_model/phase6{run}freezing_final_epoch_{num_epochs}.pth')  # Save the model
    best_model_wts = copy.deepcopy(model.state_dict())
    model.load_state_dict(best_model_wts)
    return model

In [26]:
# Increase the message rate limit for printing training progress
from notebook.services.config import ConfigManager
cm = ConfigManager().update('notebook', {
    "NotebookApp": {
        "iopub_msg_rate_limit": 10000,  # Increase to 10000 messages/sec
        "rate_limit_window": 10.0,      # Increase the rate limit window to 10 seconds
    }
})

In [None]:
num_epochs = 20

description = (
    f"Training parameters:\n"
    f"Model: {model.__class__.__name__}\n"
    f"Criterion: {criterion.__class__.__name__}\n"
    f"Optimizer: {optimizer.__class__.__name__}\n"
    f"Scheduler: {exp_lr_scheduler.__class__.__name__}\n"
    f"Device: {device}\n"
    f"Number of epochs: {num_epochs}\n"
    f"Batch Size: {batch_size}\n"
#     f"Stride: {stride}\n"
#     f"Sequence Length: {sequence_length}"
)
writer.add_text(f"Desc_{run}/Training Parameters", description, global_step=0)
model_ft = train_model(model,criterion, optimizer, exp_lr_scheduler, dataloaders = None, dataset_sizes = None, device = device, writer = writer, run = run, num_epochs=num_epochs)

Epoch 0/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

train Loss: 1.5393 Acc: 0.7895 F1: 0.7850 Precision: 0.7849 Recall: 0.7905


  0%|          | 0/192 [00:00<?, ?it/s]

test Loss: 28.1276 Acc: 0.2216 F1: 0.0804 Precision: 0.0491 Recall: 0.2216

Epoch 1/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

train Loss: 1.7671 Acc: 0.8059 F1: 0.8028 Precision: 0.8006 Recall: 0.8069


  0%|          | 0/192 [00:00<?, ?it/s]

test Loss: 14.9922 Acc: 0.3390 F1: 0.2776 Precision: 0.6471 Recall: 0.3390

Epoch 2/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

train Loss: 1.4606 Acc: 0.8178 F1: 0.8158 Precision: 0.8141 Recall: 0.8188


  0%|          | 0/192 [00:00<?, ?it/s]

test Loss: 13.9388 Acc: 0.3781 F1: 0.3313 Precision: 0.6275 Recall: 0.3781

Epoch 3/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

train Loss: 1.3874 Acc: 0.8248 F1: 0.8233 Precision: 0.8219 Recall: 0.8258


  0%|          | 0/192 [00:00<?, ?it/s]

test Loss: 13.4597 Acc: 0.3908 F1: 0.3489 Precision: 0.7324 Recall: 0.3908

Epoch 4/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

train Loss: 1.3414 Acc: 0.8284 F1: 0.8268 Precision: 0.8254 Recall: 0.8294


  0%|          | 0/192 [00:00<?, ?it/s]

test Loss: 13.4059 Acc: 0.4045 F1: 0.3668 Precision: 0.7060 Recall: 0.4045

Epoch 5/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

train Loss: 1.3264 Acc: 0.8315 F1: 0.8304 Precision: 0.8292 Recall: 0.8325


  0%|          | 0/192 [00:00<?, ?it/s]

train Loss: 1.3026 Acc: 0.8358 F1: 0.8345 Precision: 0.8332 Recall: 0.8368


  0%|          | 0/192 [00:00<?, ?it/s]

test Loss: 13.6085 Acc: 0.4065 F1: 0.3706 Precision: 0.7174 Recall: 0.4065

Epoch 7/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

train Loss: 1.2707 Acc: 0.8375 F1: 0.8365 Precision: 0.8354 Recall: 0.8385


  0%|          | 0/192 [00:00<?, ?it/s]

test Loss: 13.4405 Acc: 0.4091 F1: 0.3733 Precision: 0.7156 Recall: 0.4091

Epoch 8/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

train Loss: 1.2639 Acc: 0.8386 F1: 0.8376 Precision: 0.8365 Recall: 0.8396


  0%|          | 0/192 [00:00<?, ?it/s]

test Loss: 13.7767 Acc: 0.4094 F1: 0.3734 Precision: 0.7337 Recall: 0.4094

Epoch 9/19
----------


  0%|          | 0/766 [00:00<?, ?it/s]

In [82]:
data_loader_train, train_len = get_analysis_train_dataloader(data_path, label_path, batch_size, num_epochs = 20, epochs = 0)

for a,b,c in data_loader_train:
    break
    
a.shape

self num_epochs =  <class 'int'> 20
self epochs =  <class 'int'> 0
out num_epochs =  <class 'int'> 20
out epochs =  <class 'int'> 0


torch.Size([16, 3, 1, 224, 224])

In [89]:
inputs = torch.cat((a, b), dim = 0)
labels = torch.cat((c, c), dim = 0)

inputs.shape

torch.Size([32, 3, 1, 224, 224])

In [70]:

a.unsqueeze(0).shape

torch.Size([1, 16, 3, 224, 224])

In [90]:
x = inputs.to(device)
t = model(x)

In [91]:
t.shape

torch.Size([32, 7])

# Independent Validation

In [15]:
import gc

gc.collect()

torch.cuda.empty_cache()

In [None]:
val_seqs, val_labels = sequence_extractor(
    data_dict = data_builder(val_label_path, train = False),
    data_path = data_path,
    min_stride = 1,
    sequence_length = 5,
    train = False
)

val_dataset = ABAWFeatureDataset(val_seqs, val_labels, transform = T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(timm.data.IMAGENET_DEFAULT_MEAN, timm.data.IMAGENET_DEFAULT_STD)
    ])
)

data_loader_val = DataLoader(val_dataset, batch_size=batch_size, shuffle = True)


In [None]:
running_loss = 0.0
running_corrects = 0.0
all_preds = []
all_labels = []
            
    
for inputs, labels in tqdm(data_loader_val):
    inputs = inputs.to(device)
    labels = labels.to(device)

#     optimizer.zero_grad()

    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

    running_loss += loss.item() * inputs.size(0)
    running_corrects += torch.sum(preds == labels.data)

    all_preds.extend(preds.cpu().numpy())
    all_labels.extend(labels.cpu().numpy())

epoch_loss = running_loss / dataset_sizes['val']
epoch_acc = running_corrects.double() / dataset_sizes['val']

# Calculate F1 score, precision, and recall
epoch_f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
epoch_precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
epoch_recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)

print(f"{'val'} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} F1: {epoch_f1:.4f} Precision: {epoch_precision:.4f} Recall: {epoch_recall:.4f}")
            

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Example list of image paths
data_path = '/kaggle/input/abaw-7-dataset/cropped_aligned'
image_paths = [os.path.join(data_path, x) for x in train_seqs[0]]
labels = [train_labels[0] for x in train_seqs[0]]
# Number of images
n_images = len(image_paths)

# Calculate grid size (assuming a square grid)
grid_size = int(n_images**0.5) + (n_images**0.5 != int(n_images**0.5))

# Create a figure with a grid of subplots
fig, axes = plt.subplots(grid_size, grid_size, figsize=(10, 10))

# Flatten the axes array for easy iteration
axes = axes.flatten()

# Iterate over images and plot them with labels
for i, (image_path, label) in enumerate(zip(image_paths, labels)):
    img = mpimg.imread(image_path)
    axes[i].imshow(img)
    axes[i].axis('off')  # Hide axes
    axes[i].set_title(label, fontsize=12)  # Add label

# Hide any remaining empty subplots
for j in range(i+1, len(axes)):
    axes[j].axis('off')

plt.tight_layout()
plt.show()

# Testing Facial Landmark detector on cuda

In [11]:
# app = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
# app.prepare(ctx_id=0, det_size=(640, 640))  # Set detection size, though not used directly for cropped images

# Example usage
cropped_face_path = '/kaggle/input/raf-db-trial/92_86/92/dataset/RAF/test_0001.jpg'
cropped_face = cv2.imread(cropped_face_path)
top = 100
bottom = 100
left = 100
right = 100

# Pad the image
padded_image = cv2.copyMakeBorder(
    cropped_face,
    top,
    bottom,
    left,
    right,
    cv2.BORDER_CONSTANT,  # Border type
    value=[0, 0, 0]       # Padding color (black in this case)
)
# cv2_imshow(padded_image)
for i in range(10):
    out = app.get(padded_image)

In [19]:
out[0]['landmark_2d_106']

array([[156.45071 , 210.42764 ],
       [118.11843 , 147.66898 ],
       [126.07091 , 191.4655  ],
       [128.95174 , 195.59265 ],
       [132.36949 , 199.33719 ],
       [136.2097  , 202.70042 ],
       [140.47379 , 205.81747 ],
       [145.20027 , 208.36473 ],
       [150.51312 , 210.01663 ],
       [117.85137 , 152.72223 ],
       [117.86227 , 157.70833 ],
       [118.11784 , 162.64897 ],
       [118.63982 , 167.56123 ],
       [119.438286, 172.4909  ],
       [120.51621 , 177.42566 ],
       [121.8665  , 182.30336 ],
       [123.71557 , 187.01166 ],
       [189.53238 , 145.91133 ],
       [183.72394 , 189.44621 ],
       [181.2376  , 193.71155 ],
       [178.25674 , 197.62396 ],
       [174.90025 , 201.20598 ],
       [171.23077 , 204.61256 ],
       [167.04013 , 207.50168 ],
       [162.1745  , 209.54306 ],
       [190.07605 , 150.88753 ],
       [190.31711 , 155.8381  ],
       [190.25768 , 160.73816 ],
       [189.91148 , 165.61153 ],
       [189.28094 , 170.48413 ],
       [18

Build a dynamic FER model which takes video 