<a href="https://www.kaggle.com/code/vovanquangnbk/drowsy-train-face?scriptVersionId=144340782" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## Setup

In [None]:
!pip -q install facenet_pytorch

In [None]:
from glob import glob
from sklearn.model_selection import GroupKFold, StratifiedKFold
import cv2
from skimage import io
import torch
from torch import nn
import os
from datetime import datetime
import time
import random
import cv2
import torchvision
from torchvision import transforms
import pandas as pd
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

import sklearn
import warnings
import joblib
from sklearn.metrics import roc_auc_score, log_loss
from sklearn import metrics
import warnings
import cv2
import pydicom

from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face

In [None]:
# Global configuration shared by the data-loading, training and inference cells.
CFG = {
    # Root directory holding the CSV index files and the per-fold sample arrays
    'data_dir': '/kaggle/input/drowsy-cropfacevec-vggface',
    # Seed handed to seed_everything() before training
    'seed': 719,
    # Used as the prefix of checkpoint file names ("<model_arch>_<epoch>")
    'model_arch': 'LSTM',
    # NOTE(review): not read by any code visible in this file — confirm it is still needed
    'embedding_features': 2048, #512,
    # Default value of prepare_dataloader's `train_all` argument
    'train_all': False,
    # Number of training epochs run by the main loop
    'epochs': 5,
    # Epochs whose checkpoints are loaded and averaged at inference time
    'used_epochs': [2,3,4],
    # Batch sizes of the training / validation loaders
    'train_bs': 8,
    'valid_bs': 8,
    # T_0 of CosineAnnealingWarmRestarts (epochs until the first restart)
    'T_0': 10,
    # Adam learning rate, scheduler floor (eta_min) and Adam weight decay
    'lr': 1e-5,
    'min_lr': 1e-6,
    'weight_decay':1e-6,
    # Worker processes per DataLoader
    'num_workers': 2,
    'accum_iter': 2, # support batch accumulation for backprop, giving an effectively larger batch size
    # Progress-bar description is refreshed every `verbose_step` steps
    'verbose_step': 1,
    # First CUDA device when available, otherwise CPU
    'device': torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    # When True, the smoke-test blocks after the dataset/loader/model definitions run
    'show_examples': False,
}

In [None]:
# Gather every CSV index under the data directory into a single frame.
# sorted() makes the concatenation order independent of filesystem listing order.
paths = os.path.join(CFG['data_dir'], "*.csv")
df = pd.concat(map(pd.read_csv, sorted(glob(paths))))
# Shuffle the rows; seeding the shuffle keeps the row order reproducible across runs.
df = df.sample(frac=1, random_state=CFG['seed'])
df = df.reset_index(drop=True)

# Rows tagged 'fold1' form the held-out set, everything else is used for training.
# .copy() detaches the subsets from `df` so columns can be added to them later
# without pandas emitting SettingWithCopyWarning.
train = df[df['fold'] != 'fold1'].copy()
test = df[df['fold'] == 'fold1'].copy()
print(train.shape)
print(test.shape)

test.head()

## Utils

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices) and
    configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker subprocesses);
    # the hash seed of the running process is fixed at startup.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different (possibly
    # non-deterministic) kernels between runs, which defeats the line above.
    torch.backends.cudnn.benchmark = False

## Dataset

In [None]:
class MyDataset(Dataset):
    """Dataset that loads one saved frame array per row of an index DataFrame.

    Each row must provide ``fold`` and ``id`` columns; the sample is read with
    ``np.load`` from ``<data_root>/<fold>/<id>``. A ``label`` column is required
    when ``output_label`` is true.

    Args:
        df: Index DataFrame; a private copy is kept.
        data_root: Directory containing the per-fold sample files.
        transforms: Stored on the instance but not applied by ``__getitem__``.
        output_label: When true, ``__getitem__`` returns ``(x, label)``;
            otherwise it returns only ``x``.
        one_hot_label: When true, labels are converted to one-hot rows with
            ``label.max() + 1`` columns.
    """

    def __init__(self,
                 df,
                 data_root=None,
                 transforms=None,
                 output_label=True,
                 one_hot_label=False,
                ):

        super().__init__()
        self.df = df.copy()
        self.data_root = data_root
        self.transforms = transforms
        self.output_label = output_label
        self.one_hot_label = one_hot_label

        if output_label:
            self.labels = self.df['label'].values
            if one_hot_label:
                self.labels = np.eye(self.df['label'].max()+1)[self.labels]

    def __len__(self):
        """Return the number of rows in the index."""
        return self.df.shape[0]

    def __getitem__(self, index: int):
        """Load the sample at positional ``index``.

        Returns:
            ``(x, label)`` when ``output_label`` is true, otherwise ``x`` alone.
            ``x`` is a uint8 tensor whose axes 0..3 of the stored array are
            reordered to ``(0, 3, 1, 2)``.
        """
        # Look the row up once instead of once per column
        row = self.df.iloc[index]
        x_path = os.path.join(self.data_root, row['fold'], row['id'])

        x = torch.from_numpy(np.load(x_path)).to(torch.uint8)
        # assumes the stored array is (frames, H, W, C) — TODO confirm; moves the
        # last axis to position 1
        x = x.permute((0, 3, 1, 2))

        if self.output_label:
            return x, self.labels[index]
        # Unlabelled mode returns the sample itself (previously this branch
        # returned an undefined `label` and raised NameError).
        return x

# Smoke test: print the tensor shape and label of the first few training samples
if CFG['show_examples']:
    dataset = MyDataset(train, CFG['data_dir'])
    # At most seven samples (indices 0..6) are inspected
    for i in range(min(len(dataset), 7)):
        x, label = dataset[i]
        print(x.shape, label)

In [None]:
class PadSequence:
    """Collate function that zero-pads variable-length sequences into one batch.

    Each element of ``batch`` is expected to be a ``(data, label)`` tuple where
    ``data`` is a tensor whose first dimension is the sequence length.

    Note that samples are reordered (longest first) inside the batch, so the
    returned tensors do not follow the order in which the samples were drawn
    unless the batch holds a single sample.
    """

    def __call__(self, batch):
        """Return ``(sequences_padded, labels)`` for one batch.

        Returns:
            sequences_padded: Tensor of shape ``(batch, max_len, ...)`` padded
                with zeros at the end of shorter sequences.
            labels: ``LongTensor`` with the labels in the same (sorted) order.
        """
        # Longest sequence first; sorted() is stable, so ties keep input order
        sorted_batch = sorted(batch, key=lambda item: item[0].shape[0], reverse=True)
        sequences = [data for data, _ in sorted_batch]
        sequences_padded = torch.nn.utils.rnn.pad_sequence(sequences, batch_first=True)
        # Labels must come from the *sorted* batch to stay aligned with the data.
        # (The per-sequence lengths were previously computed here but never
        # returned, so that dead computation has been dropped.)
        labels = torch.LongTensor([label for _, label in sorted_batch])
        return sequences_padded, labels

def prepare_dataloader(df, trn_idx, val_idx, train_all=CFG['train_all']):
    """Build the training and validation DataLoaders for one split.

    Args:
        df: Index DataFrame containing both splits.
        trn_idx: Index labels (as used by ``df.loc``) of the training rows.
        val_idx: Index labels (as used by ``df.loc``) of the validation rows.
        train_all: Accepted for backward compatibility; currently unused.

    Returns:
        ``(train_loader, val_loader)``. The training loader shuffles; both
        loaders pad variable-length samples with ``PadSequence``.
    """
    # No third-party sampler is imported here: class balancing is not in use,
    # and importing an optional package just for a disabled code path made the
    # function fail wherever that package was not installed.
    train_ = df.loc[trn_idx, :].reset_index(drop=True)
    valid_ = df.loc[val_idx, :].reset_index(drop=True)

    train_ds = MyDataset(train_, data_root=CFG['data_dir'], output_label=True, one_hot_label=False)
    valid_ds = MyDataset(valid_, data_root=CFG['data_dir'], output_label=True)

    train_loader = torch.utils.data.DataLoader(
        train_ds,
        batch_size=CFG['train_bs'],
        pin_memory=False,
        drop_last=False,
        shuffle=True,
        num_workers=CFG['num_workers'],
        collate_fn=PadSequence(),
    )
    val_loader = torch.utils.data.DataLoader(
        valid_ds,
        batch_size=CFG['valid_bs'],
        num_workers=CFG['num_workers'],
        shuffle=False,
        collate_fn=PadSequence(),
        pin_memory=False,
    )
    return train_loader, val_loader

# Smoke test: build loaders from two folds and print the shapes of a few batches
if CFG['show_examples']:
    dataset = MyDataset(train, CFG['data_dir'])
    trn_idx = train.loc[train['fold'] == 'fold4'].index.tolist()
    val_idx = train.loc[train['fold'] == 'fold3'].index.tolist()
    train_loader, val_loader = prepare_dataloader(train, trn_idx, val_idx, train_all=CFG['train_all'])

    # Training loader first, then validation loader; at most seven batches each
    for loader in (train_loader, val_loader):
        print(len(loader))
        for i, (x, label) in enumerate(loader):
            x, label = x.to(CFG['device']), label.to(CFG['device'])
            print(x.shape, label.shape)
            if i >= 6:
                break

## Model

### LSTM

In [None]:
class MyClassifier(nn.Module):
    """Sequence classifier: per-frame face embeddings -> LSTM -> two-logit head.

    Every frame is embedded by an ``InceptionResnetV1`` backbone loaded with
    ``pretrained='vggface2'``; the embeddings of one clip are fed to a
    single-layer, unidirectional LSTM and the output of the last time step is
    classified by ``fc1 -> SELU -> fc2``.

    Args:
        input_size: Size of the per-frame embedding fed to the LSTM.
        hidden: Hidden size of the LSTM.
    """

    def __init__(self, input_size=512, hidden = 512):
        super().__init__()
        # Frame-level feature extractor
        self.backbone = InceptionResnetV1(pretrained='vggface2')

        # Temporal model and classification head
        self.hidden = hidden
        self.input_size = input_size
        self.lstm = nn.LSTM(input_size, hidden, batch_first=True)
        self.fc1 = nn.Linear(hidden, 50)
        self.selu = nn.SELU()
        # Registered (and therefore part of the state dict) but not called in forward()
        self.bn2 = nn.BatchNorm1d(50)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(50, 2)
        self._reinitialize()

    def _reinitialize(self):
        """Re-initialise LSTM and ``fc*`` parameters in a Keras-like fashion.

        Parameters are selected by substring match on their names: LSTM
        input weights get Xavier-uniform, recurrent weights get orthogonal
        init, biases are zeroed with the forget-gate slice of ``bias_ih`` set
        to one; ``fc`` weights get Xavier-uniform and ``fc`` biases are zeroed.
        Parameters matching neither ``'lstm'`` nor ``'fc'`` are left untouched.
        """
        for param_name, param in self.named_parameters():
            values = param.data

            if 'lstm' in param_name:
                if 'weight_ih' in param_name:
                    nn.init.xavier_uniform_(values)
                elif 'weight_hh' in param_name:
                    nn.init.orthogonal_(values)
                elif 'bias_ih' in param_name:
                    values.fill_(0)
                    # Second quarter of the stacked bias is the forget gate
                    size = param.size(0)
                    values[(size // 4):(size // 2)].fill_(1)
                elif 'bias_hh' in param_name:
                    values.fill_(0)
                continue

            if 'fc' not in param_name:
                continue

            if 'weight' in param_name:
                nn.init.xavier_uniform_(values)
            elif 'bias' in param_name:
                values.fill_(0)

    def forward(self, x):
        """Classify a batch of clips.

        Args:
            x: 5-D tensor unpacked as ``(batch, frames, channels, height, width)``;
                it is cast to float before entering the backbone.

        Returns:
            Logits of shape ``(batch, 2)``.
        """
        _, steps, channels, height, width = x.size()

        # Fold the time axis into the batch axis so the backbone sees single frames
        frames = x.reshape(-1, channels, height, width).float()
        embeddings = self.backbone(frames).reshape(-1, steps, self.input_size)

        outputs, _ = self.lstm(embeddings)
        # Output at the final time step of the tensor as given.
        # NOTE(review): if clips were end-padded upstream, this step may be padding — confirm
        last_step = self.dropout(outputs[:, -1, :])

        return self.fc2(self.selu(self.fc1(last_step)))

# Smoke test: push a few training batches through the model and print the loss
if CFG['show_examples']:
    model = MyClassifier().to(CFG['device'])
    dataset = MyDataset(df, CFG['data_dir'])
    trn_idx = train[train['fold'] == 'fold2'].index.tolist()
    val_idx = train[train['fold'] == 'fold3'].index.tolist()
    train_loader, val_loader = prepare_dataloader(train, trn_idx, val_idx, train_all=CFG['train_all'])
    # The model emits two logits per clip, so use the same class-index loss as
    # training. BCEWithLogitsLoss needs targets shaped like the logits and
    # fails on (batch, 2) predictions against (batch,) labels.
    loss = nn.CrossEntropyLoss().to(CFG['device'])

    for i, (x, label) in enumerate(train_loader):
        x = x.to(CFG['device']).byte()
        # No squeeze(): it would drop the batch axis when a batch holds one clip
        preds = model(x)
        label = label.long().to(CFG['device'])
        print(preds.shape, label.shape)
        print(loss(preds, label))
        if i > 5:
            break

## Training function

In [None]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device, scaler, scheduler=None, schd_batch_update=False, threshold=0.5):
    """Train ``model`` for one epoch with mixed precision and gradient accumulation.

    Gradients are accumulated over ``CFG['accum_iter']`` batches (and flushed on
    the last batch of the epoch) before the optimizer steps. Prints the training
    accuracy at the end of the epoch.

    Args:
        epoch: Epoch number, used only in the progress-bar text.
        model: Network to train; put into training mode here.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar loss.
        optimizer: Optimizer updating ``model``'s parameters.
        train_loader: Iterable of ``(X, y)`` batches supporting ``len()``.
        device: Device the batches are moved to.
        scaler: ``GradScaler`` used for loss scaling and optimizer stepping.
        scheduler: Optional LR scheduler.
        schd_batch_update: If true the scheduler steps after every optimizer
            step, otherwise once at the end of the epoch.
        threshold: Unused; kept for backward compatibility.
    """
    model.train()

    running_loss = None
    preds_all = []
    y_all = []
    accum_iter = CFG['accum_iter']
    verbose_step = CFG['verbose_step']
    num_steps = len(train_loader)

    pbar = tqdm(enumerate(train_loader), total=num_steps)
    for step, (X, y) in pbar:
        X = X.to(device).float()
        y = y.to(device).long()
        is_last_step = (step + 1) == num_steps

        # Only the forward pass and the loss belong under autocast; the backward
        # pass and optimizer step are run outside of it.
        with autocast():
            preds = model(X)
            loss = loss_fn(preds, y)

        preds_all.append(torch.argmax(preds, 1).detach().cpu().numpy())
        y_all.append(y.detach().cpu().numpy())

        # Divide by the accumulation factor so the accumulated gradient is the
        # average over the window instead of growing with accum_iter.
        scaler.scale(loss / accum_iter).backward()

        # Exponential moving average of the (unnormalised) batch loss for display
        if running_loss is None:
            running_loss = loss.item()
        else:
            running_loss = running_loss * .99 + loss.item() * .01

        if ((step + 1) % accum_iter == 0) or is_last_step:
            # may unscale_ here if desired (e.g., to allow clipping unscaled gradients)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

            if scheduler is not None and schd_batch_update:
                scheduler.step()

        if ((step + 1) % verbose_step == 0) or is_last_step:
            pbar.set_description(f'epoch {epoch} loss: {running_loss:.4f}')

    if scheduler is not None and not schd_batch_update:
        scheduler.step()

    preds_all = np.concatenate(preds_all)
    y_all = np.concatenate(y_all)
    print('train multi-class accuracy = {:.4f}'.format((preds_all==y_all).mean()))

def valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False, threshold=0.5):
    """Evaluate ``model`` on ``val_loader`` and print the validation accuracy.

    Args:
        epoch: Epoch number, used only in the progress-bar text.
        model: Network to evaluate; put into eval mode here.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar loss.
        val_loader: Iterable of ``(X, y)`` batches supporting ``len()``.
        device: Device the batches are moved to.
        scheduler: Optional LR scheduler stepped once after evaluation.
        schd_loss_update: If true the scheduler is stepped with the mean
            validation loss, otherwise without arguments.
        threshold: Unused; kept for backward compatibility.
    """
    model.eval()

    loss_sum = 0
    sample_num = 0
    preds_all = []
    y_all = []
    verbose_step = CFG['verbose_step']
    num_steps = len(val_loader)

    pbar = tqdm(enumerate(val_loader), total=num_steps)
    # Disable autograd here so evaluation never builds graphs, even when the
    # caller forgets to wrap the call in torch.no_grad().
    with torch.no_grad():
        for step, (X, y) in pbar:
            X = X.to(device).float()
            y = y.to(device).long()

            preds = model(X)
            preds_all.append(torch.argmax(preds, 1).detach().cpu().numpy())
            y_all.append(y.detach().cpu().numpy())

            loss = loss_fn(preds, y)

            # Weight each batch loss by its size to get a per-sample mean
            loss_sum += loss.item()*y.shape[0]
            sample_num += y.shape[0]

            if ((step + 1) % verbose_step == 0) or ((step + 1) == num_steps):
                pbar.set_description(f'epoch {epoch} loss: {loss_sum/sample_num:.4f}')

    preds_all = np.concatenate(preds_all)
    y_all = np.concatenate(y_all)
    print('validation multi-class accuracy = {:.4f}'.format((preds_all==y_all).mean()))

    if scheduler is not None:
        if schd_loss_update:
            scheduler.step(loss_sum/sample_num)
        else:
            scheduler.step()
            
def inference_one_epoch(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and return softmax probabilities.

    Args:
        model: Network to run; put into eval mode here.
        data_loader: Iterable of ``(X, y)`` batches supporting ``len()``; the
            labels are ignored.
        device: Device the inputs are moved to.

    Returns:
        NumPy array of per-class probabilities, one row per sample, in the
        order the batches were produced.
    """
    model.eval()
    preds_all = []

    pbar = tqdm(enumerate(data_loader), total=len(data_loader))
    # Inference never needs gradients, regardless of how the caller invokes it
    with torch.no_grad():
        # Labels are not needed for prediction, so they are not moved to the device
        for _, (X, _) in pbar:
            X = X.to(device).float()

            preds = model(X)
            preds_all.append(torch.softmax(preds, 1).detach().cpu().numpy())

    return np.concatenate(preds_all)

## Main

In [None]:
# Train on every fold except 'fold1', evaluating on 'fold1' after each epoch
if __name__ == '__main__':
    seed_everything(CFG['seed'])
    trn_idx = df[df['fold'] != "fold1"].index.tolist()
    test_idx = df[df['fold'] == "fold1"].index.tolist()

    print(len(trn_idx), len(test_idx))

    train_loader, val_loader = prepare_dataloader(df, trn_idx, test_idx)

    model = MyClassifier().to(CFG['device'])
    scaler = GradScaler()
    optimizer = torch.optim.Adam(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CFG['T_0'], T_mult=1, eta_min=CFG['min_lr'], last_epoch=-1)

    loss_fn = nn.CrossEntropyLoss().to(CFG['device'])

    for epoch in range(CFG['epochs']):
        print("\n")
        # The scheduler is stepped once per epoch inside train_one_epoch
        train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, CFG['device'], scaler, scheduler=scheduler, schd_batch_update=False)

        with torch.no_grad():
            valid_one_epoch(epoch, model, loss_fn, val_loader, CFG['device'], scheduler=None, schd_loss_update=False, threshold=0.5)
        # Save exactly the checkpoints the inference step will load, instead of
        # a hard-coded epoch threshold that can drift from CFG['used_epochs'].
        if epoch in CFG['used_epochs']:
            torch.save(model.state_dict(), '{}_{}'.format(CFG['model_arch'], epoch))

    # Release GPU memory before the inference cell builds its own model
    del model, optimizer, train_loader, val_loader, scaler, scheduler
    torch.cuda.empty_cache()

In [None]:
# Ensemble inference: average the softmax outputs of the checkpoints listed in
# CFG['used_epochs'] and store the arg-max class in test['preds'].
ckpt_path = '/kaggle/input/drowsy-ckpts'
best_model = MyClassifier().to(CFG['device'])

test_ds = MyDataset(test, data_root=CFG['data_dir'], output_label=True)
# batch_size=1 keeps the predictions in dataset order: PadSequence reorders
# samples inside a batch, which is a no-op for single-sample batches.
tst_loader = torch.utils.data.DataLoader(
    test_ds,
    batch_size=1,
    num_workers=CFG['num_workers'],
    shuffle=False,
    collate_fn=PadSequence(),
    pin_memory=False,
)

start_time = time.time()
tst_preds = []

for epoch in CFG['used_epochs']:
    ckpt_file = os.path.join(ckpt_path, '{}_{}'.format(CFG['model_arch'], epoch))
    # CFG['device'] is already a torch.device, so it can be passed directly
    best_model.load_state_dict(torch.load(ckpt_file, map_location=CFG['device']))

    with torch.no_grad():
        tst_preds.append(inference_one_epoch(best_model, tst_loader, CFG['device']))

del best_model
torch.cuda.empty_cache()

# Mean probability over checkpoints, then the most likely class per sample
tst_preds_all = np.argmax(np.mean(tst_preds, axis=0), axis=1)

# assign() returns a new frame, avoiding a column write into a slice of `df`
test = test.assign(preds=tst_preds_all)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

# Print each classification metric of the held-out predictions on its own line
for caption, metric_fn in (
    ("Multi-class accuracy: ", accuracy_score),
    ("F1-score: ", f1_score),
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
):
    print(caption, metric_fn(test["label"], test["preds"]))

In [None]:
import seaborn as sns
# Confusion matrix of the held-out predictions (rows: true label, columns: prediction)
y_true = test["label"]
y_pred = test["preds"]
cf_mt = confusion_matrix(y_true, y_pred)
# fmt="d" prints the integer counts as-is; the default '.2g' format switches to
# scientific notation once a cell holds 100 or more samples.
sns.heatmap(cf_mt, annot=True, fmt="d")

In [None]:
# print wrong predictions
wrong_ids = y_true != y_pred
wrong_preds = test[wrong_ids]
# sample() raises ValueError when asked for more rows than exist, so cap the
# request at the number of available rows
wrong_preds.sample(min(20, len(wrong_preds)))

In [None]:
# print correct predictions
correct_ids = y_true == y_pred
correct_preds = test[correct_ids]
# sample() raises ValueError when asked for more rows than exist, so cap the
# request at the number of available rows
correct_preds.sample(min(20, len(correct_preds)))