In [3]:
# download dataset to file

import kagglehub
# Download latest version
path = kagglehub.dataset_download("msambare/fer2013")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fer2013


In [4]:
import os
import torch

In [4]:
from torchinfo import summary
from transformers.models.mobilenet_v2 import MobileNetV2Model
from transformers.models.mobilenet_v2 import MobileNetV2PreTrainedModel
from transformers.models.mobilenet_v2.configuration_mobilenet_v2 import MobileNetV2Config

In [7]:
# constants
PRETRAINED_CONFIG = ''
# https://huggingface.co/google/mobilenet_v2_1.4_224
PRETRAINED_MODEL = 'google/mobilenet_v2_1.4_224'
#cfg = MobileNetV2Config(PRETRAINED_CONFIG)
cfg = MobileNetV2Config()
BATCH_SIZE = 16
NUM_EPOCHS = 1

if not torch.cuda.is_available():
    print('GPU not available, running script on CPU..')
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

GPU not available, running script on CPU..


In [None]:
class FERDataset(torch.utils.data.Dataset):
    # https://www.kaggle.com/datasets/msambare/fer2013
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels
        self.transform = self._transform
        self.target_transform = self._target_transform
        self.class_map_loaded = {
            0: 'angry',
            1: 'disgust',
            2: 'fear',
            3: 'happy',
            4: 'neutral',
            5: 'sad',
            6: 'surprise'
        }
        self.class_map = {
            0: 'positive',
            1: 'negative',
            2: 'neutral'
        }
        self.NUM_CLASSES = len(self.class_map)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        image = self.transform(image)
        label = self.labels[idx]
        label = self.target_transform(label)

        return image, label

    def _transform(self, image):
        return image

    def _target_transform(self, target):
        # TODO: map to new class map
        return target

In [12]:
# load dataset into script
print('loading dataset from file...')

train_images = []
train_labels = []
for subdir in os.listdir(path+'/train'):
    for idx in os.listdir(path+'/train/'+subdir):
        train_images.append(idx)
        train_labels.append(subdir)

test_images = []
test_labels = []
for subdir in os.listdir(path+'/test'):
    for idx in os.listdir(path+'/test/'+subdir):
        train_images.append(idx)
        train_labels.append(subdir)
print('...done')

loading dataset from file...
...done


In [None]:
# TODO: create datasets
# TODO: split train images/labels into train and val

train_dataset = FERDataset(train_images, train_labels)
val_dataset = FERDataset(val_images, val_labels)
test_dataset = FERDataset(test_images, test_labels)

In [10]:
class MobileNetV2ForFacialExpressionRecognition(MobileNetV2PreTrainedModel):
    """
    from MobileNetV2 for image classification
    """
    def __init__(self, config):
        super().__init__(config=config)

        self.num_labels = 3
        self.mobilenet_v2 = MobileNetV2Model(config)

        last_hidden_size = self.mobilenet_v2.conv_1x1.convolution.out_channels

        # Classifier head
        self.dropout = torch.nn.Dropout(config.classifier_dropout_prob, inplace=True)
        self.classifier = torch.nn.Linear(last_hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(self, pixel_values=None, output_hidden_states=None, labels=None):
        outputs = self.mobilenet_v2(pixel_values, output_hidden_states=output_hidden_states)
        pooled_output = outputs[1]
        logits = self.classifier(self.dropout(pooled_output))

        loss = None
        if labels is not None:
            self.config.problem_type = "multi_label_classification"
            loss_fct = BCEWithLogitsLoss()
            loss = loss_fct(logits, labels)
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

In [17]:
print('...creating MobileNetV2 model')
model = MobileNetV2ForFacialExpressionRecognition(cfg).from_pretrained(PRETRAINED_MODEL)
model.to(DEVICE)
summary(model, input_size=(BATCH_SIZE, 3, 48, 48))

...creating MobileNetV2 model


Layer (type:depth-idx)                             Output Shape              Param #
MobileNetV2ForFacialExpressionRecognition          [16, 1001]                --
├─MobileNetV2Model: 1-1                            [16, 1792]                --
│    └─MobileNetV2Stem: 2-1                        [16, 24, 24, 24]          --
│    │    └─MobileNetV2ConvLayer: 3-1              [16, 48, 24, 24]          1,392
│    │    └─MobileNetV2ConvLayer: 3-2              [16, 48, 24, 24]          528
│    │    └─MobileNetV2ConvLayer: 3-3              [16, 24, 24, 24]          1,200
│    └─ModuleList: 2-2                             --                        --
│    │    └─MobileNetV2InvertedResidual: 3-4       [16, 32, 12, 12]          10,000
│    │    └─MobileNetV2InvertedResidual: 3-5       [16, 32, 12, 12]          14,848
│    │    └─MobileNetV2InvertedResidual: 3-6       [16, 48, 6, 6]            17,952
│    │    └─MobileNetV2InvertedResidual: 3-7       [16, 48, 6, 6]            31,488
│    │    └─

In [None]:
# TODO: loss function

In [None]:
# TODO: compute accuracy

In [None]:
# TODO: train function

In [None]:
# TODO: validate function

In [None]:
# TODO: save best model

In [None]:
# TODO: plot epoch metrics function

In [None]:
# TODO: train_loop

In [None]:
# TODO: run training

In [None]:
# TODO: plot train vs validation loss
# TODO: plot train vs validation accuracy

In [None]:
# TODO: evaluate function (test data)

In [None]:
# TODO: run evaluation (test)

In [None]:
# TODO: export to ONNX