In [1]:
import argparse
import torch
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda, Grayscale, Resize
from torch.utils.data import DataLoader
from torch.optim import AdamW


In [2]:
# Mount Google Drive inside the Colab VM so files stored there can be read
# through the local filesystem.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [3]:
!unzip gdrive/My\ Drive/MODELS_DATA/fer2013.zip > /dev/null
# /content/gdrive/MyDrive/MODELS_DATA/fer2013.zip

In [4]:
class Args:
    """Hyperparameters and run options for the experiment.

    Calling ``Args()`` yields the defaults below. Individual options can be
    overridden by keyword, e.g. ``Args(epochs=10, lr=0.01)``; unknown option
    names are rejected so a typo cannot silently fall back to a default.

    Attributes:
        epochs: number of optimisation steps each layer performs.
        lr: learning rate given to each layer's optimiser.
        no_cuda: when True, CUDA is not used even if it is available.
        no_mps: when True, Apple MPS is not used even if it is available.
        seed: integer intended as the random seed of the run.
        save_model: when True, the network's state dict is written to disk
            after training.
        train_size: batch size of the training DataLoader (the default is
            presumably the size of the whole training split - TODO confirm).
        threshold: goodness threshold used in each layer's loss.
        test_size: batch size of the test DataLoader (the default is
            presumably the size of the whole test split - TODO confirm).
        log_interval: the loss is printed every ``log_interval`` steps.

    Raises:
        TypeError: if an override names an option that does not exist.
    """

    def __init__(self, **overrides):
        options = {
            "epochs": 1000,
            "lr": 0.03,
            "no_cuda": False,
            "no_mps": False,
            "seed": 1,
            "save_model": False,
            "train_size": 28709,
            "threshold": 2,
            "test_size": 7178,
            "log_interval": 100,
        }
        unknown = sorted(set(overrides) - set(options))
        if unknown:
            raise TypeError("unknown Args option(s): " + ", ".join(unknown))
        options.update(overrides)
        for name, value in options.items():
            setattr(self, name, value)

In [5]:
def get_y_neg(y, classes=7):
    """Draw, for every label in ``y``, a different label uniformly at random.

    The draw is done for the whole batch in one tensor operation instead of a
    Python loop with one ``.item()`` call per sample.

    Args:
        y: integer tensor of class labels, each expected to lie in
            ``[0, classes)``.
        classes: total number of classes (defaults to 7).

    Returns:
        A tensor with the same shape, dtype and device as ``y`` in which every
        entry is a valid label that differs from the corresponding entry of ``y``.

    Raises:
        ValueError: if ``classes`` is smaller than 2, because then no
            different label exists.
    """
    if classes < 2:
        raise ValueError("classes must be at least 2 to pick a different label")
    # Shifting a label by a random offset in [1, classes - 1] modulo `classes`
    # can never land on the original label, and reaches each of the other
    # labels with equal probability.
    offset = torch.randint(1, classes, y.shape, dtype=y.dtype, device=y.device)
    return (y + offset) % classes

def overlay_y_on_x(x, y, classes=7):
    """Return a copy of ``x`` with label ``y`` written into its leading columns.

    The first ``classes`` columns of every row are multiplied by zero, then the
    column selected by ``y`` in each row is set to the maximum value found
    anywhere in ``x``. The input tensor itself is not modified.

    Args:
        x: 2-D tensor with one sample per row.
        y: column index to mark, either one per row or a single index that is
            applied to every row.
        classes: number of leading columns reserved for the label.

    Returns:
        The modified copy of ``x``.
    """
    peak = x.max()
    row_ids = range(len(x))
    labelled = x.clone()
    labelled[:, :classes].mul_(0.0)
    labelled[row_ids, y] = peak
    return labelled

In [6]:
class Net(torch.nn.Module):
    """Stack of ``Layer`` modules that are trained one after another.

    Relies on the module-level names ``Layer``, ``device`` and
    ``overlay_y_on_x`` being defined before an instance is created or used.

    Note: ``train`` here takes positive/negative data and therefore shadows
    ``torch.nn.Module.train(mode)``; ``net.eval()`` / ``net.train(False)``
    cannot be used on this class.
    """

    def __init__(self, dims):
        """Build one ``Layer`` per consecutive pair of sizes in ``dims``.

        Args:
            dims: sequence of layer widths, input size first.
        """
        super().__init__()
        # A ModuleList (rather than a plain Python list) registers the layers
        # as sub-modules, so state_dict(), parameters() and to() include them.
        # With a plain list, state_dict() is empty and saving stores nothing.
        self.layers = torch.nn.ModuleList(
            [Layer(n_in, n_out).to(device) for n_in, n_out in zip(dims[:-1], dims[1:])]
        )

    def predict(self, x, classes=7):
        """Return, for each row of ``x``, the label with the highest goodness.

        Every candidate label is overlaid on ``x`` in turn; the goodness of a
        label is the sum over layers of the mean squared activation.

        Args:
            x: 2-D tensor with one sample per row.
            classes: number of candidate labels to try (defaults to 7).

        Returns:
            1-D tensor of predicted label indices, one per row of ``x``.
        """
        goodness_per_label = []
        # Inference only: skip autograd bookkeeping for the forward passes.
        with torch.no_grad():
            for label in range(classes):
                h = overlay_y_on_x(x, label, classes)
                total = x.new_zeros(x.shape[0])
                for layer in self.layers:
                    h = layer(h)
                    total = total + h.pow(2).mean(1)
                goodness_per_label.append(total)
        # Shape (batch, classes); argmax over the label axis.
        return torch.stack(goodness_per_label, dim=1).argmax(1)

    def train(self, x_pos, x_neg):
        """Train the layers in order, feeding each layer's outputs to the next.

        Args:
            x_pos: positive samples (input with the correct label overlaid).
            x_neg: negative samples (input with a wrong label overlaid).
        """
        h_pos, h_neg = x_pos, x_neg
        for i, layer in enumerate(self.layers):
            print("training layer: ", i)
            h_pos, h_neg = layer.train(h_pos, h_neg)

class Layer(nn.Linear):
    """Linear + ReLU layer that carries its own optimiser and local loss.

    Hyperparameters (learning rate, goodness threshold, number of steps and
    logging interval) are read from the module-level ``args`` object once, at
    construction time.

    Note: ``train`` here takes positive/negative data and therefore shadows
    ``torch.nn.Module.train(mode)``.
    """

    def __init__(self, in_features, out_features, bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = AdamW(self.parameters(), lr=args.lr)
        self.threshold = args.threshold
        self.num_epochs = args.epochs
        # Cached like the other hyperparameters so train() does not depend on
        # the global `args` at call time.
        self.log_interval = args.log_interval

    def forward(self, x):
        """Normalise each row of ``x`` to (almost) unit L2 norm, then apply linear + ReLU.

        Args:
            x: 2-D tensor with one sample per row.

        Returns:
            Non-negative activations of shape ``(rows, out_features)``.
        """
        # The small constant keeps all-zero rows from dividing by zero.
        x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)
        # functional.linear also handles a layer built with bias=False, where
        # self.bias is None.
        return self.relu(nn.functional.linear(x_direction, self.weight, self.bias))

    def train(self, x_pos, x_neg):
        """Optimise this layer on one positive and one negative batch.

        Each step pushes the goodness (mean squared activation) of positive
        samples above ``self.threshold`` and that of negative samples below it.

        Args:
            x_pos: positive samples, one per row.
            x_neg: negative samples, one per row.

        Returns:
            Tuple ``(h_pos, h_neg)``: this layer's activations for both
            batches after training, detached from the autograd graph.
        """
        for i in range(self.num_epochs):
            g_pos = self.forward(x_pos).pow(2).mean(1)
            g_neg = self.forward(x_neg).pow(2).mean(1)
            # softplus(z) equals log(1 + exp(z)) but does not overflow to inf
            # (and then NaN gradients) when z is large.
            loss = nn.functional.softplus(
                torch.cat([-g_pos + self.threshold, g_neg - self.threshold])
            ).mean()
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
            # A log interval of 0 disables logging instead of dividing by zero.
            if self.log_interval and i % self.log_interval == 0:
                print("Loss: ", loss.item())
        # The outputs are only inputs for the next layer; no graph is needed.
        with torch.no_grad():
            return self.forward(x_pos), self.forward(x_neg)

In [7]:
if __name__ == "__main__":
    args = Args()
    # Seed PyTorch before the network is built and negative labels are drawn;
    # without this call args.seed is never applied and every run differs.
    torch.manual_seed(args.seed)

    # Device preference: CUDA first, then Apple MPS, otherwise CPU.
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    use_mps = not args.no_mps and torch.backends.mps.is_available()
    if use_cuda:
        device = torch.device("cuda")
    elif use_mps:
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    # Shuffling is a property of the training split, independent of the
    # device; the test split keeps its natural order.
    train_kwargs = {"batch_size": args.train_size, "shuffle": True}
    test_kwargs = {"batch_size": args.test_size, "shuffle": False}

    if use_cuda:
        cuda_kwargs = {"num_workers": 1, "pin_memory": True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    # Each image becomes a flat vector of 48 * 48 = 2304 grayscale values.
    transform = Compose(
        [
            Resize((48, 48)),
            Grayscale(),
            ToTensor(),
            Normalize((0.5,), (0.5,)),
            Lambda(torch.flatten),
        ]
    )
    train_loader = DataLoader(
        ImageFolder("/content/train", transform=transform), **train_kwargs
    )
    test_loader = DataLoader(
        ImageFolder("/content/test", transform=transform), **test_kwargs
    )
    net = Net([2304, 500, 500])

    # Only the first batch of the loader is used for training.
    x, y = next(iter(train_loader))
    x, y = x.to(device), y.to(device)
    # Positive data carries the true label, negative data a wrong one.
    x_pos = overlay_y_on_x(x, y)
    y_neg = get_y_neg(y)
    x_neg = overlay_y_on_x(x, y_neg)

    net.train(x_pos, x_neg)
    train_error = 1.0 - net.predict(x).eq(y).float().mean().item()
    print("train error:", train_error)
    train_accuracy = (1 - train_error) * 100
    print("train accuracy: {:.2f}%".format(train_accuracy))

    # Evaluation likewise uses only the first batch of the test loader.
    x_te, y_te = next(iter(test_loader))
    x_te, y_te = x_te.to(device), y_te.to(device)
    if args.save_model:
        torch.save(net.state_dict(), "emotion_recognition.pt")
    test_error = 1.0 - net.predict(x_te).eq(y_te).float().mean().item()
    print("test error:", test_error)
    test_accuracy = (1 - test_error) * 100
    print("test accuracy: {:.2f}%".format(test_accuracy))

training layer:  0
Loss:  1.1268709897994995
Loss:  0.6782753467559814
Loss:  0.6575350165367126
Loss:  0.6454067826271057
Loss:  0.6363899111747742
Loss:  0.6278848052024841
Loss:  0.6196228861808777
Loss:  0.6115196347236633
Loss:  0.6035373210906982
Loss:  0.5956501960754395
training layer:  1
Loss:  1.1266690492630005
Loss:  0.6498326063156128
Loss:  0.6095683574676514
Loss:  0.5922841429710388
Loss:  0.5840622782707214
Loss:  0.578546404838562
Loss:  0.5741928815841675
Loss:  0.5705272555351257
Loss:  0.567319393157959
Loss:  0.5644208788871765
train error: 0.5762304365634918
train accuracy: 42.38%
test error: 0.619113951921463
test accuracy: 38.09%


In [8]:
# Play a short beep in the browser to signal that the preceding cells finished.
from google.colab import output

BEEP_JS = 'new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()'
output.eval_js(BEEP_JS)