In [None]:
!git clone https://github.com/muxspace/facial_expressions

fatal: destination path 'facial_expressions' already exists and is not an empty directory.


In [None]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import cv2
import pathlib
import torch
import torchvision.transforms as transforms
from torchvision.io import read_image

# Locations of the label CSV files and of the image files inside the cloned repository.
IMAGE_DIR = "facial_expressions/images"
DATA_DIR = "facial_expressions/data"

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [None]:
# Read the label table, expose the 'image' column under the name 'path', and
# lower-case the emotion labels so differently-cased spellings become one class.
# Disabled alternatives for quick experiments:
#   data = data.head(300)
#   data = pd.read_csv(f'{DATA_DIR}/500_picts_satz.csv', names=['user', 'image', 'emotion'])
data = (
    pd.read_csv(f'{DATA_DIR}/legend.csv')
    .assign(emotion=lambda frame: frame['emotion'].str.lower())
    .rename(columns={'image': 'path'})
)

In [None]:
# Filter out images with an unexpected size: keep only the rows whose image
# decodes to a 2-D (350, 350) array.
has_expected_shape = np.array(
    [plt.imread(f'{IMAGE_DIR}/{path}').shape == (350, 350) for path in data['path']],
    dtype=bool,
)
# .copy() gives an independent frame, so adding columns later does not raise
# SettingWithCopyWarning. drop=True discards the old index instead of inserting
# it as an extra 'index' column, which also keeps this cell safe to re-run.
data = data[has_expected_shape].copy().reset_index(drop=True)

In [None]:
# Distinct emotion labels in order of first appearance; a label's position in
# this list is its integer class id.
classes = list(data['emotion'].unique())
# One dict lookup per row instead of a linear classes.index() scan.
class_to_idx = {name: idx for idx, name in enumerate(classes)}
# assign() builds a new frame rather than writing into a possibly sliced one,
# which avoids pandas' SettingWithCopyWarning. 'class' is a Python keyword,
# hence the dict unpacking.
data = data.assign(**{'class': data['emotion'].map(class_to_idx)})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
# Fixed seed so the split, and therefore every accuracy reported below, is
# reproducible across kernel restarts.
SPLIT_SEED = 42

total_size = len(data)
train_ratio = 0.7
# Informational only: sample(frac=...) does its own rounding, so
# len(train_data) can differ from train_size by one.
train_size = int(total_size * train_ratio)
# get a random sample of the rows for training
train_data = data.sample(frac=train_ratio, axis=0, random_state=SPLIT_SEED)
# get everything but the train sample
test_data = data.drop(index=train_data.index)
# Renumber both frames 0..n-1. drop=True discards the old index instead of
# adding it as an extra column, and keeps this cell safe to re-run.
train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

In [None]:
from torch.utils.data import Dataset
import pandas as pd
import os
from torchvision.io import read_image

class FacialExpression(Dataset):
  """Map-style dataset yielding ``(image, label)`` pairs from a label table.

  Args:
    data: DataFrame with a ``'path'`` column (file name relative to
      ``img_dir``) and a ``'class'`` column (the label).
    img_dir: Directory that contains the image files.
    transform: Optional callable applied to the loaded image array.
    target_transform: Optional callable applied to the label.
  """

  def __init__(self, data, img_dir=IMAGE_DIR, transform=None, target_transform=None):
    self.data = data
    self.img_dir = img_dir
    self.img_labels = self.data['class']
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self):
    """Return the number of samples (rows of the label table)."""
    return len(self.img_labels)

  def __getitem__(self, idx):
    """Load the ``idx``-th sample by position and return ``(image, label)``."""
    # .iloc is positional, so lookups stay correct even when the frame's index
    # is not a fresh 0..n-1 range (e.g. a sampled or filtered frame that was
    # not re-indexed). Plain series[idx] would look up the index *label*.
    path = f'{self.img_dir}/{self.data["path"].iloc[idx]}'
    # NOTE(review): astype('float32') keeps whatever value range imread
    # returned, and ToTensor only rescales uint8 input - pixel values may
    # therefore reach the network unnormalised. Confirm this is intended.
    image = plt.imread(path).astype('float32')

    label = self.img_labels.iloc[idx]
    if self.transform:
        image = self.transform(image)
    if self.target_transform:
        label = self.target_transform(label)
    return image, label

In [None]:
# Preprocessing applied to every image. Only the tensor conversion is active;
# a transforms.Normalize((0.5), (0.5)) step is left disabled.
transform = transforms.Compose([transforms.ToTensor()])
trainset = FacialExpression(data=train_data, transform=transform)


In [None]:
# Number of samples per mini-batch.
batch_size = 4
# Training batches are reshuffled every epoch and loaded by two worker processes.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    num_workers=2,
    shuffle=True,
)

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small CNN for single-channel square images: two conv/pool stages, three linear layers.

    Args:
        num_classes: Size of the output layer. Defaults to ``len(classes)``,
            read from the module-level ``classes`` list at construction time.
        image_size: Height/width in pixels of the square input images. The
            default of 350 gives the original 112896 flattened features.

    Raises:
        ValueError: If ``image_size`` is too small to survive both conv/pool stages.
    """

    def __init__(self, num_classes=None, image_size=350):
        super().__init__()
        if num_classes is None:
            num_classes = len(classes)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(self._flat_features(image_size), 2000)
        self.fc2 = nn.Linear(2000, 200)
        self.fc3 = nn.Linear(200, num_classes)

    @staticmethod
    def _flat_features(image_size):
        """Return the number of features left after both conv/pool stages."""
        side = image_size
        for _ in range(2):
            # An unpadded 5x5 convolution removes 4 pixels per side length;
            # the 2x2 max-pool then halves it, rounding down.
            side = (side - 4) // 2
        if side <= 0:
            raise ValueError(f'image_size={image_size} is too small for this network')
        # 16 is the number of output channels of conv2.
        return 16 * side * side

    def forward(self, x):
        """Return unnormalised class scores for a batch ``x`` of images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Build the network, then move its parameters to the selected device.
net = Net()
net = net.to(device)

In [None]:
import torch.optim as optim

# Cross-entropy on the raw network outputs, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), momentum=0.9, lr=0.001)
# Disabled alternative: optimizer = optim.Adam(net.parameters())

In [None]:
# Number of mini-batches between two progress reports. The running loss is
# averaged over exactly this many batches; the previous code accumulated 200
# batches but divided by 2000, so the logged loss was 10x too small.
LOG_EVERY = 200

# Make sure the network is in training mode even if eval() was called earlier
# in this session.
net.train()
for epoch in range(2):  # loop over the dataset multiple times

  running_loss = 0.0
  for i, (inputs, labels) in enumerate(trainloader, 0):
    # each batch is an (inputs, labels) pair; move both to the compute device
    inputs, labels = inputs.to(device), labels.to(device)

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()

    # clip the gradient norm before the update
    # https://stackoverflow.com/a/66659607
    torch.nn.utils.clip_grad_norm_(net.parameters(), 5)
    optimizer.step()

    # print statistics
    running_loss += loss.item()
    if i % LOG_EVERY == LOG_EVERY - 1:    # print every LOG_EVERY mini-batches
      print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / LOG_EVERY:.3f}')
      running_loss = 0.0

print('Finished Training')

[1,   200] loss: 0.151
[1,   400] loss: 0.126
[1,   600] loss: 0.111
[1,   800] loss: 0.093
[1,  1000] loss: 0.088
[1,  1200] loss: 0.091
[1,  1400] loss: 0.082
[1,  1600] loss: 0.087
[1,  1800] loss: 0.073
[1,  2000] loss: 0.077
[1,  2200] loss: 0.081
[2,   200] loss: 0.071
[2,   400] loss: 0.073
[2,   600] loss: 0.066
[2,   800] loss: 0.069
[2,  1000] loss: 0.075
[2,  1200] loss: 0.064
[2,  1400] loss: 0.071
[2,  1600] loss: 0.075
[2,  1800] loss: 0.067
[2,  2000] loss: 0.064
[2,  2200] loss: 0.070
Finished Training


In [None]:
# Held-out split: same transform and batch size as training, read in a fixed order.
testset = FacialExpression(data=test_data, transform=transform)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=batch_size,
    num_workers=2,
    shuffle=False,
)

In [None]:
# Overall accuracy on the held-out test split.
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    # Unpack each batch directly: naming the loop variable `data` would
    # overwrite the DataFrame `data` and break re-running earlier cells.
    for images, labels in testloader:
        # calculate outputs by running images through the network
        outputs = net(images.to(device)).to('cpu')
        # the class with the highest score is what we choose as prediction
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the {len(test_data)} test images: {100 * correct // total} %')

Accuracy of the network on the 3829 test images: 78 %
Accuracy of the network on the 3829 test images: 2 %


In [None]:
# Overall accuracy on the training split, for comparison with the test accuracy.
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    # Unpack each batch directly: naming the loop variable `data` would
    # overwrite the DataFrame `data` and break re-running earlier cells.
    for images, labels in trainloader:
        # calculate outputs by running images through the network
        outputs = net(images.to(device)).to('cpu')
        # the class with the highest score is what we choose as prediction
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the {len(train_data)} train images: {100 * correct // total} %')

Accuracy of the network on the 8936 train images: 85 %
Accuracy of the network on the 8936 train images: 2 %


In [None]:
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    # Unpack each batch directly: naming the loop variable `data` would
    # overwrite the DataFrame `data` and break re-running earlier cells.
    for images, labels in testloader:
        outputs = net(images.to(device)).to('cpu')
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class; tolist() yields
        # plain Python ints that can index the `classes` list directly
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            classname = classes[label]
            if label == prediction:
                correct_pred[classname] += 1
            total_pred[classname] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    seen = total_pred[classname]
    # A class with no samples in the test split has no defined accuracy;
    # report nan instead of raising ZeroDivisionError.
    accuracy = 100 * float(correct_count) / seen if seen else float('nan')
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy for class: neutral is 88.8 %
Accuracy for class: happiness is 77.1 %
Accuracy for class: surprise is 9.1 %
Accuracy for class: sadness is 0.0 %
Accuracy for class: anger is 0.0 %
Accuracy for class: contempt is 0.0 %
Accuracy for class: disgust is 0.0 %
Accuracy for class: fear  is 0.0 %
Accuracy for class: neutral is 0.0 %
Accuracy for class: happiness is 0.2 %
Accuracy for class: surprise is 89.5 %
Accuracy for class: sadness is 0.0 %
Accuracy for class: anger is 0.0 %
Accuracy for class: contempt is 0.0 %
Accuracy for class: disgust is 0.0 %
Accuracy for class: fear  is 0.0 %
