In [1]:
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

In [2]:
# Class names are the sub-directory names of the training folder. Hidden
# entries (e.g. ``.ipynb_checkpoints``) and stray files are skipped so they do
# not become bogus classes, and the list is sorted so it does not depend on
# the order ``os.listdir`` happens to return.
# NOTE: the spelling ``laber_name`` is kept in case other cells refer to it.
laber_name = sorted(
    entry for entry in os.listdir('data/train/')
    if not entry.startswith('.') and os.path.isdir(os.path.join('data/train/', entry))
)
encoder = LabelEncoder()
# Last expression of the cell: displays the integer code assigned to each class.
encoder.fit_transform(laber_name)

array([0, 1, 2, 3, 4, 5, 6], dtype=int64)

In [3]:
# Augmentation pipeline: convert the image to a tensor first, then apply a
# random horizontal mirror followed by a random 44x44 crop.
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(size=44),
])

In [4]:
class Emotion(Dataset):
    """Image dataset backed by a directory tree.

    In training mode (``train`` truthy) ``root`` is expected to contain one
    sub-directory per class, each holding that class's image files; items are
    ``(data, label)`` pairs. Otherwise ``root`` is expected to contain the
    image files directly; items are ``(data, file_name)`` pairs.

    Labels are produced by the module-level ``encoder`` (a fitted
    ``LabelEncoder``), which must exist by the time a labelled item is fetched.

    Args:
        root: Directory to scan.
        transform: Optional callable applied to the ``H x W x 3`` array of
            each image. When ``None`` the array itself is returned.
        train: Selects the labelled layout (truthy) or the flat, unlabelled
            layout (falsy).
    """

    def __init__(self, root, transform=None, train=True):
        self.root = root
        self.transform = transform
        self.train = train
        self.images = self.getImage() if train else self.getImageTest()

    @staticmethod
    def _visible_entries(directory):
        """Return the sorted names in ``directory`` that do not start with '.'."""
        return sorted(
            entry for entry in os.listdir(directory) if not entry.startswith('.')
        )

    def getImage(self):
        """Return the paths of all files found in ``root/<class>/``.

        Entries of ``root`` that are not directories (stray files) and hidden
        entries are ignored, so they neither crash the scan nor show up as
        classes.
        """
        images = []
        for emotion in self._visible_entries(self.root):
            class_dir = os.path.join(self.root, emotion)
            if not os.path.isdir(class_dir):
                continue
            for file_name in self._visible_entries(class_dir):
                file_path = os.path.join(class_dir, file_name)
                if os.path.isfile(file_path):
                    images.append(file_path)
        return images

    def getImageTest(self):
        """Return the paths of the (non-hidden) files directly inside ``root``."""
        candidates = (
            os.path.join(self.root, entry)
            for entry in self._visible_entries(self.root)
        )
        return [path for path in candidates if os.path.isfile(path)]

    def __getitem__(self, index):
        """Load one image.

        Returns:
            ``(data, label)`` in training mode, where ``label`` is the
            length-1 array returned by ``encoder.transform``; otherwise
            ``(data, file_name)``.
        """
        image_path = self.images[index]

        # ``convert('RGB')`` turns a single-channel image into three identical
        # channels and also copes with files that are already multi-channel.
        # The context manager closes the underlying file once pixels are read.
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))

        data = image if self.transform is None else self.transform(image)

        if self.train:
            # The class name is the image's parent directory; this does not
            # depend on how many components ``root`` has or on the separator.
            class_name = os.path.basename(os.path.dirname(image_path))
            label = encoder.transform([class_name])
            return data, label

        return data, os.path.basename(image_path)

    def __len__(self):
        """Number of image files discovered under ``root``."""
        return len(self.images)

In [5]:
# Evaluation has to be deterministic, so validation/test images get a fixed
# centre crop (same 44-pixel size as the random training crop) and no flip.
evalTrans = transforms.Compose([
    transforms.ToTensor(),
    transforms.CenterCrop(44)
])

# Training data: random augmentation, reshuffled every epoch.
trainSet = Emotion(root='data/train/', transform=trans)
trainLoader = DataLoader(trainSet, batch_size=128, shuffle=True)

# Validation data: labelled, deterministic preprocessing; order is irrelevant
# for accuracy, so no shuffling.
valSet = Emotion(root='data/val/', transform=evalTrans)
valLoader = DataLoader(valSet, batch_size=32, shuffle=False)

# Test data: unlabelled files, one image per batch, in listing order.
testSet = Emotion(root='data/test/', transform=evalTrans, train=False)
testLoader = DataLoader(testSet, batch_size=1, shuffle=False)

In [23]:
# Layer configuration consumed by ``VGG._make_layers``: an integer is the
# output-channel count of a 3x3 conv + BatchNorm + ReLU unit, 'M' is a 2x2
# max-pool.
# NOTE: despite its name this list holds 16 conv entries, i.e. it is the
# 19-layer ("E") layout of the VGG paper, not the 16-layer one. The name is
# kept because other cells reference it.
VGG16 = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']

class VGG(nn.Module):
    """VGG-style CNN: a conv feature extractor followed by one linear layer.

    Args:
        cfg: Sequence describing the feature extractor; each integer adds a
            conv/BatchNorm/ReLU unit with that many output channels and each
            ``'M'`` adds a 2x2 max-pool.
        num_classes: Number of output logits (default 7).
        in_channels: Number of channels of the input images (default 3).
    """

    def __init__(self, cfg, num_classes=7, in_channels=3) -> None:
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg, in_channels)
        # The classifier width follows the last conv layer (512 for ``VGG16``)
        # instead of being hard-coded.
        widths = [x for x in cfg if x != 'M']
        feature_dim = widths[-1] if widths else in_channels
        self.fc = nn.Linear(feature_dim, num_classes)

    def _make_layers(self, cfg, in_channels=3):
        """Build the feature extractor described by ``cfg``."""
        layers = []

        for x in cfg:
            if x == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.extend([
                    nn.Conv2d(in_channels, x, kernel_size=3, stride=1, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(inplace=True),
                ])
                in_channels = x

        # Global average pooling to 1x1. For inputs whose feature map is
        # already 1x1 this is the identity (as the former AvgPool2d(1, 1)
        # was); for larger inputs it keeps the flattened size equal to the
        # channel count so ``fc`` still fits. It has no parameters, so
        # existing state dicts load unchanged.
        layers.append(nn.AdaptiveAvgPool2d(1))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        out = self.features(x)
        out = out.flatten(1)
        # Dropout is only active while the module is in training mode.
        out = nn.functional.dropout(out, p=0.5, training=self.training)
        out = self.fc(out)
        return out


In [33]:
class EmotionRecognition:
    """Bundles the VGG model with its loss, optimiser and LR schedule, and
    provides training / checkpoint loading and accuracy evaluation."""

    def __init__(self) -> None:
        # ``is_available`` has to be *called*: the bare function object is
        # always truthy and would select CUDA even on CPU-only machines.
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.model = VGG(VGG16).to(self.device)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=30, gamma=0.1)

    def train(self, trainLoader, num_epochs, load=True, path='emotion.pth'):
        """Load a saved checkpoint, or train the model and save one.

        Args:
            trainLoader: Iterable of batches whose first two elements are the
                images and the labels. Labels are flattened to 1-D, so both
                ``(B,)`` and ``(B, 1)`` layouts are accepted.
            num_epochs: Number of passes over ``trainLoader`` (ignored when
                ``load`` is truthy).
            load: When truthy, only restore the weights from ``path``;
                otherwise train and then write the weights to ``path``.
            path: Checkpoint file to read or write.
        """
        if load:
            # ``map_location`` lets a checkpoint saved on a GPU be restored on
            # a CPU-only machine. ``torch.load`` unpickles the file, so only
            # load checkpoints from a trusted source.
            self.model.load_state_dict(torch.load(path, map_location=self.device))
            print('load finish')
            return

        self.model.train()
        for epoch in range(num_epochs):
            for i, data in enumerate(trainLoader):
                images = data[0].to(self.device)
                labels = data[1].to(self.device).reshape(-1).long()

                self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()

                if i % 100 == 0:
                    print(f'epoch: {epoch + 1}/{num_epochs}, step: {i}/{len(trainLoader)}, loss: {loss.item()}')

            # Advance the LR schedule once per epoch; without this call the
            # scheduler created in ``__init__`` never changes the rate.
            self.scheduler.step()

        torch.save(self.model.state_dict(), path)
        print('finish')

    def evaluate(self, test_loader):
        """Return the fraction of correctly classified samples.

        Args:
            test_loader: Iterable of batches whose first two elements are the
                images and the labels.

        Returns:
            Accuracy in ``[0, 1]``; ``0.0`` when the loader yields no samples.
        """
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data in test_loader:
                images = data[0].to(self.device)
                labels = data[1].to(self.device).reshape(-1).long()
                predicted = self.model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        return correct / total if total else 0.0

In [28]:
emotion = EmotionRecognition()
# Train from scratch for 30 epochs, the count shown in the recorded log below.
# With num_epochs=0 the training loop never runs and an untrained model would
# be written to the checkpoint on a fresh Restart & Run All.
emotion.train(trainLoader=trainLoader, num_epochs=30, load=False)

epoch: 1/30, step: 0/154, loss: 2.320784091949463
epoch: 1/30, step: 100/154, loss: 1.8492485284805298
epoch: 2/30, step: 0/154, loss: 1.911111831665039
epoch: 2/30, step: 100/154, loss: 1.969126582145691
epoch: 3/30, step: 0/154, loss: 1.9392344951629639
epoch: 3/30, step: 100/154, loss: 1.8171815872192383
epoch: 4/30, step: 0/154, loss: 1.8924098014831543
epoch: 4/30, step: 100/154, loss: 1.801241397857666
epoch: 5/30, step: 0/154, loss: 1.8290725946426392
epoch: 5/30, step: 100/154, loss: 1.719276785850525
epoch: 6/30, step: 0/154, loss: 1.8464679718017578
epoch: 6/30, step: 100/154, loss: 1.6889992952346802
epoch: 7/30, step: 0/154, loss: 1.820366621017456
epoch: 7/30, step: 100/154, loss: 1.6638925075531006
epoch: 8/30, step: 0/154, loss: 1.6818726062774658
epoch: 8/30, step: 100/154, loss: 1.560455322265625
epoch: 9/30, step: 0/154, loss: 1.479022741317749
epoch: 9/30, step: 100/154, loss: 1.4217413663864136
epoch: 10/30, step: 0/154, loss: 1.345154047012329
epoch: 10/30, step: 1

In [29]:
# Accuracy (correct / total) of the current model on the validation loader;
# as the cell's last expression the value is displayed below.
emotion.evaluate(test_loader=valLoader)

0.6214470284237726

In [30]:
pred = []
file_name = []

# Inference only: switch to eval mode once (not on every iteration) and turn
# autograd off so no computation graph is built for each batch.
emotion.model.eval()
with torch.no_grad():
    for images, name in testLoader:
        images = images.to(emotion.device)
        pred.extend(emotion.model(images).argmax(dim=1).tolist())
        # ``name`` holds one file name per sample, so this works for any
        # batch size, not just 1.
        file_name.extend(name)

# Map the predicted integer codes back to the class names.
pred = encoder.inverse_transform(pred)

In [31]:
# One row per test image: its file name and the predicted class name.
# (pandas is already imported as ``pd`` in the notebook's import cell.)
df = pd.DataFrame({'files_name': file_name, 'class': pred})

In [32]:
# Write the predictions to CSV; index=False leaves the DataFrame index out of the file.
df.to_csv('epoch30.csv', index=False)