## 0.环境

In [1]:
import os
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
from sklearn.preprocessing import LabelEncoder

## 1.数据集准备

### 1.1处理标签

通过 sklearn 的 LabelEncoder 对图像的标签进行处理（将类别名称编码为整数）

In [2]:
# Every visible sub-directory of the training folder names one class.
# Plain files and hidden entries (e.g. .DS_Store, .ipynb_checkpoints) are
# skipped so they cannot become spurious classes, and the names are sorted so
# the listing does not depend on the file system's enumeration order.
laber_name = sorted(
    entry
    for entry in os.listdir('data/train/')
    if not entry.startswith('.')
    and os.path.isdir(os.path.join('data/train/', entry))
)

# Fit the encoder on the class names; the cell output is the integer id
# assigned to each name.
encoder = LabelEncoder()
encoder.fit_transform(laber_name)

array([0, 1, 2, 3, 4, 5, 6], dtype=int64)

### 1.2处理图片

In [3]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# with mean 0.5 and std 0.5.
trans = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

将数据集中图片转化为Dataset类

In [4]:
class Emotion(Dataset):
    """Image dataset backed by a directory of image files.

    With ``train=True`` the ``root`` directory is expected to hold one
    sub-directory per class, each containing that class's images; items are
    ``(data, label)`` pairs. With ``train=False`` ``root`` is read as a flat
    folder of images and items are just ``data``.

    Labels are produced by the module-level ``encoder`` object, which must be
    fitted on the class-folder names before a labelled item is fetched.

    Args:
        root: Directory to read images from.
        transform: Optional callable applied to the opened PIL image. When it
            is ``None`` a copy of the PIL image itself is returned.
        train: Selects the labelled (class-folder) layout when truthy and the
            flat, unlabelled layout otherwise.
    """

    def __init__(self, root, transform=None, train=True):
        self.root = root
        self.transform = transform
        self.train = train
        if train:
            self.images = self.getImage()
        else:
            self.images = self.getImageTest()

    def getImage(self):
        """Return the paths of all images found under ``root/<class>/``.

        Hidden entries and anything that is not a directory (at class level)
        or not a file (at image level) are skipped. Both levels are sorted so
        that an index always refers to the same image.
        """
        images = []
        for emotion in sorted(os.listdir(self.root)):
            class_dir = os.path.join(self.root, emotion)
            # Stray files / hidden folders next to the class folders are not classes.
            if emotion.startswith('.') or not os.path.isdir(class_dir):
                continue
            for name in sorted(os.listdir(class_dir)):
                image_path = os.path.join(class_dir, name)
                if not name.startswith('.') and os.path.isfile(image_path):
                    images.append(image_path)
        return images

    def getImageTest(self):
        """Return the sorted paths of the visible files directly inside ``root``."""
        images = []
        for name in sorted(os.listdir(self.root)):
            image_path = os.path.join(self.root, name)
            if not name.startswith('.') and os.path.isfile(image_path):
                images.append(image_path)
        return images

    @staticmethod
    def _label_name(image_path):
        """Return the name of the directory that directly contains ``image_path``.

        Taking the parent folder name (instead of a fixed position in the
        split path) keeps label extraction independent of how many components
        ``root`` has and of the platform's path separator.
        """
        return os.path.basename(os.path.dirname(image_path))

    def __getitem__(self, index):
        """Load the image at ``index``; also return its encoded label in train mode."""
        image_path = self.images[index]

        # The context manager closes the underlying file once the pixel data
        # has been consumed by the transform (or copied).
        with Image.open(image_path) as image:
            if self.transform is not None:
                data = self.transform(image)
            else:
                data = image.copy()

        if self.train:
            # encoder.transform takes a list and returns a length-1 array.
            label = encoder.transform([self._label_name(image_path)])
            return data, label
        return data

    def __len__(self):
        """Return the number of image paths collected at construction time."""
        return len(self.images)

In [5]:
# Sanity check: build the training dataset and display one (tensor, label) item.
# The dataset gets its own name because `emotion` is later bound to the
# EmotionRecognition trainer; reusing one name for two kinds of object makes
# the notebook sensitive to cell execution order.
sample_dataset = Emotion(root='data/train/', transform=trans)
sample_dataset[24]

(tensor([[[-0.3647, -0.2392, -0.4745,  ..., -0.2000, -0.0510, -0.1765],
          [-0.2314, -0.2000, -0.5294,  ..., -0.0353,  0.0118, -0.0039],
          [-0.1608, -0.2863, -0.4431,  ..., -0.0353,  0.1294,  0.0275],
          ...,
          [-0.2706, -0.1529, -0.1059,  ...,  0.0824,  0.0275,  0.0353],
          [-0.4431, -0.5294, -0.4510,  ...,  0.0667,  0.0588,  0.0353],
          [-0.9373, -1.0000, -1.0000,  ...,  0.0588,  0.0353,  0.0196]]]),
 array([0]))

### 1.3数据集准备

Loader 准备

In [6]:
# Datasets: train/ and val/ are read in labelled mode (train=True is the
# default); test/ is read as a flat folder without labels.
train_dataset = Emotion(transform=trans, root='data/train/')
val_dataset = Emotion(transform=trans, root='data/val/')
test_dataset = Emotion(transform=trans, root='data/test/', train=False)

# Batch iterators over the three datasets.
trainLoader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
valLoader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=True)
testLoader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

## 2.模型训练

In [7]:
class EmotionCNN(nn.Module):
    """Small CNN classifier: two conv/pool stages followed by four linear layers.

    The network takes single-channel images and produces 7 raw class scores
    (logits) per image. The first linear layer expects 16 x 9 x 9 features
    per sample, which is what the conv/pool stack yields for e.g. a 48 x 48
    input: 48 -> 44 (5x5 conv) -> 22 (pool) -> 18 (5x5 conv) -> 9 (pool).
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # Feature extractor: 1 -> 6 -> 16 channels, 5x5 kernels, no padding.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # One 2x2 / stride-2 max-pool module is reused after both convolutions.
        self.pool = nn.MaxPool2d(2, 2)
        # Classifier head: 1296 -> 1024 -> 512 -> 128 -> 7.
        self.fc1 = nn.Linear(9 * 9 * 16, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 128)
        self.fc4 = nn.Linear(128, 7)
        # Shared dropout applied after fc1 and fc2.
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits for a batch of single-channel images.

        Raises:
            RuntimeError: if the conv/pool stack does not yield exactly
                ``fc1.in_features`` values per sample (wrong input size).
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))

        # Validate before flattening: view(-1, n) on its own would silently
        # re-chunk the batch whenever the total element count happens to be
        # divisible by n, mixing pixels of different samples.
        per_sample = x.shape[-3] * x.shape[-2] * x.shape[-1]
        if per_sample != self.fc1.in_features:
            raise RuntimeError(
                f'EmotionCNN expected {self.fc1.in_features} features per sample '
                f'after the conv/pool stack but got {per_sample} '
                f'(feature map shape {tuple(x.shape[-3:])}); check the input image size'
            )
        x = x.view(-1, self.fc1.in_features)

        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [8]:
class EmotionRecognition:
    """Bundles an ``EmotionCNN`` with its loss, optimizer and device.

    Provides training (with checkpointing to ``emotion.pth`` in the current
    working directory), checkpoint loading, and accuracy evaluation.
    """

    def __init__(self) -> None:
        self.device = self._select_device()
        self.model = EmotionCNN().to(self.device)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)

    @staticmethod
    def _select_device():
        """Return ``cuda:0`` when CUDA is usable, otherwise the CPU device."""
        # is_available has to be *called*: the bare function object is always
        # truthy, which would select CUDA even on CPU-only machines.
        return torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    def train(self, trainLoader, num_epochs, load=True):
        """Load a saved checkpoint, or train the model and save one.

        Args:
            trainLoader: Iterable of batches; element 0 of a batch is the
                image tensor and element 1 the label tensor. It must support
                ``len()`` (used in the progress message).
            num_epochs: Number of passes over ``trainLoader``. Ignored when
                ``load`` is truthy.
            load: When truthy, weights are read from ``emotion.pth`` and no
                training happens. Otherwise the model is trained and its
                weights are written to ``emotion.pth``.
        """
        if load:
            # map_location lets a checkpoint saved on a GPU be restored on a
            # CPU-only machine (and vice versa).
            state = torch.load('emotion.pth', map_location=self.device)
            self.model.load_state_dict(state)
            print('load finish')
            return

        self.model.train()
        for epoch in range(num_epochs):
            for i, data in enumerate(trainLoader):
                images, labels = data[0].to(self.device), data[1].to(self.device)
                self.optimizer.zero_grad()
                # Call the module itself rather than .forward() so hooks run.
                outputs = self.model(images)
                # Labels are flattened to 1-D int64, as CrossEntropyLoss
                # expects class indices.
                loss = self.criterion(outputs, labels.reshape(-1).long())
                loss.backward()
                self.optimizer.step()

                if i % 100 == 0:
                    print(f'epoch: {epoch + 1}/{num_epochs}, step: {i}/{len(trainLoader)}, loss: {loss.item()}')

        torch.save(self.model.state_dict(), 'emotion.pth')
        print('finish')

    def evaluate(self, test_loader):
        """Return the fraction of correctly classified samples in ``test_loader``.

        Batches have the same layout as in ``train``. An empty loader yields
        ``0.0`` instead of raising ``ZeroDivisionError``.
        """
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data in test_loader:
                images, labels = data[0].to(self.device), data[1].to(self.device)
                labels = labels.reshape(-1).long()
                predicted = self.model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        if total == 0:
            return 0.0
        return correct / total

In [9]:
# Build the trainer and train from scratch for 15 epochs; load=False makes
# train() run the optimisation loop instead of reading a saved checkpoint.
emotion = EmotionRecognition()
emotion.train(trainLoader, 15, load=False)

epoch: 1/15, step: 0/308, loss: 1.94259512424469
epoch: 1/15, step: 100/308, loss: 1.8010259866714478
epoch: 1/15, step: 200/308, loss: 1.6210118532180786
epoch: 1/15, step: 300/308, loss: 1.6426857709884644
epoch: 2/15, step: 0/308, loss: 1.509759783744812
epoch: 2/15, step: 100/308, loss: 1.3593642711639404
epoch: 2/15, step: 200/308, loss: 1.357539415359497
epoch: 2/15, step: 300/308, loss: 1.5541982650756836
epoch: 3/15, step: 0/308, loss: 1.3315808773040771
epoch: 3/15, step: 100/308, loss: 1.3869534730911255
epoch: 3/15, step: 200/308, loss: 1.342908501625061
epoch: 3/15, step: 300/308, loss: 1.442569375038147
epoch: 4/15, step: 0/308, loss: 1.3510205745697021
epoch: 4/15, step: 100/308, loss: 1.2106720209121704
epoch: 4/15, step: 200/308, loss: 1.1395156383514404
epoch: 4/15, step: 300/308, loss: 1.2936512231826782
epoch: 5/15, step: 0/308, loss: 1.0547114610671997
epoch: 5/15, step: 100/308, loss: 1.0763795375823975
epoch: 5/15, step: 200/308, loss: 0.9711562395095825
epoch: 5/

In [10]:
# Accuracy of the trained model on the validation loader.
emotion.evaluate(valLoader)

0.5116279069767442