In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from PIL import Image
import numpy as np
import time
import matplotlib.pyplot as plt

# 파이토치 계산 dvice 설정
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using PyTorch version: {torch.__version__}, Device: {device}')

Using PyTorch version: 1.9.0+cu111, Device: cuda


In [2]:
class MishFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x * torch.tanh(F.softplus(x))   # x * tanh(ln(1 + exp(x)))

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        sigmoid = torch.sigmoid(x)
        tanh_sp = torch.tanh(F.softplus(x)) 
        return grad_output * (tanh_sp + x * sigmoid * (1 - tanh_sp * tanh_sp))

class Mish(nn.Module):
    def forward(self, x):
        return MishFunction.apply(x)

def to_Mish(model):
    for child_name, child in model.named_children():
        if isinstance(child, nn.ReLU):
            setattr(model, child_name, Mish())
        else:
            to_Mish(child)
            
class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            Mish(),
            nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels * BasicBlock.expansion),)

        self.shortcut = nn.Sequential()
        self.Mish = Mish()
        if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * BasicBlock.expansion))
    def forward(self, x):
        x = self.residual_function(x) + self.shortcut(x)
        x = self.Mish(x)
        return x
    
class BottleNeck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
            Mish(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            Mish(),
            nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels * BottleNeck.expansion),)

        self.shortcut = nn.Sequential()
        self.Mish = Mish()
        if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels*BottleNeck.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels*BottleNeck.expansion))         
    def forward(self, x):
        x = self.residual_function(x) + self.shortcut(x)
        x = self.Mish(x)
        return x
    
class ResNet(nn.Module):
    def __init__(self, block, num_block, num_classes=3, init_weights=True):
        super().__init__()
        self.in_channels=64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            Mish(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
        self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
        self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
        self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # weights inittialization
        if init_weights:
            self._initialize_weights()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self,x):
        output = self.conv1(x)
        output = self.conv2_x(output)
        x = self.conv3_x(output)
        x = self.conv4_x(x)
        x = self.conv5_x(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    # define weight initialization function
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
                
def resnet18():
    return ResNet(BasicBlock, [2,2,2,2])

def resnet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

def resnet50():
    return ResNet(BottleNeck, [3, 4, 6, 3])

def resnet101():
    return ResNet(BottleNeck, [3, 4, 23, 3])

def resnet152():
    return ResNet(BottleNeck, [3, 8, 36, 3])

#### 전체 코드

- https://deep-learning-study.tistory.com/299

In [7]:
img.shape

(480, 640, 3)

In [6]:
faces.shape

torch.Size([224, 224, 3])

In [None]:
import cv2
import numpy as np
import tensorflow as tf

# 얼굴표정
model = resnet50().to(device)
model.load_state_dict(torch.load('best_model(정확도).pt')) 
model.eval()

net = cv2.dnn.readNet("./yolov3-openimages.weights", "./yolov3-openimages.cfg")

classes = []
with open("./openimages.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
EMOTIONS = ["Happy" ,"Netural","Sad"]

def softmax(x):
    array_x = x - np.max(x)
    exp_x = np.exp(array_x) 
    result = exp_x / np.sum(exp_x)
    
    return result

# 카메라 지정
cap = cv2.VideoCapture(0)

while cap.isOpened(): # 카메라가 켜져 있는 동안
    _, img = cap.read() # 카메라 불러온 프레임 넣기
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # yolo detect face
    try:
        height, width, channels = img.shape
        
        blob = cv2.dnn.blobFromImage(img, 0.00392, (224, 224), (0, 0, 0), True, crop=False)
        net.setInput(blob) # 네트워크 입력 설정하기(블롭 객체 정의)
        outs = net.forward(output_layers) # 네트워크 순방향 실행/추론(출력 레이어 이름)

        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    # 좌표
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
                    
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4) # 노이즈 제거(Non maximum suppresion)

        font = cv2.FONT_HERSHEY_PLAIN
        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]              # 감지된 개체를 둘러싼 사각형의 좌표
                label = str(classes[class_ids[i]]) # 감지된 물체의 이름
                color = colors[i]
                if label == "Human face": # 라벨링 中 살마 얼굴 일경우
                    faces = img[y : y+h, x : x+w]  # 해당 부분의 좌표로 프레임을 정제
                    cv2.rectangle(img, (x,y ), (x + w, y + h), (255, 0 , 0), 3) # 사각형 그리기(이미지, 시작 좌표, 종료 좌표, BGR. 선두께)
                    cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
                    
                    faces = cv2.resize(faces, (224, 224))    # 크기 사이즈 변환 (224,224)
                    faces = faces.astype(np.float)           # 형식 변환(float)
                    faces = (faces/255)                      # 소수점 변환

                    faces = torch.from_numpy(faces)          # tensor 형식 변환
                    
                    value = faces.view([1,3,224,224])        # tensor 차원 변환
                    value = value.to(device).float()         # device 넣기 

                    output = model(value)                    # 모델 넣기 
                    _, preds = torch.max(output, 1)
                    emotion_result = "Happy" if (preds == 0) else "Neturality" if (preds == 1)  else "sad"
                    print("감정 결과: ", emotion_result)
                    print("해당 감정의 확률: {} %".format((softmax(output.tolist()[0]).round(3)*100).max()))
                    # 분류 모형을 통해 표정 예측

                    if (preds == 0): # happy일 경우 
                        cv2.putText(img, "happy", (50, 550), font, 3, color, 3)
                    elif (preds == 1): # 무표정일 경우
                        cv2.putText(img, "nothing", (50, 550), font, 3, color, 3)
                    else: 
                        cv2.putText(img, "sad", (50, 550), font, 3, color, 3)
        
    except Exception as e:
        pass
        
    cv2.imshow("image", img)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


감정 결과:  sad
해당 감정의 확률: 64.1 %
감정 결과:  sad
해당 감정의 확률: 59.099999999999994 %
감정 결과:  sad
해당 감정의 확률: 63.3 %
감정 결과:  sad
해당 감정의 확률: 71.8 %
감정 결과:  sad
해당 감정의 확률: 74.8 %
감정 결과:  sad
해당 감정의 확률: 68.10000000000001 %
감정 결과:  sad
해당 감정의 확률: 72.7 %
감정 결과:  sad
해당 감정의 확률: 82.39999999999999 %
감정 결과:  sad
해당 감정의 확률: 78.5 %
감정 결과:  sad
해당 감정의 확률: 79.4 %
감정 결과:  sad
해당 감정의 확률: 71.7 %
감정 결과:  sad
해당 감정의 확률: 76.3 %
감정 결과:  sad
해당 감정의 확률: 69.0 %
감정 결과:  sad
해당 감정의 확률: 67.2 %
감정 결과:  sad
해당 감정의 확률: 62.2 %
감정 결과:  sad
해당 감정의 확률: 59.8 %
감정 결과:  sad
해당 감정의 확률: 62.3 %
감정 결과:  sad
해당 감정의 확률: 61.3 %
감정 결과:  sad
해당 감정의 확률: 61.8 %
감정 결과:  sad
해당 감정의 확률: 61.0 %
감정 결과:  sad
해당 감정의 확률: 61.4 %
감정 결과:  sad
해당 감정의 확률: 61.7 %
감정 결과:  sad
해당 감정의 확률: 61.199999999999996 %
감정 결과:  sad
해당 감정의 확률: 62.4 %
감정 결과:  sad
해당 감정의 확률: 61.1 %
감정 결과:  sad
해당 감정의 확률: 57.4 %
감정 결과:  sad
해당 감정의 확률: 58.199999999999996 %
감정 결과:  sad
해당 감정의 확률: 62.8 %
감정 결과:  sad
해당 감정의 확률: 66.8 %
감정 결과:  sad
해당 감정의 확률: 81.6 %
감정 결과:  sad
해당 감정의 확률: 77.600000