In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from PIL import Image
import numpy as np
import time
import matplotlib.pyplot as plt

# 파이토치 계산 dvice 설정
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using PyTorch version: {torch.__version__}, Device: {device}')

Using PyTorch version: 1.9.0+cu111, Device: cuda


In [2]:
#기본 정의
batch_size = 32
learning_rate = 0.0001
epochs = 70

test_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

test_dir = './emotion/test'
test_data = datasets.ImageFolder(root=test_dir, transform=test_transforms)

test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=True)

print(f'test images: {len(test_loader.dataset)}')

test images: 460


In [3]:
class MishFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x * torch.tanh(F.softplus(x))   # x * tanh(ln(1 + exp(x)))

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        sigmoid = torch.sigmoid(x)
        tanh_sp = torch.tanh(F.softplus(x)) 
        return grad_output * (tanh_sp + x * sigmoid * (1 - tanh_sp * tanh_sp))

class Mish(nn.Module):
    def forward(self, x):
        return MishFunction.apply(x)

def to_Mish(model):
    for child_name, child in model.named_children():
        if isinstance(child, nn.ReLU):
            setattr(model, child_name, Mish())
        else:
            to_Mish(child)

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            Mish(),
            nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels * BasicBlock.expansion),)

        self.shortcut = nn.Sequential()
        self.Mish = Mish()
        if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * BasicBlock.expansion))
    def forward(self, x):
        x = self.residual_function(x) + self.shortcut(x)
        x = self.Mish(x)
        return x

class BottleNeck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
            Mish(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            Mish(),
            nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels * BottleNeck.expansion),)

        self.shortcut = nn.Sequential()
        self.Mish = Mish()
        if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels*BottleNeck.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels*BottleNeck.expansion))         
    def forward(self, x):
        x = self.residual_function(x) + self.shortcut(x)
        x = self.Mish(x)
        return x

class ResNet(nn.Module):
    def __init__(self, block, num_block, num_classes=3, init_weights=True):
        super().__init__()
        self.in_channels=64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            Mish(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
        self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
        self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
        self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # weights inittialization
        if init_weights:
            self._initialize_weights()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self,x):
        output = self.conv1(x)
        output = self.conv2_x(output)
        x = self.conv3_x(output)
        x = self.conv4_x(x)
        x = self.conv5_x(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    # define weight initialization function
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

def resnet18():
    return ResNet(BasicBlock, [2,2,2,2])

def resnet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

def resnet50():
    return ResNet(BottleNeck, [3, 4, 6, 3])

def resnet101():
    return ResNet(BottleNeck, [3, 4, 23, 3])

def resnet152():
    return ResNet(BottleNeck, [3, 8, 36, 3])

In [4]:
model = resnet50().to(device)
model.load_state_dict(torch.load('./모델모음/Resnet50_Mish.pt')) 
model.eval()
criterion = nn.CrossEntropyLoss()

In [5]:
with torch.no_grad():
    start = time.time()
    test_acc=0
    test_avg_loss = 0
    for image, label in test_loader:
        # ------- assign valid data
        image = image.to(device)
        label = label.to(device)
        # ------- forward prop
        test_output = model(image)
        test_loss = criterion(test_output,label)
        
        # ------- get valid performance
        test_batch_acc = ((test_output.argmax(dim=1) == label).float().mean()) # acc = 맞춘 개수 / 배치사이즈
        test_acc += test_batch_acc / len(test_loader) # acc / total_Iteration 
        test_avg_loss += test_loss / len(test_loader)
    print(f' test_acc : {test_acc:.4f}, test_loss : {test_avg_loss:.4f}, takes {time.time() - start}secs')

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


 test_acc : 0.8996, test_loss : 0.2597, takes 4.5579917430877686secs


In [6]:
#model = torch.load('RESNET50(base).pt') 

#### 사진 하나에 대한 예측 결과
- https://www.programcreek.com/python/example/117696/torchvision.transforms.RandomResizedCrop

#### 특정 사진으로 결과 값 확인

In [None]:
def get_img_tensor(img_path, use_cuda, get_size=False):
    img = Image.open(img_path)
    original_w, original_h = img.size

    img_size = (224, 224) 
    img.thumbnail(img_size, Image.ANTIALIAS)
    img = img.convert('RGB')
    
    find_transforms = transforms.Compose([transforms.Resize(256), transforms.Resize((224,224)),transforms.ToTensor()])
    img_tensor = find_transforms(img)
    
    return img_tensor
    
def custom_inshow(img):
    img = img.numpy()
    plt.imshow(np.transpose(img, (1,2,0)))
    plt.show()

value = get_img_tensor('./emotion/슬픈표정.jpg', device)
custom_inshow(value)

value = value.view([1,3,224,224])
value = value.to(device)
# 모델 예측
model.eval()
output = model(value)

# 예측 결과
_, preds = torch.max(output, 1)
print(preds)
# 결과 정리
emotion = "Happy" if (preds == 0) else "Neturality" if (preds == 1)  else "sad"
print("감정 결과: ", emotion)

def softmax(x):
    array_x = x - np.max(x)
    exp_x = np.exp(array_x) 
    result = exp_x / np.sum(exp_x)
    
    return result


print("해당 감정의 확률: {} %".format((softmax(output.tolist()[0]).round(3)*100).max()))

In [None]:
def get_img_tensor(img_path, use_cuda, get_size=False):
    img = Image.open(img_path)
    original_w, original_h = img.size

    img_size = (224, 224) 
    img.thumbnail(img_size, Image.ANTIALIAS)
    img = img.convert('RGB')
    
    find_transforms = transforms.Compose([transforms.Resize(256), transforms.Resize((224,224)),transforms.ToTensor()])
    img_tensor = find_transforms(img)
    
    return img_tensor
    
def custom_inshow(img):
    img = img.numpy()
    plt.imshow(np.transpose(img, (1,2,0)))
    plt.show()
    
value = get_img_tensor('./emotion/happy_find.jpg', device)
custom_inshow(value)

value = value.view([1,3,224,224])
value = value.to(device)
# 모델 예측
model.eval()
output = model(value)

# 예측 결과
_, preds = torch.max(output, 1)
print(preds)
# 결과 정리
emotion = "Happy" if (preds == 0) else "Neturality" if (preds == 1)  else "sad"
print("감정 결과: ", emotion)

def softmax(x):
    array_x = x - np.max(x)
    exp_x = np.exp(array_x) 
    result = exp_x / np.sum(exp_x)
    
    return result


print("해당 감정의 확률: {} %".format((softmax(output.tolist()[0]).round(3)*100).max()))

In [None]:
def get_img_tensor(img_path, use_cuda, get_size=False):
    img = Image.open(img_path)
    original_w, original_h = img.size

    img_size = (224, 224) 
    img.thumbnail(img_size, Image.ANTIALIAS)
    img = img.convert('RGB')
    
    find_transforms = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor()])
    img_tensor = find_transforms(img)
    
    return img_tensor
    
def custom_inshow(img):
    img = img.numpy()
    plt.imshow(np.transpose(img, (1,2,0)))
    plt.show()
    
value = get_img_tensor('./emotion/무표정.jpg', device)
custom_inshow(value)

value = value.view([1,3,224,224])
value = value.to(device)
# 모델 예측
model.eval()
output = model(value)

# 예측 결과
_, preds = torch.max(output, 1)
print(preds)
# 결과 정리
emotion = "Happy" if (preds == 0) else "Neturality" if (preds == 1)  else "sad"
print("감정 결과: ", emotion)


def softmax(x):
    array_x = x - np.max(x)
    exp_x = np.exp(array_x) 
    result = exp_x / np.sum(exp_x)
    
    return result


print("해당 감정의 확률: {} %".format((softmax(output.tolist()[0]).round(3)*100).max()))

#### Confusion Matrix

In [None]:
classes = (0,1,2)

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

y_true = []
y_pred = []

# 여기서도 또한 필요없음
with torch.no_grad():
    for image, label in test_loader:
        # ------- assign valid data
        image = image.to(device)
        label = label.to(device)
        # ------- forward prop
        test_outputs = model(image)
        _, predictions = torch.max(test_outputs, 1)
        y_true.append(label)
        y_pred.append(predictions)        
        for labels, prediction in zip(label, predictions):
            if labels == prediction:
                correct_pred[classes[labels]] += 1
            total_pred[classes[labels]] += 1
                
                            
change = { 0 :'happy_emotion', 1 : 'Neturality_emotion', 2 : 'Sad_emotion'}
correct_pred = dict((change[key], value) for (key, value) in correct_pred.items())
total_pred = dict((change[key], value) for (key, value) in total_pred.items())   

# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
                                                   accuracy))

In [None]:
label_tags = {
    0: 'Happy', 
    1: 'Blank', 
    2: 'Sad', 
}
columns = 4 
rows = 8
fig = plt.figure(figsize=(16,40))
 
model.eval()
for i in range(1, columns*rows+1):
    data_idx = np.random.randint(len(test_data))
    input_img = test_data[data_idx][0].unsqueeze(dim=0).to(device) 

    output = model(input_img)
    _, argmax = torch.max(output, 1)
    pred = label_tags[argmax.item()]
    label = label_tags[test_data[data_idx][1]]

    fig.add_subplot(rows, columns, i)
    if pred == label:
        plt.title(pred + ', right !!')
    else:
        plt.title('Not ' + pred + ' but ' +  label)
    plot_img = test_data[data_idx][0]
    plt.imshow(np.transpose(plot_img, (1,2,0)))
    plt.axis('off')
    
plt.show() 

### (1) 직접 구현 

In [17]:
zero_true, one_true, two_true = 0, 0, 0
zero_to_one, zero_to_two, one_to_zero, one_to_two, two_to_zero, two_to_one = 0, 0, 0, 0, 0, 0

new= 0 
for i in range(14):
    for j in range(32):
        if y_pred[i][j] == y_true[i][j]:
            if y_pred[i][j] == 0:
                zero_true = zero_true + 1
            elif y_pred[i][j] == 1:
                one_true = one_true + 1 
            elif y_pred[i][j] == 2:
                two_true = two_true + 1
        else: 
            if y_true[i][j] == 0:
                if y_pred[i][j] == 1:
                    zero_to_one = zero_to_one + 1
                elif y_pred[i][j] == 2:
                    zero_to_two = zero_to_two + 1
            elif y_true[i][j] == 1:
                if y_pred[i][j] == 0:
                    one_to_zero = one_to_zero + 1
                elif y_pred[i][j] == 2:
                    one_to_two = one_to_two + 1
            elif y_true[i][j] == 2:
                if y_pred[i][j] == 0:
                    two_to_zero = two_to_zero + 1
                elif y_pred[i][j] == 1:
                    two_to_one = two_to_one + 1
                    
result_matrix = [[zero_true, zero_to_one, zero_to_two], [one_to_zero, one_true, one_to_two], [two_to_zero, two_to_one, two_true]]

import pandas as pd
result_matrix = pd.DataFrame(result_matrix, index = ['정답 클래스: 행복', '정답 클래스: 무표정', '정답 클래스: 슬픔']
                             , columns = ['예측 클래스: 행복', '예측 클래스: 무표정', '예측 클래스: 슬픔'])
display(result_matrix)

Unnamed: 0,예측 클래스: 행복,예측 클래스: 무표정,예측 클래스: 슬픔
정답 클래스: 행복,146,1,10
정답 클래스: 무표정,2,127,16
정답 클래스: 슬픔,6,8,132


In [18]:
# Class0의 confusion_maxtrix 
class0_TP = result_matrix.iloc[0,0] # class 0의 TP
class0_FN = result_matrix.iloc[0,1:].sum() # class 0의 FN
class0_FP = result_matrix.iloc[1:,0].sum() # class 0의 FN
class0_TN = result_matrix.sum().sum() - (class0_TP + class0_FN + class0_FP)

# Class1의 confusion_maxtrix 
class1_TP = result_matrix.iloc[1,1] # class 1의 TP
class1_FN = result_matrix.iloc[1,0] + result_matrix.iloc[1,2] # class 1의 FN
class1_FP = result_matrix.iloc[0,1] + result_matrix.iloc[2,1] # class 1의 FP
class1_TN = result_matrix.sum().sum() - (class1_TP + class1_FN + class1_FP)

# Class 2의 confusion_maxtrix 
class2_TP = result_matrix.iloc[2,2] # class 2의 TP
class2_FN = result_matrix.iloc[2,:-1].sum() # class 2의 FN
class2_FP = result_matrix.iloc[:-1,2].sum() # class 2의 FP
class2_TN = result_matrix.sum().sum() - (class2_TP + class2_FN + class2_FP)

# 각각의 클래스에 대한 정밀도/재현율/F1 구하기
class0_precision = class0_TP /(class0_TP + class0_FP)
class0_recall =  class0_TP /(class0_TP + class0_FN)
class0_f1 = 2 * class0_precision * class0_recall / (class0_precision + class0_recall) 

class1_precision = class1_TP /(class1_TP + class1_FP)
class1_recall =  class1_TP /(class1_TP + class1_FN)
class1_f1 = 2 * class1_precision * class1_recall / (class1_precision + class1_recall) 

class2_precision = class2_TP /(class2_TP + class2_FP)
class2_recall =  class2_TP /(class2_TP + class2_FN)
class2_f1 = 2 * class2_precision * class2_recall / (class2_precision + class2_recall) 

# 
result_report = np.round(pd.DataFrame([[class0_precision,class0_recall,class0_f1], 
                                       [class1_precision, class1_recall, class1_f1], 
                                       [class2_precision,class2_recall,class2_f1]], 
                                      columns = ['Precision', 'Recall','F1_score']),4)

result_report = result_report.append(np.round(result_report.mean(axis= 0),4), ignore_index = True)
result_report.index =  ['행복(class)', '무표정(class)','슬픔(class)',' 평균']
display(result_report)

Unnamed: 0,Precision,Recall,F1_score
행복(class),0.9481,0.9299,0.9389
무표정(class),0.9338,0.8759,0.9039
슬픔(class),0.8354,0.9041,0.8684
평균,0.9058,0.9033,0.9037


참고사항: https://ichi.pro/ko/numpyeseo-cheoeumbuteo-roc-gogseon-mich-auc-gasihwa-41861898328491

### (2) LIST 변환 후

In [33]:
new = []
for i in y_true:
    value = i.tolist()
    for j in value:
        new.append(j)
        
news = []
for i in y_pred:
    value = i.tolist()
    for j in value:
        news.append(j)
        
new = np.array(new)
news = np.array(news)

from sklearn.metrics import confusion_matrix
confusion_matrix(new, news)

from sklearn.metrics import classification_report
print(classification_report(new, news, target_names=['class 웃음', 'class 무표정','class 슬픔']))

              precision    recall  f1-score   support

    class 웃음       0.94      0.92      0.93       156
   class 무표정       0.95      0.87      0.91       145
    class 슬픔       0.83      0.91      0.87       147

    accuracy                           0.90       448
   macro avg       0.91      0.90      0.90       448
weighted avg       0.91      0.90      0.90       448

