In [16]:
import os
import random
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import torch 
import torch.nn as nn
import torch.nn.init as init
import torchvision

In [5]:
# Seed the three random number generators used in this notebook (Python's
# `random`, PyTorch and NumPy) with one shared value.
seed = 1234
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

In [7]:
class OpenPoseNet(nn.Module):
    """OpenPose network: one feature module followed by six two-branch stages.

    Every stage has a PAF branch (``model<stage>_1``) and a confidence-heatmap
    branch (``model<stage>_2``). Stage 1 reads the feature map alone; stages
    2-6 read the previous stage's PAFs and heatmap concatenated with the
    feature map along the channel axis.
    """

    def __init__(self):
        super(OpenPoseNet, self).__init__()

        # Feature module
        self.model0 = OpenPose_Feature()

        # Stage modules. All PAF branches (suffix _1) are created first and
        # the confidence-heatmap branches (suffix _2) second, so the
        # submodules are registered in the order model1_1..model6_1,
        # model1_2..model6_2.
        for branch in (1, 2):
            for stage in range(1, 7):
                setattr(self,
                        'model%d_%d' % (stage, branch),
                        make_OpenPose_block('block%d_%d' % (stage, branch)))

    def forward(self, x):
        """Run the feature module and all six stages.

        Args:
            x: input batch passed to the feature module.

        Returns:
            ``((pafs, heatmap), saved_for_loss)``: the final stage's PAFs and
            confidence heatmap, plus a list holding every stage's outputs for
            the loss computation, ordered
            ``[pafs_1, heatmap_1, ..., pafs_6, heatmap_6]``.
            The original notes give the final sizes as
            ``[minibatch, 38, 46, 46]`` for the PAFs and
            ``[minibatch, 19, 46, 46]`` for the heatmap.
        """
        features = self.model0(x)

        # (PAF branch, confidence-heatmap branch) for stages 1..6
        stage_branches = (
            (self.model1_1, self.model1_2),
            (self.model2_1, self.model2_2),
            (self.model3_1, self.model3_2),
            (self.model4_1, self.model4_2),
            (self.model5_1, self.model5_2),
            (self.model6_1, self.model6_2),
        )

        saved_for_loss = []
        stage_input = features
        pafs = heatmap = None
        for paf_branch, heatmap_branch in stage_branches:
            if saved_for_loss:
                # From stage 2 on, feed back the previous stage's outputs
                # together with the shared feature map.
                stage_input = torch.cat([pafs, heatmap, features], dim=1)
            pafs = paf_branch(stage_input)
            heatmap = heatmap_branch(stage_input)
            saved_for_loss += [pafs, heatmap]

        return (pafs, heatmap), saved_for_loss

# Implementation of the Feature and Stage modules

In [13]:
class OpenPose_Feature(nn.Module):
    """Feature module of OpenPose.

    Takes the first 23 layers of torchvision's VGG-19 ``features`` (loaded
    with ``pretrained=True``) and appends two new 3x3 convolutions
    (512 -> 256 -> 128 channels), each followed by an in-place ReLU.
    The result is stored as ``self.model``.
    """

    def __init__(self):
        super(OpenPose_Feature, self).__init__()

        # Slicing the Sequential yields a new container holding layers '0'..'22'.
        backbone = torchvision.models.vgg19(pretrained=True).features[0:23]

        # Two additional convolution layers, appended under the next free
        # indices so they continue the VGG numbering.
        appended_layers = (
            ('23', torch.nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)),
            ('24', torch.nn.ReLU(inplace=True)),
            ('25', torch.nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)),
            ('26', torch.nn.ReLU(inplace=True)),
        )
        for layer_name, layer in appended_layers:
            backbone.add_module(layer_name, layer)

        self.model = backbone

    def forward(self, x):
        """Return the 128-channel feature map computed from ``x``."""
        return self.model(x)

In [14]:
def make_OpenPose_block(block_name):
    """
    Build one branch of an OpenPose stage module from its configuration.

    The result is an ``nn.Sequential`` (not a custom ``nn.Module``) made of
    ``Conv2d`` layers separated by in-place ``ReLU``; there is no ReLU after
    the final convolution. Every convolution weight is drawn from a normal
    distribution with std 0.01 and every bias is set to zero.

    Args:
        block_name (str): ``'block<stage>_<branch>'`` with stage 1-6 and
            branch 1 (PAFs, 38 output channels) or 2 (confidence heatmap,
            19 output channels).

    Returns:
        nn.Sequential: the freshly initialised block.

    Raises:
        KeyError: if ``block_name`` is not one of the twelve known blocks.
    """

    # Configuration table. Every entry maps a layer name to
    # [in_channels, out_channels, kernel_size, stride, padding].
    # All patterns are listed; only the one named by block_name is built.
    blocks = {}
    # stage1: input is the 128-channel feature map
    blocks['block1_1'] = [{'conv5_1_CPM_L1': [128, 128, 3, 1, 1]},
                          {'conv5_2_CPM_L1': [128, 128, 3, 1, 1]},
                          {'conv5_3_CPM_L1': [128, 128, 3, 1, 1]},
                          {'conv5_4_CPM_L1': [128, 512, 1, 1, 0]},
                          {'conv5_5_CPM_L1': [512, 38, 1, 1, 0]}  # PAF: 38
                         ]
    blocks['block1_2'] = [{'conv5_1_CPM_L2': [128, 128, 3, 1, 1]},
                          {'conv5_2_CPM_L2': [128, 128, 3, 1, 1]},
                          {'conv5_3_CPM_L2': [128, 128, 3, 1, 1]},
                          {'conv5_4_CPM_L2': [128, 512, 1, 1, 0]},
                          {'conv5_5_CPM_L2': [512, 19, 1, 1, 0]}  # heatmap: 19
                         ]
    # stage2-6: input has 185 channels = 38 (PAFs) + 19 (heatmap) + 128 (features)
    for i in range(2, 7):
        blocks['block%d_1' % i] = [
            {'Mconv1_stage%d_L1' % i: [185, 128, 7, 1, 3]},
            {'Mconv2_stage%d_L1' % i: [128, 128, 7, 1, 3]},
            {'Mconv3_stage%d_L1' % i: [128, 128, 7, 1, 3]},
            {'Mconv4_stage%d_L1' % i: [128, 128, 7, 1, 3]},
            {'Mconv5_stage%d_L1' % i: [128, 128, 7, 1, 3]},
            {'Mconv6_stage%d_L1' % i: [128, 128, 1, 1, 0]},
            {'Mconv7_stage%d_L1' % i: [128, 38, 1, 1, 0]}
        ]

        blocks['block%d_2' % i] = [
            {'Mconv1_stage%d_L2' % i: [185, 128, 7, 1, 3]},
            {'Mconv2_stage%d_L2' % i: [128, 128, 7, 1, 3]},
            {'Mconv3_stage%d_L2' % i: [128, 128, 7, 1, 3]},
            {'Mconv4_stage%d_L2' % i: [128, 128, 7, 1, 3]},
            {'Mconv5_stage%d_L2' % i: [128, 128, 7, 1, 3]},
            {'Mconv6_stage%d_L2' % i: [128, 128, 1, 1, 0]},
            {'Mconv7_stage%d_L2' % i: [128, 19, 1, 1, 0]}
        ]

    if block_name not in blocks:
        # Same exception type as a plain dict lookup, but with a usable message.
        raise KeyError('unknown block_name %r; expected one of: %s'
                       % (block_name, ', '.join(sorted(blocks))))

    layers = []
    for layer_cfg in blocks[block_name]:
        for v in layer_cfg.values():
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3], padding=v[4])
            layers += [conv2d, nn.ReLU(inplace=True)]

    # Drop the trailing ReLU: the block's output is the raw last convolution.
    net = nn.Sequential(*layers[:-1])

    def _initialize_weights_norm(m):
        # nn.Module.apply already calls this once for every submodule, so it
        # must only handle the module it is given; walking m.modules() here
        # would re-initialise each convolution a second time.
        if isinstance(m, nn.Conv2d):
            init.normal_(m.weight, std=0.01)
            if m.bias is not None:
                init.constant_(m.bias, 0.0)

    net.apply(_initialize_weights_norm)

    return net

In [17]:
# Smoke test: build the network and push one random mini-batch through it.
batch_size = 2

net = OpenPoseNet()
net.train()  # put the network in training mode

# Random stand-in for a batch of 3-channel 368x368 images.
dummy_img = torch.rand((batch_size, 3, 368, 368))

outputs = net(dummy_img)
print(outputs)

((tensor([[[[ 6.5713e-06,  2.2009e-06,  1.0352e-05,  ...,  1.7822e-06,
           -2.6448e-05, -1.3621e-06],
          [-1.0527e-05, -4.0821e-05, -3.3035e-05,  ...,  1.3040e-05,
           -2.8428e-06,  2.6035e-06],
          [ 1.9457e-05, -5.1525e-06,  5.3254e-07,  ...,  1.1748e-05,
            2.3775e-05,  2.0857e-05],
          ...,
          [ 1.2054e-05,  2.5417e-05,  2.6322e-05,  ..., -9.7114e-06,
           -1.1764e-05,  3.3469e-06],
          [ 4.0079e-06, -2.5591e-06, -3.8760e-07,  ..., -1.2629e-05,
            1.5987e-05,  1.0864e-05],
          [-1.9248e-06,  2.9436e-06, -1.5959e-05,  ...,  1.6265e-05,
            2.2856e-05, -3.9686e-06]],

         [[-9.3798e-06,  8.2357e-07, -1.7070e-05,  ...,  3.0282e-06,
           -2.2590e-05, -3.9660e-05],
          [-2.9468e-05, -3.7642e-05, -1.5067e-05,  ...,  2.3652e-05,
            2.1788e-06, -3.0908e-05],
          [-1.1955e-05, -1.1826e-05, -1.8748e-05,  ...,  3.4890e-05,
           -1.9353e-06, -1.2917e-05],
          ...,
   