In [1]:
import os 
import os.path as osp
import math
import time
import pandas as pd
import numpy as np
import cv2

import torch
import torch.utils.data as data
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

import matplotlib.pyplot as plt

from PIL import Image, ImageOps, ImageFilter

import re
import glob

In [2]:
PATH = 'C:/Users/Karen/Documents/PSPNet'
ROOTPH = 'C:/Users/Karen/Documents/PSPNet/10m_image'
STADIED_PATH = 'C:/Users/Karen/Documents/PSPNet/dataset_voc_10m_(1)+(3)/weights/pspnet50_15.pth'

In [3]:
def make_datapath_list(rootpath):
    """
    学習、検証の画像データとアノテーションデータへのファイルパスリストを作成する。

    Parameters
    ----------
    rootpath : str
        データフォルダへのパス

    Returns
    -------
    ret : train_img_list, train_anno_list, val_img_list, val_anno_list
        データへのパスを格納したリスト
    """

    # 画像ファイルへのパス
    imgpath_template = glob.glob(rootpath + '/*')

    id_names = list()
    # ファイルのID（ファイル名）を取得する
    for imgpath in imgpath_template:
        splits = imgpath.split('\\')
        id_names.append(splits[len(splits)-1][:-4])

    return imgpath_template,id_names

    
class DataTransform():

    def __init__(self,input_size):
        self.input_size = input_size

    def __call__(self,img):
        img = img.resize((self.input_size, self.input_size),Image.BICUBIC)
        img = torch.from_numpy(np.array(img, dtype=float).transpose(2,0,1)) / 255
        return img

class VOCDataset(data.Dataset):
    """
    VOC2012のDatasetを作成するクラス。PyTorchのDatasetクラスを継承。

    Attributes
    ----------
    img_list : リスト
        画像のパスを格納したリスト
    anno_list : リスト
        アノテーションへのパスを格納したリスト
    phase : 'train' or 'test'
        学習か訓練かを設定する。
    transform : object
        前処理クラスのインスタンス
    """

    def __init__(self, img_list, transform,id_names):
        self.img_list = img_list
        self.transform = transform
        self.id_names = id_names

    def __getitem__(self, index):
        '''
        前処理をした画像のTensor形式のデータとアノテーションを取得
        '''
        img = self.pull_item(index)
        return img
    
    def __len__(self):
        '''画像の枚数を返す'''
        return len(self.img_list)

    def id_list(self,index):
        idname = self.id_names[index]
        return idname

    def pull_item(self,index):
        '''画像のTensor形式のデータ、アノテーションを取得する'''

        # 1. 画像読み込み
        image_file_path = self.img_list[index]
        img = Image.open(image_file_path)   # [高さ][幅][色RGB]

        # 3. 前処理を実施
        img = self.transform(img)

        return img


class PSPNet(nn.Module):
    def __init__(self, n_classes):
        super(PSPNet, self).__init__()

        # パラメータ設定
        block_config = [3, 4, 6, 3]  # resnet50
        img_size = 475
        img_size_8 = 60  # img_sizeの1/8に

        # 4つのモジュールを構成するサブネットワークの用意
        self.feature_conv = FeatureMap_convolution()
        self.feature_res_1 = ResidualBlockPSP(
            n_blocks=block_config[0], in_channels=128, mid_channels=64, out_channels=256, stride=1, dilation=1)
        self.feature_res_2 = ResidualBlockPSP(
            n_blocks=block_config[1], in_channels=256, mid_channels=128, out_channels=512, stride=2, dilation=1)
        self.feature_dilated_res_1 = ResidualBlockPSP(
            n_blocks=block_config[2], in_channels=512, mid_channels=256, out_channels=1024, stride=1, dilation=2)
        self.feature_dilated_res_2 = ResidualBlockPSP(
            n_blocks=block_config[3], in_channels=1024, mid_channels=512, out_channels=2048, stride=1, dilation=4)

        self.pyramid_pooling = PyramidPooling(in_channels=2048, pool_sizes=[
            6, 3, 2, 1], height=img_size_8, width=img_size_8)

        self.decode_feature = DecodePSPFeature(
            height=img_size, width=img_size, n_classes=n_classes)

        self.aux = AuxiliaryPSPlayers(
            in_channels=1024, height=img_size, width=img_size, n_classes=n_classes)

    def forward(self, x):
        x = self.feature_conv(x)
        x = self.feature_res_1(x)
        x = self.feature_res_2(x)
        x = self.feature_dilated_res_1(x)

        output_aux = self.aux(x)  # Featureモジュールの途中をAuxモジュールへ

        x = self.feature_dilated_res_2(x)

        x = self.pyramid_pooling(x)
        output = self.decode_feature(x)

        return (output, output_aux)


class conv2DBatchNormRelu(nn.Module):#net#FeatureMap_convolution
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super(conv2DBatchNormRelu, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size, stride, padding, dilation, bias=bias)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # inplase設定で入力を保存せずに出力を計算し、メモリ削減する

    def forward(self, x):
        x = self.conv(x)
        x = self.batchnorm(x)
        outputs = self.relu(x)

        return outputs


class FeatureMap_convolution(nn.Module):#net
    def __init__(self):
        '''構成するネットワークを用意'''
        super(FeatureMap_convolution, self).__init__()

        # 畳み込み層1
        in_channels, out_channels, kernel_size, stride, padding, dilation, bias = 3, 64, 3, 2, 1, 1, False
        self.cbnr_1 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size, stride, padding, dilation, bias)

        # 畳み込み層2
        in_channels, out_channels, kernel_size, stride, padding, dilation, bias = 64, 64, 3, 1, 1, 1, False
        self.cbnr_2 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size, stride, padding, dilation, bias)

        # 畳み込み層3
        in_channels, out_channels, kernel_size, stride, padding, dilation, bias = 64, 128, 3, 1, 1, 1, False
        self.cbnr_3 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size, stride, padding, dilation, bias)

        # MaxPooling層
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = self.cbnr_1(x)
        x = self.cbnr_2(x)
        x = self.cbnr_3(x)
        outputs = self.maxpool(x)
        return outputs


class ResidualBlockPSP(nn.Sequential):#net
    def __init__(self, n_blocks, in_channels, mid_channels, out_channels, stride, dilation):
        super(ResidualBlockPSP, self).__init__()

        # bottleNeckPSPの用意
        self.add_module(
            "block1",
            bottleNeckPSP(in_channels, mid_channels,
                          out_channels, stride, dilation)
        )

        # bottleNeckIdentifyPSPの繰り返しの用意
        for i in range(n_blocks - 1):
            self.add_module(
                "block" + str(i+2),
                bottleNeckIdentifyPSP(
                    out_channels, mid_channels, stride, dilation)
            )


class conv2DBatchNorm(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super(conv2DBatchNorm, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size, stride, padding, dilation, bias=bias)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        outputs = self.batchnorm(x)

        return outputs


class bottleNeckPSP(nn.Module):#net##ResidualBlockPSP
    def __init__(self, in_channels, mid_channels, out_channels, stride, dilation):
        super(bottleNeckPSP, self).__init__()

        self.cbr_1 = conv2DBatchNormRelu(
            in_channels, mid_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormRelu(
            mid_channels, mid_channels, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False)
        self.cb_3 = conv2DBatchNorm(
            mid_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        # スキップ結合
        self.cb_residual = conv2DBatchNorm(
            in_channels, out_channels, kernel_size=1, stride=stride, padding=0, dilation=1, bias=False)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        conv = self.cb_3(self.cbr_2(self.cbr_1(x)))
        residual = self.cb_residual(x)
        return self.relu(conv + residual)


class bottleNeckIdentifyPSP(nn.Module):#net##ResidualBlockPSP
    def __init__(self, in_channels, mid_channels, stride, dilation):
        super(bottleNeckIdentifyPSP, self).__init__()

        self.cbr_1 = conv2DBatchNormRelu(
            in_channels, mid_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormRelu(
            mid_channels, mid_channels, kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=False)
        self.cb_3 = conv2DBatchNorm(
            mid_channels, in_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        conv = self.cb_3(self.cbr_2(self.cbr_1(x)))
        residual = x
        return self.relu(conv + residual)


class PyramidPooling(nn.Module):#net
    def __init__(self, in_channels, pool_sizes, height, width):
        super(PyramidPooling, self).__init__()

        # forwardで使用する画像サイズ
        self.height = height
        self.width = width

        # 各畳み込み層の出力チャネル数
        out_channels = int(in_channels / len(pool_sizes))

        # 各畳み込み層を作成
        # この実装方法は愚直すぎてfor文で書きたいところですが、分かりやすさを優先しています
        # pool_sizes: [6, 3, 2, 1]
        self.avpool_1 = nn.AdaptiveAvgPool2d(output_size=pool_sizes[0])
        self.cbr_1 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        self.avpool_2 = nn.AdaptiveAvgPool2d(output_size=pool_sizes[1])
        self.cbr_2 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        self.avpool_3 = nn.AdaptiveAvgPool2d(output_size=pool_sizes[2])
        self.cbr_3 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        self.avpool_4 = nn.AdaptiveAvgPool2d(output_size=pool_sizes[3])
        self.cbr_4 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

    def forward(self, x):

        out1 = self.cbr_1(self.avpool_1(x))
        out1 = F.interpolate(out1, size=(
            self.height, self.width), mode="bilinear", align_corners=True)

        out2 = self.cbr_2(self.avpool_2(x))
        out2 = F.interpolate(out2, size=(
            self.height, self.width), mode="bilinear", align_corners=True)

        out3 = self.cbr_3(self.avpool_3(x))
        out3 = F.interpolate(out3, size=(
            self.height, self.width), mode="bilinear", align_corners=True)

        out4 = self.cbr_4(self.avpool_4(x))
        out4 = F.interpolate(out4, size=(
            self.height, self.width), mode="bilinear", align_corners=True)

        # 最終的に結合させる、dim=1でチャネル数の次元で結合
        output = torch.cat([x, out1, out2, out3, out4], dim=1)

        return output


class DecodePSPFeature(nn.Module):#net
    def __init__(self, height, width, n_classes):
        super(DecodePSPFeature, self).__init__()

        # forwardで使用する画像サイズ
        self.height = height
        self.width = width

        self.cbr = conv2DBatchNormRelu(
            in_channels=4096, out_channels=512, kernel_size=3, stride=1, padding=1, dilation=1, bias=False)
        self.dropout = nn.Dropout2d(p=0.1)
        self.classification = nn.Conv2d(
            in_channels=512, out_channels=n_classes, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        x = self.cbr(x)
        x = self.dropout(x)
        x = self.classification(x)
        output = F.interpolate(
            x, size=(self.height, self.width), mode="bilinear", align_corners=True)

        return output


class AuxiliaryPSPlayers(nn.Module):#net
    def __init__(self, in_channels, height, width, n_classes):
        super(AuxiliaryPSPlayers, self).__init__()

        # forwardで使用する画像サイズ
        self.height = height
        self.width = width

        self.cbr = conv2DBatchNormRelu(
            in_channels=in_channels, out_channels=256, kernel_size=3, stride=1, padding=1, dilation=1, bias=False)
        self.dropout = nn.Dropout2d(p=0.1)
        self.classification = nn.Conv2d(
            in_channels=256, out_channels=n_classes, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        x = self.cbr(x)
        x = self.dropout(x)
        x = self.classification(x)
        output = F.interpolate(
            x, size=(self.height, self.width), mode="bilinear", align_corners=True)

        return output

def run_model(net, dataloader):

    ##CPU／GPUの切り替え
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    print("使用デバイス：", device)
    print(torch.cuda.is_available())

    torch.backends.cudnn.benchmark = True

    # 画像の枚数
    num_imgs = len(dataloader.dataset)

    batch_size = dataloader.batch_size

    net.eval()   # モデルを検証モードに

    # データローダーからminibatchずつ取り出すループ
    for imges in dataloader:

        img_stock = []
        picup_img = []

        for r in range(num_imgs):
            
            img = imges[r, :, :, :].numpy().transpose(2, 1, 0)
            img = np.fliplr(img)
            img = np.rot90(img, 1)

            picup_img.append(img)

            img = img.transpose(2, 1, 0)
            new_imge = torch.from_numpy(img.astype(np.float64)).clone()
            img_stock.append(new_imge)
                    
        new_imges = torch.stack(img_stock)
    
        # ミニバッチがサイズが1だと、バッチノーマライゼーションでエラーになるのでさける
        if new_imges.size()[0] == 1:
            return

        # GPUが使えるならGPUにデータを送る
        new_imges = new_imges.to(device, dtype=torch.float)

        outputs = net(new_imges)
        output = outputs[0]#AuxLoss側は無視　yサイズはtorch.Size()

        device2 = torch.device('cpu')
        output = output.to(device2)#CPU／GPUの切り替え

        #結果を保存
        with torch.no_grad():
            for r in range(len(output)):
                out_img = output[r]
                out_img = out_img.numpy()
                out_img = np.argmax(out_img, axis=0)#一番大きい要素のインデックスを返す(ピクセル毎に確信度が最大のクラスを求める
                class_img = Image.fromarray(255-np.uint8(out_img)*255, mode="P")
                class_img.putpalette([255,255,0])#黄色に設定
                class_img = class_img.convert('RGB')
                result_img = Image.new('RGBA', class_img.size, (0, 0, 0, 0))
                for x in range(475):
                    for y in range(475):
                        #学習結果画像のピクセルデータを取得
                        ##黄色
                        pixel = class_img.getpixel((x, y))
                
                        if pixel == (255, 255, 0):
                            result_img.putpixel((x, y), (255, 255, 0, 150))#yellow
                        
                img = Image.fromarray((picup_img[r]*255).astype(np.uint8))
                result_img = Image.alpha_composite(img.convert('RGBA'), class_img)
                result_img = cv2.cvtColor(np.asarray(result_img), cv2.COLOR_RGBA2BGRA)
                if os.path.exists(PATH + '/pspnet_inference_result') == False:
                    os.mkdir(PATH + '/pspnet_inference_result')
                cv2.imwrite(PATH + '/pspnet_inference_result/'+dataloader.dataset.id_list(r)+'.jpg', result_img)
        
    return


"""セマンティックセグメンテーションの推論"""

##ファイルパスリストの作成
rootpath = os.path.abspath(ROOTPH)
if os.path.exists(rootpath):
  print('-----------exist folder------------------')
  print('folder:',rootpath)

imgpath_list,id_names_list = make_datapath_list(rootpath=rootpath)

print(imgpath_list[0:3])
print(id_names_list[0:3])

##PSPnetの用意
net = PSPNet(n_classes = 2)

#学習済みパラメータをロード
state_dict = torch.load(STADIED_PATH, map_location={"cuda:0": "cpu"})
net.load_state_dict(state_dict)
print("ネットワーク設定完了：学習済みの重みをロードしました。")

""" データセット作成"""
dataset = VOCDataset(imgpath_list,transform=DataTransform(input_size=475),id_names=id_names_list)

"""データローダーの作成"""
dataloader = data.DataLoader(dataset, batch_size=2, shuffle=False)

# 動作の確認
batch_iterator = iter(dataloader)  # イタレータに変換
imges= next(batch_iterator)  # 1番目の要素を取り出す
print(imges.size())  # torch.Size([8, 3, 475, 475])


-----------exist folder------------------
folder: C:\Users\Karen\Documents\PSPNet\10m_image
['C:\\Users\\Karen\\Documents\\PSPNet\\10m_image\\DJI_0001.jpg', 'C:\\Users\\Karen\\Documents\\PSPNet\\10m_image\\DJI_0002.jpg', 'C:\\Users\\Karen\\Documents\\PSPNet\\10m_image\\DJI_0003.jpg']
['DJI_0001', 'DJI_0002', 'DJI_0003']
ネットワーク設定完了：学習済みの重みをロードしました。


  img = img.resize((self.input_size, self.input_size),Image.BICUBIC)


torch.Size([2, 3, 475, 475])


In [4]:
run_model(net, dataloader)

使用デバイス： cuda:0
True


  img = img.resize((self.input_size, self.input_size),Image.BICUBIC)


IndexError: index 2 is out of bounds for dimension 0 with size 2