In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
import random
from scipy.special import expit
from PIL import Image
from PIL import ImageDraw
import numpy as np
import matplotlib.pyplot as plt
import math
from scipy.special import expit
import cv2

In [14]:
def conv_batch(in_num, out_num, kernel_size=3, padding=1, stride=1):
    return nn.Sequential(
        nn.Conv2d(in_num, out_num, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
        nn.BatchNorm2d(out_num),
        nn.LeakyReLU())

def conv_block():
    return nn.Sequential(
        conv_batch(1024, 512, stride=1, kernel_size=1, padding=0),
        conv_batch(512, 1024, stride=1, kernel_size=3, padding=1),
        conv_batch(1024, 512, stride=1, kernel_size=1, padding=0),
        conv_batch(512, 1024, stride=1, kernel_size=3, padding=1),
        conv_batch(1024, 512, stride=1, kernel_size=1, padding=0),
    )

def conv_block2():
    return nn.Sequential(
        conv_batch(768, 256, stride=1, kernel_size=1, padding=0),
        conv_batch(256, 512, stride=1, kernel_size=3, padding=1),
        conv_batch(512, 256, stride=1, kernel_size=1, padding=0),
        conv_batch(256, 512, stride=1, kernel_size=3, padding=1),
        conv_batch(512, 256, stride=1, kernel_size=1, padding=0),
    )

def conv_block3():
    return nn.Sequential(
        conv_batch(384, 128, stride=1, kernel_size=1, padding=0),
        conv_batch(128, 256, stride=1, kernel_size=3, padding=1),
        conv_batch(256, 128, stride=1, kernel_size=1, padding=0),
        conv_batch(128, 256, stride=1, kernel_size=3, padding=1),
        conv_batch(256, 128, stride=1, kernel_size=1, padding=0),
    )

# Residual block
class DarkResidualBlock(nn.Module):
    def __init__(self, in_channels):
        super(DarkResidualBlock, self).__init__()

        reduced_channels = int(in_channels/2)

        self.layer1 = conv_batch(in_channels, reduced_channels, kernel_size=1, padding=0)
        self.layer2 = conv_batch(reduced_channels, in_channels, kernel_size=3, padding=1)

    def forward(self, x):
        residual = x

        out = self.layer1(x)
        out = self.layer2(out)
        out += residual
        return out

class Darknet53(nn.Module):
    def __init__(self, block):
        super(Darknet53, self).__init__()
        
        #Backbone
        self.conv1 = conv_batch(3, 32)
        self.conv2 = conv_batch(32, 64, stride=2)
        self.residual_block1 = self.make_layer(block, in_channels=64, num_blocks=1)
        self.conv3 = conv_batch(64, 128, stride=2)
        self.residual_block2 = self.make_layer(block, in_channels=128, num_blocks=2)
        self.conv4 = conv_batch(128, 256, stride=2)
        self.residual_block3 = self.make_layer(block, in_channels=256, num_blocks=8)
        self.conv5 = conv_batch(256, 512, stride=2)
        self.residual_block4 = self.make_layer(block, in_channels=512, num_blocks=8)
        self.conv6 = conv_batch(512, 1024, stride=2)
        self.residual_block5 = self.make_layer(block, in_channels=1024, num_blocks=4)
    
    
    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.residual_block1(out)
        out = self.conv3(out)
        out = self.residual_block2(out)
        out = self.conv4(out)
        out = self.residual_block3(out)
        s = out
        out = self.conv5(out)
        out = self.residual_block4(out)
        m = out
        out = self.conv6(out)
        l = self.residual_block5(out)
        
        return l, m, s

    def make_layer(self, block, in_channels, num_blocks):
        layers = []
        for i in range(0, num_blocks):
            layers.append(block(in_channels))
        return nn.Sequential(*layers)

    
class Neck(nn.Module):
    def __init__(self, ):
        super(Neck, self).__init__()
        self.lconvblock = conv_block()
        
        self.lup = conv_batch(512, 256, stride=1, kernel_size=1, padding=0)
        self.lupsample = nn.Upsample(scale_factor=2, mode='bilinear',align_corners=True)
        self.mconvblock = conv_block2()
        
        self.mup = conv_batch(256, 128, stride=1, kernel_size=1, padding=0)
        self.mupsample = nn.Upsample(scale_factor=2, mode='bilinear',align_corners=True)
        self.sconvblock = conv_block3()
        

    def forward(self, l, m, s):
        lout = self.lconvblock(l)
        
        mout = self.lup(lout)
        mout = self.lupsample(mout)
        mout = torch.cat([m,mout],dim=1)
        mout = self.mconvblock(mout)
        
        sout = self.mup(mout)
        sout = self.mupsample(sout)
        sout = torch.cat([s,sout],dim=1)
        sout = self.sconvblock(sout)
        
        return lout, mout, sout    

class Yolov3(nn.Module):
    def __init__(self, backbone, num_boxes):
        super(Yolov3, self).__init__()
        self.backbone = backbone
        self.neck = Neck()
        
        self.lconv = conv_batch(512, 1024, stride=1, kernel_size=3, padding=1) 
        self.lpred = nn.Conv2d(1024, num_boxes*6 , kernel_size=[1,1], stride=1, padding=0, bias=True)
        
        self.mconv = conv_batch(256, 512, stride=1, kernel_size=3, padding=1) 
        self.mpred = nn.Conv2d(512, num_boxes*6 , kernel_size=[1,1], stride=1, padding=0, bias=True)
        
        self.sconv = conv_batch(128, 256, stride=1, kernel_size=3, padding=1)
        self.spred = nn.Conv2d(256, num_boxes*6 , kernel_size=[1,1], stride=1, padding=0, bias=True)
        
    def forward(self, x):
        l,m,s = self.backbone(x)
        lout, mout, sout  = self.neck(l,m,s)
        
        lout = self.lconv(lout)
        lout = self.lpred(lout)

        mout = self.mconv(mout)
        mout = self.mpred(mout)
        
        sout = self.sconv(sout)
        sout = self.spred(sout)

        
        lout[:,:5,:,:] = torch.sigmoid(lout[:,:5,:,:])
        mout[:,:5,:,:] = torch.sigmoid(mout[:,:5,:,:])
        sout[:,:5,:,:] = torch.sigmoid(sout[:,:5,:,:])
        
        lout[:,25:,:,:] = torch.sigmoid(lout[:,25:,:,:])
        mout[:,25:,:,:] = torch.sigmoid(mout[:,25:,:,:])
        sout[:,25:,:,:] = torch.sigmoid(sout[:,25:,:,:])
        
        return  lout, mout, sout

In [15]:
def resizer(pic,pixel = 512):
    ppic = np.array(pic)
    orsize = max(ppic.shape[:2])
    orsizemin = min(ppic.shape[:2])
    whichmax = np.argmax(ppic.shape)

    top = int((max(ppic.shape) - ppic.shape[0])/2)
    bottom = int((max(ppic.shape) - ppic.shape[0])/2)
    left = int((max(ppic.shape) - ppic.shape[1])/2)
    right = int((max(ppic.shape) - ppic.shape[1])/2)

    ppic = cv2.copyMakeBorder(ppic,top,bottom,left,right,borderType=cv2.BORDER_CONSTANT)
    ppic = Image.fromarray(ppic)
    ppic = ppic.resize((pixel,pixel))
    border = [top,bottom,left,right]
    return np.moveaxis(np.asarray(ppic),-1,0)[np.newaxis,:,:,:]

In [16]:
num_boxes=5
backbone = Darknet53(DarkResidualBlock)
model = Yolov3(backbone,num_boxes=num_boxes)

In [17]:
ncentroids = np.load('wholencentorids_multidim.npy')
scent = ncentroids[:num_boxes]
mcent = ncentroids[num_boxes:num_boxes*2]
lcent = ncentroids[num_boxes*2:]

In [18]:
device='cuda'
model_name = 'FDM_YOLOv3_myown_512_muldim_newarch_v.1.pth'
model.to(device)

Yolov3(
  (backbone): Darknet53(
    (conv1): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (conv2): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (residual_block1): Sequential(
      (0): DarkResidualBlock(
        (layer1): Sequential(
          (0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (layer2): Sequential(
          (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): B

In [19]:
def decoder(out,cc,num_box):
    dim = out.shape[2]
    out2 = out.clone().cpu()
    
    cx = torch.tensor(list(range(0,dim))*dim).reshape(dim,dim).T.float()
    cy = torch.tensor(list(range(0,dim))*dim).reshape(dim,dim).float()
    pw = torch.moveaxis(torch.tensor(cc[:,0].tolist()*(dim**2)).reshape(dim,dim,num_box),-1,0)
    ph = torch.moveaxis(torch.tensor(cc[:,1].tolist()*(dim**2)).reshape(dim,dim,num_box),-1,0)

    out2[:,num_box:num_box*2,:,:] = torch.sigmoid(out2[:,num_box:num_box*2,:,:])*1.1-0.05 + cx
    out2[:,num_box*2:num_box*3,:,:] = torch.sigmoid(out2[:,num_box*2:num_box*3,:,:])*1.1-0.05 + cy

    out2[:,num_box*3:num_box*5,:,:] = torch.exp(out2[:,num_box*3:num_box*5,:,:])
    out2[:,num_box*3:num_box*4,:,:] = out2[:,num_box*3:num_box*4,:,:] * pw
    out2[:,num_box*4:num_box*5,:,:] = out2[:,num_box*4:num_box*5,:,:] * ph
    return out2

In [20]:
model.load_state_dict(torch.load(model_name,map_location=device))

<All keys matched successfully>

In [21]:
model.eval()
def npiou(t1,t2):
    cx1,cy1,w1,h1 = t1[:,0], t1[:,1], t1[:,2], t1[:,3]
    cx2,cy2,w2,h2 = t2[:,0], t2[:,1], t2[:,2], t2[:,3]
    
    ltx1 = cx1 - w1/2
    lty1 = cy1 - h1/2
    rbx1 = cx1 + w1/2
    rby1 = cy1 + h1/2
    area1 = w1 * h1
    
    ltx2 = cx2 - w2/2
    lty2 = cy2 - h2/2
    rbx2 = cx2 + w2/2
    rby2 = cy2 + h2/2
    area2 = w2 * h2
    
    xm = np.vstack([ltx1,ltx2]).T.max(axis=1)
    ym = np.vstack([lty1,lty2]).T.max(axis=1)
    xmi = np.vstack([rbx1,rbx2]).T.min(axis=1)
    ymi = np.vstack([rby1,rby2]).T.min(axis=1)
    inter = (xmi-xm).clip(0) * (ymi-ym).clip(0)
    
    iou = inter/(area1+area2-inter)
    return iou

In [22]:
def nms2(lout,mout,sout,conf_thres,iou_thres,num_boxes):
    lt = torch.where(lout[0][:num_boxes,:,:] > conf_thres)
    mt = torch.where(mout[0][:num_boxes,:,:] > conf_thres)
    st = torch.where(sout[0][:num_boxes,:,:] > conf_thres)
    
    if (len(torch.where(lout[0][:num_boxes,:,:] >conf_thres)[0])==0) &\
    (len(torch.where(mout[0][:num_boxes,:,:] > conf_thres)[0])==0) &\
    (len(torch.where(sout[0][:num_boxes,:,:] > conf_thres)[0])==0):
        return []
    else:
        lscore = np.array(lout[0][lt[0],lt[1],lt[2]])
        lcx = lout[0][num_boxes+lt[0],lt[1],lt[2]].flatten().detach().cpu()*32
        lcy = lout[0][num_boxes*2+lt[0],lt[1],lt[2]].flatten().detach().cpu()*32
        lpw = lout[0][num_boxes*3+lt[0],lt[1],lt[2]]*32
        lph = lout[0][num_boxes*4+lt[0],lt[1],lt[2]]*32
        
        mscore = np.array(mout[0][mt[0],mt[1],mt[2]])
        mcx = mout[0][num_boxes+mt[0],mt[1],mt[2]].flatten().detach().cpu()*16
        mcy = mout[0][num_boxes*2+mt[0],mt[1],mt[2]].flatten().detach().cpu()*16
        mpw = mout[0][num_boxes*3+mt[0],mt[1],mt[2]]*16
        mph = mout[0][num_boxes*4+mt[0],mt[1],mt[2]]*16
        
        sscore = np.array(sout[0][st[0],st[1],st[2]])
        scx = sout[0][num_boxes+st[0],st[1],st[2]].flatten().detach().cpu()*8
        scy = sout[0][num_boxes*2+st[0],st[1],st[2]].flatten().detach().cpu()*8
        spw = sout[0][num_boxes*3+st[0],st[1],st[2]]*8
        sph = sout[0][num_boxes*4+st[0],st[1],st[2]]*8
        
        scores = np.concatenate([lscore,mscore,sscore])
        cx = torch.cat([lcx,mcx,scx])
        cy = torch.cat([lcy,mcy,scy])
        pw = torch.cat([lpw,mpw,spw])
        ph = torch.cat([lph,mph,sph])

        argscores = np.argsort(scores)
        scores = scores[argscores][::-1]
        bboxes= np.array(torch.vstack([cx,cy,pw,ph]).T)
        bboxes = bboxes[argscores][::-1]

        selecbox = []
        while len(bboxes)!=0:
            selecbox.append(bboxes[0])
            bboxes = bboxes[1:,:]
            if len(bboxes) == 0:
                break
            temprep = np.vstack([selecbox[-1]]*len(bboxes))
            bboxes = bboxes[npiou(bboxes,temprep) < iou_thres]
        return np.vstack(selecbox)

In [23]:
def boxcoord(pic):
    ogmax = max(pic.shape)
    whichmax = np.argmax(pic.shape)
    padsize = int(abs(pic.shape[0]-pic.shape[1])/2)
    with torch.no_grad():
        testpic=(torch.tensor(resizer(pic)).float()/255).reshape(1,3,512,512)
        lout, mout, sout = model(testpic.to(device))
        lout, mout, sout = lout.detach().cpu(), mout.detach().cpu(), sout.detach().cpu()
        lout, mout, sout = decoder(lout,lcent,num_boxes), decoder(mout,mcent,num_boxes), decoder(sout,scent,num_boxes)

    boxes = nms2(lout, mout, sout,.6,.05,num_boxes)
    x = boxes[:,0] - boxes[:,2]/2
    y = boxes[:,1] - boxes[:,3]/2
    boxes[:,0] = x
    boxes[:,1] = y
    boxes = ogmax*(boxes/512)
    if whichmax == 0:
        boxes[:,0] = boxes[:,0] - padsize
    else:
        boxes[:,1] = boxes[:,1] - padsize
    return boxes

In [24]:
# 카메라 호출, USB 카메라를 따로 사용하여 디바이스 순번 ID 1번  사용,
# 기본 내장 카메라 는 보통 순번 ID 0 번 사용
vcap = cv2.VideoCapture(0)

# 카메라 속성 설정
vcap.set(cv2.CAP_PROP_FRAME_WIDTH, 512)  # 세로 사이즈
vcap.set(cv2.CAP_PROP_FRAME_HEIGHT, 512) # 가로 사이즈

True

In [25]:
import matplotlib.pyplot as plt

In [26]:
def videoDetector(vcap) :
    faces_cnt = 0 # 검출 얼굴수
    # 카메라의 프레임을 지속적으로 받아오기
    while True :
        # vcap.read() 프레임 읽기
        # ret 은 카메라 상태 이며, 정상 : True, 비정상 : False
        # frame 은 현재시점의 플레임
        ret, frame = vcap.read()

        # #  숫자키 1 입력시 좌우 대칭 변경
        # if cv2.waitKey(1) == 49 :
        #     # 대칭 처리
        #     # value < 0 상하좌우 대칭
        #     # value = 0 상하 대칭
        #     # value > 0  좌우 대칭
        #     frame = cv2.flip(frame, 1)
        frame = cv2.flip(frame, 1) # 좌우 대칭 변경
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # 영상 이미지에서 얼굴 검출 하기
        #  gray                    # 대상 이미지 행렬
        # ,scaleFactor  = 1.1      # 이미지 피라미드 규모인자 크기(Scale Factor)
        # ,minNeighbors = 5        # 최종 검출영역 확정용 이웃 사각형의 갯수 설정
        faces = boxcoord(frame)

        # 화면에 검출 된 얼굴 수 가 변경될때마다 출력
        if faces_cnt != len(faces) :
            faces_cnt = len(faces)
            if faces_cnt != 0 :
                print("현재 검출된 얼굴 수 : ", str(faces_cnt))

        # 검출된 안면에 사각형 그리기
        # cv2.rectangle(영상이미지, (x1, y1), (x2, y2), (B, G, R), 두깨, 선형타입)
        # (X1, Y1) 좌측 상단 모서리, (X2, Y2) 우측 하단 모서리.
        if len(faces) :
            for  x, y, w, h in faces :
                cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (255,255,255), 2, cv2.LINE_4)

        # 윈도우창 (Title , 프레임 이미지)
        cv2.imshow("VideoFrame", frame)

        # cvs2.waitKey(1) 1은 밀리세컨으로 키입력값 대기 지연시간이다. ESC로 멈춤
        if cv2.waitKey(1) == 27 :
            vcap.release()          # 메모리 해제
            cv2.destroyAllWindows() # 모든창 제거, 특정 창만듣을 경우 ("VideoFrame")
            break;

In [27]:
# 실시간 영상에서 얼굴 검출 호출
videoDetector(vcap)

현재 검출된 얼굴 수 :  1


error: OpenCV(4.5.2) :-1: error: (-5:Bad argument) in function 'rectangle'
> Overload resolution failed:
>  - Can't parse 'pt1'. Sequence item with index 0 has a wrong type
>  - Can't parse 'pt1'. Sequence item with index 0 has a wrong type
>  - Can't parse 'rec'. Expected sequence length 4, got 2
>  - Can't parse 'rec'. Expected sequence length 4, got 2


In [37]:
faces_cnt = 0 # 검출 얼굴수
# 카메라의 프레임을 지속적으로 받아오기
while True :
    # vcap.read() 프레임 읽기
    # ret 은 카메라 상태 이며, 정상 : True, 비정상 : False
    # frame 은 현재시점의 플레임
    ret, frame = vcap.read()

    # #  숫자키 1 입력시 좌우 대칭 변경
    # if cv2.waitKey(1) == 49 :
    #     # 대칭 처리
    #     # value < 0 상하좌우 대칭
    #     # value = 0 상하 대칭
    #     # value > 0  좌우 대칭
    #     frame = cv2.flip(frame, 1)
    frame = cv2.flip(frame, 1) # 좌우 대칭 변경
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # 영상 이미지에서 얼굴 검출 하기
    #  gray                    # 대상 이미지 행렬
    # ,scaleFactor  = 1.1      # 이미지 피라미드 규모인자 크기(Scale Factor)
    # ,minNeighbors = 5        # 최종 검출영역 확정용 이웃 사각형의 갯수 설정
    faces = boxcoord(frame)

    # 화면에 검출 된 얼굴 수 가 변경될때마다 출력
    if faces_cnt != len(faces) :
        faces_cnt = len(faces)
        if faces_cnt != 0 :
            print("현재 검출된 얼굴 수 : ", str(faces_cnt))

    # 검출된 안면에 사각형 그리기
    # cv2.rectangle(영상이미지, (x1, y1), (x2, y2), (B, G, R), 두깨, 선형타입)
    # (X1, Y1) 좌측 상단 모서리, (X2, Y2) 우측 하단 모서리.
    if len(faces) :
        for  x, y, w, h in faces :
            cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (255,255,255), 2, cv2.LINE_4)

    # 윈도우창 (Title , 프레임 이미지)
    cv2.imshow("VideoFrame", frame)

    # cvs2.waitKey(1) 1은 밀리세컨으로 키입력값 대기 지연시간이다. ESC로 멈춤
    if cv2.waitKey(1) == 27 :
        vcap.release()          # 메모리 해제
        cv2.destroyAllWindows() # 모든창 제거, 특정 창만듣을 경우 ("VideoFrame")
        break;

현재 검출된 얼굴 수 :  1


KeyboardInterrupt: 

In [36]:
cv2.rectangle(frame, (x, y), (x + w, y + h), (255,255,255), 3, cv2.LINE_4)

error: OpenCV(4.5.2) :-1: error: (-5:Bad argument) in function 'rectangle'
> Overload resolution failed:
>  - Can't parse 'pt1'. Sequence item with index 0 has a wrong type
>  - Can't parse 'pt1'. Sequence item with index 0 has a wrong type
>  - Can't parse 'rec'. Expected sequence length 4, got 2
>  - Can't parse 'rec'. Expected sequence length 4, got 2


In [32]:
for  x, y, w, h in faces:
    print(x)

172.69188
