In [23]:
import cv2
import os
import torch
import torch.nn.functional as F
import numpy as np
from PIL import Image 

from loguru import logger
import models

# Input to process. Other samples that were used with this cell:
#   'samples/daejeon1_000000_000001_leftImg8bit.png'
#   'samples/test.mp4'
FILE_PATH = 'samples/0001TP_009210.png'
FILE_NAME = os.path.basename(FILE_PATH)
SAVE_DIR = 'output/'

# Text-overlay settings handed to cv2.getTextSize / cv2.putText.
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONTSCALE = 1
THICKNESS = 2

# Overlay colours. The annotated map is treated as RGB downstream (saved via
# PIL, or converted RGB -> BGR before being written to video).
BLUE_COLOR = (0, 0, 255)
RED_COLOR = (255, 40, 40)

# Per-channel normalisation constants applied by input_transform
# (presumably the usual ImageNet statistics in RGB order — confirm).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Colour painted for each predicted class id (list index == class id).
color_map = [
    (128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156),
    (190, 153, 153), (153, 153, 153), (250, 170, 30), (220, 220, 0),
    (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60),
    (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100),
    (0, 80, 100), (0, 0, 230), (119, 11, 32),
]

def input_transform(image):
    """Normalise an H x W x 3 image for the network.

    The image is cast to float32, its last axis is reversed (for frames
    coming from OpenCV this turns BGR into RGB), values are scaled by 1/255,
    and the module-level ``mean`` / ``std`` are applied per channel.

    Args:
        image: Array indexed as [row, column, channel].

    Returns:
        A new float32 array of the same shape; the input is not modified.
    """
    normalised = image.astype(np.float32)[:, :, ::-1] / 255.0
    # In-place ops keep the float32 dtype even though mean/std are Python lists.
    normalised -= mean
    normalised /= std
    return normalised

def load_pretrained(model, pretrained):
    """Copy the matching weights of a checkpoint file into *model*.

    Checkpoint keys may carry a leading ``'model.'`` prefix; it is stripped
    only when actually present, so checkpoints saved with or without that
    prefix both work. An entry is copied only if its (stripped) name exists
    in the model and its shape matches; every other model parameter keeps
    its current value.

    Args:
        model: Module that receives the weights.
        pretrained: Path of a checkpoint readable by ``torch.load``. It may be
            a plain state dict or a dict holding one under ``'state_dict'``.

    Returns:
        The same ``model`` object, updated in place.
    """
    # NOTE(security): torch.load unpickles the file — only load checkpoints
    # that come from a trusted source.
    pretrained_dict = torch.load(pretrained, map_location='cpu')
    if 'state_dict' in pretrained_dict:
        pretrained_dict = pretrained_dict['state_dict']
    model_dict = model.state_dict()

    prefix = 'model.'
    matched = {}
    for key, value in pretrained_dict.items():
        # Strip the prefix only when it is really there instead of blindly
        # dropping the first six characters of every key.
        name = key[len(prefix):] if key.startswith(prefix) else key
        if name in model_dict and value.shape == model_dict[name].shape:
            matched[name] = value

    logger.info('load_weights_start')
    logger.info('Loaded {} parameters!'.format(len(matched)))
    if not matched:
        # Nothing matched: the model would silently run with its initial weights.
        logger.warning('No checkpoint entry matched the model; weights are unchanged.')

    model_dict.update(matched)
    model.load_state_dict(model_dict, strict=False)
    logger.info('load_weights_done')

    return model

def find_contours(mask):
    """Return the external contours of the non-zero regions of *mask*.

    Args:
        mask: 2-D array; it is cast to uint8 before being passed to OpenCV.

    Returns:
        The contour sequence produced by ``cv2.findContours`` with
        ``RETR_EXTERNAL`` and ``CHAIN_APPROX_SIMPLE``.
    """
    result = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); the contours are always the second-to-last item.
    return result[-2]

def calculate_centroid(contour):
    """Return the integer centroid ``(cx, cy)`` of a contour.

    The centroid is derived from the image moments (m10/m00, m01/m00) and
    truncated to ints. ``None`` is returned when the zeroth moment is 0, in
    which case the centroid is undefined.
    """
    moments = cv2.moments(contour)
    area = moments['m00']
    if area == 0:
        return None
    return (int(moments['m10'] / area), int(moments['m01'] / area))

def is_centroid_in_road(centroid, road_mask):
    """Tell whether a centroid lies on a road pixel.

    Args:
        centroid: ``(cx, cy)`` pixel coordinates (column, row), or ``None``.
        road_mask: 2-D array indexed as ``[row, column]`` whose non-zero
            entries mark road pixels.

    Returns:
        A plain ``bool``: ``True`` when the mask is non-zero at the centroid,
        ``False`` otherwise or when ``centroid`` is ``None``.
    """
    if centroid is None:
        return False
    cx, cy = centroid
    # Convert the NumPy scalar so both code paths return the same type.
    return bool(road_mask[cy, cx])

    
def process_image(file_path, model):
    """Segment one image, flag pedestrians on the road, and save the result.

    The image is read with OpenCV, normalised by ``input_transform`` and run
    through ``model``. The arg-max class map is painted with ``color_map``,
    the verdict text is drawn at the top centre, and the picture is saved as
    ``SAVE_DIR/<basename of file_path>``. A pedestrian counts as "on the
    road" when the centroid of one of its external contours is a road pixel.

    Args:
        file_path: Path of the image to process.
        model: Segmentation network; its output is resized to the input size
            and arg-maxed over dim 1 (assumed to be per-class scores).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``file_path``.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        raise FileNotFoundError(f'Could not read image: {file_path}')

    width = img.shape[1]
    sv_img = np.zeros_like(img, dtype=np.uint8)

    # Feed the input on whichever device the model already lives on.
    device = next(model.parameters()).device
    batch = torch.from_numpy(input_transform(img).transpose((2, 0, 1)).copy())
    batch = batch.unsqueeze(0).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        pred = model(batch)
        pred = F.interpolate(pred, size=batch.size()[-2:], mode='bilinear', align_corners=True)
        pred = torch.argmax(pred, dim=1).squeeze(0).cpu().numpy()

    # Class ids 3 and 9 are used as road and pedestrian
    # (presumably the label order of the loaded weights — verify).
    road_mask = (pred == 3).astype(np.uint8)
    pedestrian_mask = (pred == 9).astype(np.uint8)

    for class_id, class_color in enumerate(color_map):
        sv_img[pred == class_id] = class_color

    # Check whether the centroid of any pedestrian blob lies on the road.
    person_on_road = any(
        is_centroid_in_road(calculate_centroid(contour), road_mask)
        for contour in find_contours(pedestrian_mask)
    )

    text = f'Person on road: {person_on_road}'
    text_color = BLUE_COLOR if person_on_road else RED_COLOR
    (text_width, text_height), _ = cv2.getTextSize(text, FONT, FONTSCALE, THICKNESS)
    x = (width - text_width) // 2
    y = 40 + text_height
    cv2.putText(sv_img, text, (x, y), FONT, FONTSCALE, text_color, THICKNESS, cv2.LINE_AA)

    os.makedirs(SAVE_DIR, exist_ok=True)
    # Name the output after the file actually processed, not a module global.
    Image.fromarray(sv_img).save(os.path.join(SAVE_DIR, os.path.basename(file_path)))
def process_video(file_path, model):
    """Segment every frame of a video and write an annotated copy.

    Each frame is normalised by ``input_transform``, run through ``model``,
    painted with ``color_map`` and labelled with whether the centroid of any
    pedestrian contour lies on a road pixel. Frames are encoded with the XVID
    fourcc into ``SAVE_DIR/<input stem>.avi`` at the frame rate reported by
    the source (20 fps when the source reports none).

    Args:
        file_path: Path of the video to process.
        model: Segmentation network; its output is resized to the frame size
            and arg-maxed over dim 1 (assumed to be per-class scores).

    Raises:
        FileNotFoundError: If the input video cannot be opened.
        RuntimeError: If the output writer cannot be opened.
    """
    cap = cv2.VideoCapture(file_path)
    out = None
    try:
        if not cap.isOpened():
            raise FileNotFoundError(f'Could not open video: {file_path}')

        frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # 0 (or NaN) means the container does not report a frame rate.
            fps = 20.0

        # The writer fails silently when the target directory is missing.
        os.makedirs(SAVE_DIR, exist_ok=True)
        # XVID frames belong in an .avi container, whatever the input was.
        out_name = os.path.splitext(os.path.basename(file_path))[0] + '.avi'
        out_path = os.path.join(SAVE_DIR, out_name)
        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frame_size)
        if not out.isOpened():
            raise RuntimeError(f'Could not open video writer: {out_path}')

        # Feed the input on whichever device the model already lives on.
        device = next(model.parameters()).device

        # Inference only: no autograd graph is needed for any frame.
        with torch.no_grad():
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                width = frame.shape[1]
                sv_img = np.zeros_like(frame, dtype=np.uint8)

                batch = torch.from_numpy(input_transform(frame).transpose((2, 0, 1)).copy())
                batch = batch.unsqueeze(0).to(device)

                pred = model(batch)
                pred = F.interpolate(pred, size=batch.size()[-2:], mode='bilinear', align_corners=True)
                pred = torch.argmax(pred, dim=1).squeeze(0).cpu().numpy()

                # Class ids 3 and 9 are used as road and pedestrian
                # (presumably the label order of the loaded weights — verify).
                road_mask = (pred == 3).astype(np.uint8)
                pedestrian_mask = (pred == 9).astype(np.uint8)

                for class_id, class_color in enumerate(color_map):
                    sv_img[pred == class_id] = class_color

                person_on_road = any(
                    is_centroid_in_road(calculate_centroid(contour), road_mask)
                    for contour in find_contours(pedestrian_mask)
                )

                text = f'Person on road: {person_on_road}'
                text_color = BLUE_COLOR if person_on_road else RED_COLOR
                (text_width, text_height), _ = cv2.getTextSize(text, FONT, FONTSCALE, THICKNESS)
                x = (width - text_width) // 2
                y = 40 + text_height
                cv2.putText(sv_img, text, (x, y), FONT, FONTSCALE, text_color, THICKNESS, cv2.LINE_AA)

                # The map was painted as RGB; VideoWriter expects BGR.
                out.write(cv2.cvtColor(sv_img, cv2.COLOR_RGB2BGR))
    finally:
        # Release the handles even when a frame fails mid-way.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
    
       
# Build PIDNet-S with 19 output classes when is_cityscape is set, 11 otherwise,
# load the checkpoint, move the network to the GPU and switch it to eval mode.
is_cityscape = False
model = load_pretrained(
    models.pidnet.get_pred_model('pidnet-s', 19 if is_cityscape else 11),
    'weights/PIDNet_S_Camvid_Test.pt',
).cuda()
model.eval()

# Pick the image or the video pipeline from the input's extension.
ext = os.path.splitext(FILE_PATH)[1].lower()
if ext in ('.png', '.jpg', '.jpeg'):
    process_image(FILE_PATH, model)
elif ext in ('.mp4', '.avi', '.mov'):
    process_video(FILE_PATH, model)
else:
    raise ValueError("Unsupported file format")
    

2024-05-23 20:14:55.448 | INFO     | __main__:load_pretrained:58 - load_weights_start
2024-05-23 20:14:55.449 | INFO     | __main__:load_pretrained:59 - Loaded 453 parameters!
2024-05-23 20:14:55.449 | INFO     | __main__:load_pretrained:60 - load_weights_done


In [28]:
import numpy as np
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
from loguru import logger
import models
import os

# Seed NumPy and PyTorch (CPU and every CUDA device) with the same fixed value.
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

# Configuration values.
FILE_PATH = 'samples/test.mp4'
FILE_NAME = os.path.basename(FILE_PATH)
SAVE_DIR = 'output/'

# Text-overlay settings handed to cv2.getTextSize / cv2.putText.
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONTSCALE = 1
THICKNESS = 2

# Overlay colours. The annotated map is treated as RGB downstream (saved via
# PIL, or converted RGB -> BGR before being written to video).
BLUE_COLOR = (0, 0, 255)
RED_COLOR = (255, 40, 40)

# Per-channel normalisation constants applied by input_transform
# (presumably the usual ImageNet statistics in RGB order — confirm).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Colour painted for each predicted class id (list index == class id).
color_map = [
    (128, 64, 128),
    (244, 35, 232),
    (70, 70, 70),
    (102, 102, 156),
    (190, 153, 153),
    (153, 153, 153),
    (250, 170, 30),
    (220, 220, 0),
    (107, 142, 35),
    (152, 251, 152),
    (70, 130, 180),
    (220, 20, 60),
    (255, 0, 0),
    (0, 0, 142),
    (0, 0, 70),
    (0, 60, 100),
    (0, 80, 100),
    (0, 0, 230),
    (119, 11, 32),
]

def input_transform(image):
    """Prepare an H x W x 3 image for the network.

    Steps: cast to float32, reverse the channel axis (BGR -> RGB for frames
    produced by OpenCV), divide by 255, then subtract the module-level
    ``mean`` and divide by ``std`` channel-wise.

    Args:
        image: Array indexed as [row, column, channel].

    Returns:
        A new float32 array with the same shape as ``image``.
    """
    channels_reversed = image.astype(np.float32)[:, :, ::-1]
    result = channels_reversed / 255.0
    # The in-place forms keep float32 although mean/std are Python lists.
    result -= mean
    result /= std
    return result

def load_pretrained(model, pretrained):
    """Load the compatible part of a checkpoint into *model*.

    A leading ``'model.'`` on a checkpoint key is removed when present, so
    checkpoints stored with or without that prefix are both accepted. Only
    tensors whose resulting name exists in the model with an identical shape
    are copied; all remaining model parameters are left untouched.

    Args:
        model: Module that receives the weights.
        pretrained: Path of a checkpoint readable by ``torch.load``; either a
            state dict or a dict that stores one under ``'state_dict'``.

    Returns:
        The same ``model`` object, updated in place.
    """
    # NOTE(security): torch.load unpickles the file — only load checkpoints
    # that come from a trusted source.
    checkpoint = torch.load(pretrained, map_location='cpu')
    if 'state_dict' in checkpoint:
        checkpoint = checkpoint['state_dict']
    model_dict = model.state_dict()

    prefix = 'model.'
    usable = {}
    for key, tensor in checkpoint.items():
        # Only strip the prefix when it exists; slicing six characters off
        # every key would mangle names that were saved without it.
        name = key[len(prefix):] if key.startswith(prefix) else key
        if name in model_dict and tensor.shape == model_dict[name].shape:
            usable[name] = tensor

    logger.info('load_weights_start')
    logger.info('Loaded {} parameters!'.format(len(usable)))
    if not usable:
        # Nothing matched: the model would silently run with its initial weights.
        logger.warning('No checkpoint entry matched the model; weights are unchanged.')

    model_dict.update(usable)
    model.load_state_dict(model_dict, strict=False)
    logger.info('load_weights_done')
    return model

def find_contours(mask):
    """Return the external contours of the non-zero regions of *mask*.

    Args:
        mask: 2-D array; it is cast to uint8 before being passed to OpenCV.

    Returns:
        The contour sequence produced by ``cv2.findContours`` with
        ``RETR_EXTERNAL`` and ``CHAIN_APPROX_SIMPLE``.
    """
    found = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The tuple is (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is correct for all.
    return found[-2]

def is_person_near_road(pedestrian_mask, road_mask, threshold=1):
    """Tell whether any pedestrian pixel touches the (grown) road region.

    The road mask is dilated with a 3x3 all-ones kernel for ``threshold``
    iterations, then tested for overlap with the pedestrian mask.

    Args:
        pedestrian_mask: Array whose non-zero entries mark pedestrian pixels.
        road_mask: Array whose non-zero entries mark road pixels.
        threshold: Number of dilation iterations applied to ``road_mask``.

    Returns:
        Truthy when at least one pixel is set in both masks after dilation.
    """
    grown_road = cv2.dilate(road_mask, np.ones((3, 3), np.uint8), iterations=threshold)
    overlap = np.logical_and(pedestrian_mask, grown_road)
    return np.any(overlap)

def process_image(file_path, model):
    """Segment one image, flag pedestrians near the road, and save the result.

    The image is read with OpenCV, normalised by ``input_transform`` and run
    through ``model``. The arg-max class map is painted with ``color_map``,
    the verdict from ``is_person_near_road`` is drawn at the top centre, and
    the picture is saved as ``SAVE_DIR/<basename of file_path>``.

    Args:
        file_path: Path of the image to process.
        model: Segmentation network; its output is resized to the input size
            and arg-maxed over dim 1 (assumed to be per-class scores).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``file_path``.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        raise FileNotFoundError(f'Could not read image: {file_path}')

    width = img.shape[1]
    sv_img = np.zeros_like(img, dtype=np.uint8)

    # Feed the input on whichever device the model already lives on.
    device = next(model.parameters()).device
    batch = torch.from_numpy(input_transform(img).transpose((2, 0, 1)).copy())
    batch = batch.unsqueeze(0).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        pred = model(batch)
        pred = F.interpolate(pred, size=batch.size()[-2:], mode='bilinear', align_corners=True)
        pred = torch.argmax(pred, dim=1).squeeze(0).cpu().numpy()

    # Class ids 3 and 9 are used as road and pedestrian
    # (presumably the label order of the loaded weights — verify).
    road_mask = (pred == 3).astype(np.uint8)
    pedestrian_mask = (pred == 9).astype(np.uint8)

    for class_id, class_color in enumerate(color_map):
        sv_img[pred == class_id] = class_color

    person_on_road = is_person_near_road(pedestrian_mask, road_mask)

    text = f'Person on road: {person_on_road}'
    text_color = BLUE_COLOR if person_on_road else RED_COLOR
    (text_width, text_height), _ = cv2.getTextSize(text, FONT, FONTSCALE, THICKNESS)
    x = (width - text_width) // 2
    y = 40 + text_height
    cv2.putText(sv_img, text, (x, y), FONT, FONTSCALE, text_color, THICKNESS, cv2.LINE_AA)

    os.makedirs(SAVE_DIR, exist_ok=True)
    # Name the output after the file actually processed, not a module global.
    Image.fromarray(sv_img).save(os.path.join(SAVE_DIR, os.path.basename(file_path)))

def process_video(file_path, model):
    """Segment every frame of a video and write an annotated copy.

    Each frame is normalised by ``input_transform`` and run through
    ``model``. Only class ids 4 and 5 are painted (with their ``color_map``
    colours) on a black canvas, and the verdict from ``is_person_near_road``
    is drawn at the top centre. Frames are encoded with the XVID fourcc into
    ``SAVE_DIR/<input stem>.avi`` at the frame rate reported by the source
    (20 fps when the source reports none).

    Args:
        file_path: Path of the video to process.
        model: Segmentation network; its output is resized to the frame size
            and arg-maxed over dim 1 (assumed to be per-class scores).

    Raises:
        FileNotFoundError: If the input video cannot be opened.
        RuntimeError: If the output writer cannot be opened.
    """
    cap = cv2.VideoCapture(file_path)
    out = None
    try:
        if not cap.isOpened():
            raise FileNotFoundError(f'Could not open video: {file_path}')

        frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # 0 (or NaN) means the container does not report a frame rate.
            fps = 20.0

        # The writer fails silently when the target directory is missing.
        os.makedirs(SAVE_DIR, exist_ok=True)
        # Derive the output name from the file actually processed.
        out_name = os.path.splitext(os.path.basename(file_path))[0] + '.avi'
        out_path = os.path.join(SAVE_DIR, out_name)
        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frame_size)
        if not out.isOpened():
            raise RuntimeError(f'Could not open video writer: {out_path}')

        # Feed the input on whichever device the model already lives on.
        device = next(model.parameters()).device

        # Inference only: no autograd graph is needed for any frame.
        with torch.no_grad():
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                width = frame.shape[1]
                sv_img = np.zeros_like(frame, dtype=np.uint8)

                batch = torch.from_numpy(input_transform(frame).transpose((2, 0, 1)).copy())
                batch = batch.unsqueeze(0).to(device)

                pred = model(batch)
                pred = F.interpolate(pred, size=batch.size()[-2:], mode='bilinear', align_corners=True)
                pred = torch.argmax(pred, dim=1).squeeze(0).cpu().numpy()

                # Class ids 3 and 9 are used as road and pedestrian
                # (presumably the label order of the loaded weights — verify).
                road_mask = (pred == 3).astype(np.uint8)
                pedestrian_mask = (pred == 9).astype(np.uint8)

                # NOTE(review): only classes 4 and 5 are painted here, unlike
                # process_image which paints every class — confirm intended.
                for class_id in (4, 5):
                    sv_img[pred == class_id] = color_map[class_id]

                person_on_road = is_person_near_road(pedestrian_mask, road_mask)

                text = f'Person on road: {person_on_road}'
                text_color = BLUE_COLOR if person_on_road else RED_COLOR
                (text_width, text_height), _ = cv2.getTextSize(text, FONT, FONTSCALE, THICKNESS)
                x = (width - text_width) // 2
                y = 40 + text_height
                cv2.putText(sv_img, text, (x, y), FONT, FONTSCALE, text_color, THICKNESS, cv2.LINE_AA)

                # The map was painted as RGB; VideoWriter expects BGR.
                out.write(cv2.cvtColor(sv_img, cv2.COLOR_RGB2BGR))
    finally:
        # Release the handles even when a frame fails mid-way.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

# Build PIDNet-S with 19 output classes when is_cityscape is set, 11 otherwise,
# load the checkpoint, move the network to the GPU and switch it to eval mode.
is_cityscape = False
model = load_pretrained(
    models.pidnet.get_pred_model('pidnet-s', 19 if is_cityscape else 11),
    'weights/PIDNet_S_Camvid_Test.pt',
).cuda()
model.eval()

# Pick the image or the video pipeline from the input's extension.
ext = os.path.splitext(FILE_PATH)[1].lower()
if ext in ('.png', '.jpg', '.jpeg'):
    process_image(FILE_PATH, model)
elif ext in ('.mp4', '.avi', '.mov'):
    process_video(FILE_PATH, model)
else:
    raise ValueError("Unsupported file format")


2024-05-23 20:28:20.612 | INFO     | __main__:load_pretrained:47 - load_weights_start
2024-05-23 20:28:20.613 | INFO     | __main__:load_pretrained:48 - Loaded 453 parameters!
2024-05-23 20:28:20.614 | INFO     | __main__:load_pretrained:49 - load_weights_done
