## Импорт необходимых библиотек

In [None]:
import cv2
import os
import matplotlib.pyplot as plt
from img_embedder import *
from torchvision import transforms
from PIL import Image
import numpy as np
from numpy import random
from models.experimental import attempt_load
from utils.general import check_img_size, non_max_suppression, scale_coords
from utils.datasets import LoadImages, letterbox
from utils.torch_utils import select_device

In [None]:
%matplotlib inline

In [None]:
# Print the torch version followed by one line per visible CUDA device.
s = f'Using torch {torch.__version__} '
d = [torch.cuda.get_device_properties(idx) for idx in range(torch.cuda.device_count())]
for i, props in enumerate(d):
    if i == 1:
        # From the second device on, the banner is replaced by padding of equal width
        s = ' ' * len(s)
    print("%scuda:%g (%s, %dMB)" % (s, i, props.name, props.total_memory / 1024 ** 2))

In [None]:
# Compute device shared by the detector and the input tensors built in get_objects.
# NOTE(review): 'cuda' is requested explicitly, so this presumably fails on a
# machine without a GPU — confirm against utils.torch_utils.select_device.
device = select_device('cuda')

## Основные функции

In [None]:
# Helper that draws a (optionally captioned) box on an image
def plot_label_box(img, rect, color=None, label=None, line_thickness=None):
    """Draw one bounding box, optionally with a caption, onto ``img``.

    Args:
        img: Image array; the OpenCV calls draw on it directly.
        rect: Pair ``(c1, c2)`` of opposite box corners, each an ``(x, y)`` pair.
        color: Three channel values for the box; a random colour is picked
            when this is falsy.
        label: Optional caption, written on a filled strip above the first corner.
        line_thickness: Line thickness in pixels; derived from the image
            height and width when falsy.

    Returns:
        The value returned by the last OpenCV drawing call.
    """
    if line_thickness:
        tl = line_thickness
    else:
        # Scale the stroke with the mean of image height and width
        tl = round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left, bottom_right = rect
    out = cv2.rectangle(img, top_left, bottom_right, color, thickness=tl, lineType=cv2.LINE_AA)
    if not label:
        return out

    font_scale = tl / 3
    tf = max(tl - 1, 1)  # font thickness, never below 1
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=tf)[0]
    strip_corner = top_left[0] + text_w, top_left[1] - text_h - 3
    # Filled strip behind the caption
    cv2.rectangle(img, top_left, strip_corner, color, -1, cv2.LINE_AA)
    return cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale,
                       [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

In [None]:
# Detect every object in an image with YOLOv5
def get_objects(model, img0, opt):
    """Run the detector on one image and group the detections by class name.

    Relies on the notebook-level globals ``device`` (where the input tensor is
    placed) and ``names`` (class index -> class name).

    Args:
        model: Detection model; invoked as ``model(img)[0]``.
        img0: Source image as an H x W x C array. No channel reordering is
            done here, so the array is fed to the model in the order given
            (reverse the last axis first if it was read with cv2).
        opt: Dict with the keys ``imgsz``, ``half``, ``conf_thres``,
            ``iou_thres``, ``classes`` and ``agnostic``.

    Returns:
        Dict mapping class name to a list of
        ``[confidence, ((x1, y1), (x2, y2))]`` entries, with the coordinates
        rescaled to ``img0``.
    """
    detects_obj = dict()

    # Resize/pad the image to the network input size
    img = letterbox(img0, new_shape=opt['imgsz'])[0]

    # HWC -> CHW (layout change only, channels keep their order)
    img = img.transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if opt['half'] else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    # Inference only: skip autograd bookkeeping so no graph is kept in memory
    with torch.no_grad():
        pred = model(img)[0]
        pred = non_max_suppression(pred, opt['conf_thres'], opt['iou_thres'],
                                   classes=opt['classes'], agnostic=opt['agnostic'])

    for det in pred:  # detections per image
        if det is None or not len(det):
            continue

        # Rescale boxes from the network input size back to the img0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

        for *xyxy, conf, cls in reversed(det):
            x1, y1, x2, y2 = (int(v) for v in xyxy)
            detects_obj.setdefault(names[int(cls)], []).append(
                [float(conf), ((x1, y1), (x2, y2))]
            )

    return detects_obj

In [None]:
# Fraction of a detected object's box that lies inside the zone of interest
def comp_overlaps(interes_zone, obj_zone):
    """Return the share of ``obj_zone``'s area that ``interes_zone`` covers.

    Both boxes may be given as ``((x1, y1), (x2, y2))`` or as anything that
    flattens to ``[x1, y1, x2, y2]``.

    Args:
        interes_zone: Box of the zone of interest.
        obj_zone: Box of the detected object.

    Returns:
        ``intersection_area / obj_area`` as a float, or ``0.0`` when the
        object box has no positive area (which would otherwise divide by zero
        and yield ``nan``).
    """
    interes_zone = np.asarray(interes_zone).reshape(-1)
    obj_zone = np.asarray(obj_zone).reshape(-1)

    obj_area = (obj_zone[2] - obj_zone[0]) * (obj_zone[3] - obj_zone[1])
    if obj_area <= 0:
        # Degenerate box: nothing can be "inside" the zone
        return 0.0

    # Width/height of the intersection rectangle, clipped at zero when disjoint
    inter_w = max(min(interes_zone[2], obj_zone[2]) - max(interes_zone[0], obj_zone[0]), 0)
    inter_h = max(min(interes_zone[3], obj_zone[3]) - max(interes_zone[1], obj_zone[1]), 0)

    return float(inter_w * inter_h / obj_area)

In [None]:
# Keep only the detections of the requested classes that lie inside the zone of interest
def get_interes_objs(objs, interes_zone, over_treshold, classes=None):
    """Filter detections by class and by overlap with the zone of interest.

    Args:
        objs: Dict mapping class name to a list of entries whose item at
            index 1 is the object's box.
        interes_zone: Box of the zone of interest, passed to ``comp_overlaps``.
        over_treshold: An entry is kept only when its overlap is strictly
            greater than this value.
        classes: Container of class names to keep. ``None`` (the default)
            keeps every class instead of raising ``TypeError``.

    Returns:
        Dict with one key per retained class; the list may be empty when no
        entry of that class passes the threshold.
    """
    interes_obj = dict()

    for obj_class, class_objs in objs.items():
        if classes is not None and obj_class not in classes:
            continue
        interes_obj[obj_class] = [
            x for x in class_objs
            if comp_overlaps(interes_zone, x[1]) > over_treshold
        ]

    return interes_obj

## Инициализация эмбеддера и трансформера

In [None]:
# Per-channel normalisation. NOTE(review): these values match the widely used
# ImageNet mean/std — presumably what the embedder was trained with; confirm.
norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Preprocessing pipeline: resize to 256x256, convert to a tensor, then normalise
base_transform = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor(), norm])

In [None]:
# Directory with the saved embedder checkpoints, next to the notebook
saved_models = os.path.join(os.getcwd(), 'image_embedder_saved_models')
# Only the top level of the directory is needed. os.walk yields nothing for a
# missing directory, so fall back to an empty listing instead of leaving
# `files` undefined.
_, _, files = next(os.walk(saved_models), (saved_models, [], []))
# Show the "best" checkpoints that are available
[file for file in files if 'best' in file]

In [None]:
# Resolve each checkpoint path from the first file name containing its marker
path_model = os.path.join(saved_models, [name for name in files if 'trunk_best' in name][0])
path_embedder = os.path.join(saved_models, [name for name in files if 'embedder_best' in name][0])

In [None]:
# Build the image embedder from the two checkpoint paths and the preprocessing transform
embedder = ImageEmbedder(path_model, path_embedder, base_transform)

## Инициализация yolov5

In [None]:
# Detector settings consumed by get_objects
opt = {
    'imgsz': 640,  # target size handed to letterbox
    'half': device.type != 'cpu',  # use FP16 whenever the device is not the CPU
    'conf_thres': 0.25,  # confidence threshold passed to non_max_suppression
    'iou_thres': 0.45,  # IoU threshold passed to non_max_suppression
    'classes': None,  # `classes` argument of non_max_suppression
    'agnostic': False  # `agnostic` argument of non_max_suppression
}
# Checkpoint handed to attempt_load
weights = 'yolov5s.pt'
# Image directory; not read by any active code in this notebook
source = os.path.join(os.getcwd(), 'data/images')

In [None]:
# Load the detector onto the selected device
yolo5_model = attempt_load(weights, map_location=device)
# Let check_img_size validate the requested size against the model's largest stride
opt['imgsz'] = check_img_size(opt['imgsz'], s=yolo5_model.stride.max())
if opt['half']:
    yolo5_model.half()  # switch the model to FP16

# Class names are read from `.module` when the model has that attribute, otherwise from the model itself
names = getattr(yolo5_model, 'module', yolo5_model).names
# One random colour triple per class
colors = [[random.randint(0, 255) for _channel in range(3)] for _name in names]

## Отрисовка результатов поиска объектов

In [None]:
# dataset = LoadImages(opt['source'], img_size=opt['imgsz'])
# path = dataset.files[0]
# Image to process, given relative to the working directory
path = 'image.jpg'

In [None]:
# img0 = cv2.imread(dataset.files[0])
# Force three RGB channels: grayscale, palette, RGBA or CMYK files would
# otherwise give an array that get_objects cannot transpose as H x W x 3.
img_source = Image.open(path).convert('RGB')
# Array view of the same image for the detector; the PIL image is kept for cropping
img0 = np.array(img_source)

In [None]:
# Detect all objects in the image, grouped by class name
objs = get_objects(yolo5_model, img0, opt)

In [None]:
# Draw every detection on a copy so the source array stays untouched
result_img = img0.copy()
for obj_class, class_objs in objs.items():
    for obj in class_objs:
        score, rect = obj
        color = colors[names.index(obj_class)]  # fixed colour per class
        label = '%s %.2f' % (obj_class, score)
        result_img = plot_label_box(result_img, rect, color=color, label=label)

# Last expression of the cell: renders the annotated image
Image.fromarray(result_img)

## Получение объектов с учетом зоны интереса

In [None]:
# Minimum share of an object's box that must lie inside the zone of interest
overlaps_treshold = 0.8

In [None]:
# Zone of interest as two opposite corners ((x1, y1), (x2, y2)) in image pixels
interes_zone = ((550, 40), (750, 700))

In [None]:
# Keep only people whose box lies mostly inside the zone; reuse the configured
# threshold instead of repeating the literal 0.8.
interes_objs = get_interes_objs(objs, interes_zone, overlaps_treshold, classes=['person'])

In [None]:
# Start from a fresh copy of the source image and outline the zone of interest in black
result_img = img0.copy()
result_img = plot_label_box(result_img, interes_zone, color=[0,0,0], label='interes_zone', line_thickness=3)

In [None]:
# Add the boxes of the objects that passed the zone filter
for obj_class, class_objs in interes_objs.items():
    for obj in class_objs:
        score, rect = obj
        color = colors[names.index(obj_class)]  # fixed colour per class
        label = '%s %.2f' % (obj_class, score)
        result_img = plot_label_box(result_img, rect, color=color, label=label)

# Last expression of the cell: renders the annotated image
Image.fromarray(result_img)

In [None]:
# People found inside the zone of interest (empty list when there are none)
persons = interes_objs.get('person', list())
# Number of such people
l = len(persons)

In [None]:
# Show the crop of every person found inside the zone of interest, four per row
fig = plt.figure(figsize=(15,10))

n_cols = 4
# ceil(len / n_cols) rows, at least one; the previous len + 1 rows left most of
# the figure empty and shrank every crop.
n_rows = max(1, -(-len(persons) // n_cols))

for i, person in enumerate(persons):
    # person[1] is ((x1, y1), (x2, y2)); flatten it into the 4-value crop box
    person_img = img_source.crop(np.array(person[1]).reshape(-1))
    embedder.img2vect(person_img)  # embedding is computed but not used in this cell
    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(person_img)

## Тест производительности

In [None]:
# %%time
# print(os.environ['CUDA_VISIBLE_DEVICES'])
# for i in range(180 * 24):
#     img_source = Image.open(path)
#     img0 = np.array(img_source)
#     objs = get_objects(yolo5_model, img0, opt)
#     interes_objs = get_interes_objs(objs, interes_zone, 0.8, classes=['person'])
#     persons = interes_objs.get('person', list())
#     l = len(persons)
#     for i in range(l):
#         person_img = img_source.crop(np.array(persons[i][1]).reshape(-1))
#         embedder.img2vect(person_img)