SCREEN DETECTION:
-----------------
YOLOv7: https://github.com/WongKinYiu/yolov7


hyperparameters:	lr0=0.01, lrf=0.1, momentum=0.937, weight_decay=0.0005, 
			warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, 
			box=0.05, cls=0.3, cls_pw=1.0, obj=0.7, obj_pw=1.0,
			iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7,
			hsv_v=0.4, degrees=0.0, translate=0.2, scale=0.9, shear=0.0,
			perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.15,
			copy_paste=0.0, paste_in=0.15, loss_ota=1


image_size = 416
Batch_size = 4
No. of epochs = 10

Final_precision: 0.93
Final_recall: 0.97
mAP@0.5: 0.995
mAP@0.5:0.95: 0.96

SCREEN CLASSIFIER:
------------------

Model used: mobilenetV3_small

hyperparameters:	lr=0.001, epochs=10, grad_clip=0.1, weight_decay=1e-4
Optimizer used: SGD

Final Validation Accuracy: 82%

HEART RATE GRAPH DETECTION:
---------------------------
YOLOv5: https://github.com/ultralytics/yolov5

hyperparameters:	lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005,
			warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1,
			box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0,
			iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015,
			hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1,
			scale=0.5, shear=0.0, perspective=0.0, flipud=0.0,
			fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0

image_size: 416
Batch size: 16
No. of epochs: 10

Final_precision: 0.95
Final_recall: 0.97
mAP@0.5: 0.995

In [None]:
# requirements
%pip install -q easyocr
%pip install -q ocrd-fork-pylsd==0.0.3

Note: you may need to restart the kernel to use updated packages.




Note: you may need to restart the kernel to use updated packages.




In [14]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from datasets.test_dataset import LoadImages
from perspective import Perspective
from screen_ocr import ScreenOCR
from models.experimental import attempt_load
from utils.torch_utils import select_device
from utils.general import non_max_suppression, scale_coords, xyxy2xywh
import pandas as pd
import time
import matplotlib.pyplot as plt
import cv2

In [15]:
class FullPipeline(object):
    def __init__(self, weight, graph_weight=None,digitize_graph=False,device=''):
        self.digitize_graph=False
        if digitize_graph and graph_weight:
            self.graphdetector=torch.load(graph_weight).to('cpu')
            self.graphdetector.eval()
            self.digitize_graph = True
        self.device = select_device(device)
        self.detection_model = attempt_load(weight, map_location=self.device)
        self.stride = int(self.detection_model.stride.max())
        self.classifier_model = None
        self.ppt = Perspective()       # class for handling perspective change
        self.ocr = ScreenOCR()         # class for handling OCR part
        self.detection_model.eval()
        
        # self.classifier_model.eval()

    def crop_bboxes(self, image, xywh, margin=15):
        print(image.shape)
        print(xywh)
        h_img, w_img, _ = image.shape
        x1 = int(max(0, xywh[0]*w_img-xywh[2]*w_img*0.5-margin))
        y1 = int(max(0, xywh[1]*h_img-xywh[3]*h_img*0.5-margin))
        x2 = int(min(w_img, xywh[0]*w_img + xywh[2]*w_img*0.5 + margin))
        y2 = int(min(h_img, xywh[1]*h_img + xywh[3]*h_img*0.5 + margin))
        cropped_image = image[y1:y2, x1:x2, :]
        plt.imshow(cropped_image)
        plt.show()
        warped_image = self.ppt.shift_perspective(cropped_image)
        return warped_image

    def clean_img(self, img):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # sharpen image
        sharpen = cv2.GaussianBlur(gray, (0,0), 3)
        sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0)

        # apply adaptive threshold to get black and white effect
        thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15)
        cv2.imwrite('thresh.jpeg',thresh)
        return sharpen

    def evaluate(self, test_data, classify=False):
        # df = pd.DataFrame(columns=['rr', 'hr', 'spo2', 'map', 'sys', 'dia'])
        df = []
        for path, img, im0 in test_data:
            img = torch.from_numpy(img).to(self.device).float()
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            with torch.no_grad():
                pred = self.detection_model(img, augment=True)[0]
            pred = non_max_suppression(pred, 0.25, 0.45, agnostic=True)
            det = pred[0]
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
            cropped_image = self.crop_bboxes(im0, xywh)
            cv2.imwrite('cropped.jpeg',cropped_image)
            if classify:
                with torch.no_grad():
                    screen_types = self.classifier_model(cropped_image)
            if self.digitize_graph:
                with torch.no_grad():
                    pred=self.graphdetector(img, augment=True)
                pred = non_max_suppression(pred, 0.25, 0.45, agnostic=True)
                det = pred[0]
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                    for *xyxy, conf, cls in reversed(det):
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
                cropped_graph = self.crop_bboxes(im0, xywh)
                cv2.imwrite('cropped_graph.jpeg',cropped_graph)
            # cleaning and OCR part
            img = cropped_image.copy()
            cleaned = self.clean_img(cropped_image)
            vitals_dict = self.ocr.read_vitals(image=cleaned, image_rgb=img)
            print(vitals_dict)
            df.append(vitals_dict)
        return df

In [16]:
YOLOv7_WEIGHT  = './weights/yolov7_best.pt'

def inference(image_path:str):
    """
    Function responsible for inference.
    Args: 
      image_path: str, path to image file. eg. "input/aveksha_micu_mon--209_2023_1_17_12_0_34.jpeg"
    Returns:
      result: dict, final output dictionary. eg. {"HR":"80", "SPO2":"98", "RR":"15", "SBP":"126", "DBP":"86"}
    """
  
    ### put your code here
    t0=time.time()
    fpl = FullPipeline(YOLOv7_WEIGHT)

    t1 = time.time()
    dataset = LoadImages(image_path, img_size=640, stride=fpl.stride)

    t2 = time.time()
    df = fpl.evaluate(dataset)

    result = df[0]

    return result    

In [17]:
res = inference('images/cchdavangere_micu_mon--2_2023_1_5_5_10_1.jpeg')

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
(720, 1280, 3)
[0.4546875059604645, 0.46666666865348816, 0.38593751192092896, 0.5416666865348816]


  plt.show()


{'RR': 26, 'HR': 99, 'SPO2': None, 'MAP': None, 'SBP': None, 'DBP': None}


In [18]:
res

{'RR': 26, 'HR': 99, 'SPO2': None, 'MAP': None, 'SBP': None, 'DBP': None}