In [99]:
# Utils
import json
import random
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

def read_json_file(jsonfile):
    """Load and return the parsed contents of a JSON file.

    Args:
        jsonfile: Path to the file (a ``str`` or any object whose ``str()``
            is the path, e.g. ``pathlib.Path``).

    Returns:
        The decoded JSON value (whatever ``json.load`` produces).
    """
    # Decode explicitly as UTF-8: the label files contain non-ASCII text and
    # the platform default encoding is not UTF-8 everywhere.
    with open(str(jsonfile), encoding='utf-8') as f:
        return json.load(f)

def xywh2xyxy(bbox):
    """Convert a box from ``[x, y, w, h]`` to ``[x0, y0, x1, y1]``.

    The input is left untouched; a new list is returned. Any elements after
    the fourth are carried over unchanged.

    Args:
        bbox: Sequence whose first four items are x, y, width, height.

    Returns:
        A new list with items 2 and 3 replaced by ``x + w`` and ``y + h``.
    """
    # Work on a copy so the caller's box is not silently rewritten
    # (and so tuples are accepted as well as lists).
    converted = list(bbox)
    converted[2] += converted[0]
    converted[3] += converted[1]
    return converted

def putText(img, text, start,
            color=(0,255,0),
            size=5,
            font='MINGLIU.ttf'):
    """Draw `text` on a copy of a BGR image using a TrueType font (via PIL).

    Args:
        img: Image array in OpenCV BGR channel order; it is copied, not modified.
        text: String to render.
        start: (x, y) position handed to ``ImageDraw.text``.
        color: Fill colour. It is applied to the RGB-converted image, so the
            tuple is interpreted in RGB order.
        size: Font size passed to ``ImageFont.truetype``.
        font: Font file name/path passed to ``ImageFont.truetype``.

    Returns:
        A new BGR image array with the text drawn on it.
    """
    # OpenCV (BGR) -> PIL (RGB)
    rgb_pixels = cv2.cvtColor(img.copy(), cv2.COLOR_BGR2RGB)
    canvas = Image.fromarray(rgb_pixels)
    painter = ImageDraw.Draw(canvas)

    typeface = ImageFont.truetype(font=font, size=size, encoding='utf-8')
    painter.text(start, text, color, font=typeface)

    # PIL (RGB) -> OpenCV (BGR)
    return cv2.cvtColor(np.asarray(canvas), cv2.COLOR_RGB2BGR)


def draw_bboxes(img, bboxes, labels=None):
    """Draw boxes (and optional label text) on `img` and return it.

    Args:
        img: Image passed to ``cv2.rectangle`` / ``cv2.putText``; the drawing
            calls operate directly on this object.
        bboxes: Sequence of boxes, each sliced as ``bbox[:2]`` (first corner)
            and ``bbox[2:]`` (opposite corner).
        labels: Optional sequence parallel to `bboxes`. When given, every
            distinct label gets its own colour and ``str(label)`` is written
            at the box's first corner. When ``None`` all boxes use (0, 0, 255).

    Returns:
        The same `img` object that was passed in.
    """
    if labels is None:
        colors = [(0,0,255) for _ in range(len(bboxes))]
    else:
        label_to_color = {}
        for k in set(labels):
            # Integer-like labels keep the historical seed (int(k)); anything
            # else (e.g. 'other', '0.95') is seeded by its string form instead
            # of raising.
            try:
                seed = int(k)
            except (TypeError, ValueError):
                seed = str(k)
            # A private generator yields the same per-label colour as seeding
            # the module-level RNG did, without clobbering global random state.
            rng = random.Random(seed)
            label_to_color[k] = [rng.randint(0,255) for _ in range(3)]
        colors = [label_to_color[k] for k in labels]

    for i in range(len(bboxes)):
        bbox, color = bboxes[i], colors[i]
        cv2.rectangle(img, bbox[:2], bbox[2:], color, 10)
        if labels is not None:
            x0 = bbox[0]
            y0 = bbox[1]
            text = str(labels[i])
            cv2.putText(img, text, (x0,y0), cv2.FONT_HERSHEY_SIMPLEX, 2, color, 10)
    return img

In [112]:
# For public_train/pill folder
import cv2
from pathlib import Path 

# Interactive viewer for the pill training images with their ground-truth boxes.
# Keys: 'a' = previous image, 'd' = next image, Esc = quit.
SHOW_SIZE = (1000,1000)

# Sorted so that navigation order is stable between runs (rglob order is not).
img_files = sorted(p for p in Path('dataset/public_train/pill/image').rglob('*') if p.suffix in ('.jpg', '.png'))

i = 0
shown_index = None  # index of the image currently rendered into `frame`
frame = None

# `while img_files` rather than `while True`: with no images there is nothing
# to index, so the loop is skipped instead of raising IndexError.
while img_files:
    # Only hit the disk / redraw when the selected image actually changed,
    # not on every 5 ms key-poll tick.
    if i != shown_index:
        img_file = img_files[i]
        label_file = img_file.parents[1] / 'label' / (img_file.stem + '.json')

        img = cv2.imread(str(img_file))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Cannot read image: {}".format(img_file))

        bboxes, labels = [], []
        for data in read_json_file(label_file):
            bbox = [data['x'], data['y'], data['w'], data['h']]
            bboxes.append(xywh2xyxy(bbox))
            labels.append(data['label'])

        img = draw_bboxes(img, bboxes, labels)
        frame = cv2.resize(img, SHOW_SIZE)
        shown_index = i

    cv2.imshow('Train Pill', frame)
    k = cv2.waitKey(5) & 0xff
    if k == 27:  # Esc
        break
    elif k == ord('a') and i > 0:
        i -= 1
    elif k == ord('d') and i < len(img_files)-1:
        i += 1
cv2.destroyAllWindows()




In [78]:
# For public_train/prescription folder
# -*- coding: utf-8 -*-
import cv2
from pathlib import Path 

# Interactive viewer for the prescription training images with their text boxes.
# Keys: 'a' = previous image, 'd' = next image, Esc = quit.
SHOW_SIZE = (1000,1500)

# Sorted so that navigation order is stable between runs (rglob order is not).
img_files = sorted(p for p in Path('dataset/public_train/prescription/image').rglob('*') if p.suffix in ('.jpg', '.png'))

i = 0
shown_index = None  # index of the image currently rendered into `frame`
frame = None

# `while img_files` rather than `while True`: with no images there is nothing
# to index, so the loop is skipped instead of raising IndexError.
while img_files:
    # Rendering is expensive here (one PIL round trip and font load per text
    # box), so do it once per selected image rather than on every 5 ms tick.
    if i != shown_index:
        img_file = img_files[i]
        label_file = img_file.parents[1] / 'label' / (img_file.stem + '.json')

        img = cv2.imread(str(img_file))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Cannot read image: {}".format(img_file))

        for data in read_json_file(label_file):
            txt = "id:{},label:{},text:{}".format(data['id'], data['label'], data['text'])
            bbox = data['box']

            cv2.rectangle(img, bbox[:2], bbox[2:], (0,0,0), 1)
            # The TrueType-based putText helper is used rather than cv2.putText
            # — presumably because the annotations contain non-ASCII text.
            img = putText(img, txt, (bbox[0],bbox[1]), (0,255,0), 15, font='VHAVANB.TTF')

        frame = cv2.resize(img, SHOW_SIZE)
        shown_index = i

    cv2.imshow('Train Pressciption', frame)
    k = cv2.waitKey(5) & 0xff
    if k == 27:  # Esc
        break
    elif k == ord('a') and i > 0:
        i -= 1
    elif k == ord('d') and i < len(img_files)-1:
        i += 1
cv2.destroyAllWindows()


In [113]:
# For public_train/pill folder
import cv2
from pathlib import Path 

SHOW_SIZE = (1000,2000)

# Every .jpg/.png found anywhere below the pill image directory.
img_files = [
    candidate
    for candidate in Path('dataset/public_train/pill/image').rglob('*')
    if candidate.suffix in ('.jpg', '.png')
]

# Gather the 'label' field of every record in each image's JSON label file
# (located in the sibling 'label' directory, same stem).
labels = []
for img_file in img_files:
    label_file = img_file.parents[1] / 'label' / (img_file.stem + '.json')
    labels.extend(record['label'] for record in read_json_file(label_file))

total_count = len(labels)
distinct_count = len(set(labels))
print("The Number of labels: {}".format(total_count))
print("The Number of difference label: {}".format(distinct_count))

The Number of labels: 32828
The Number of difference label: 108


In [85]:
# Display the flat label list built by the statistics cell (one entry per JSON record).
labels

[107,
 107,
 107,
 107,
 1,
 61,
 61,
 8,
 104,
 91,
 89,
 2,
 2,
 27,
 43,
 91,
 29,
 29,
 43,
 51,
 37,
 51,
 92,
 64,
 64,
 61,
 61,
 51,
 51,
 89,
 43,
 43,
 64,
 64,
 64,
 40,
 104,
 99,
 99,
 26,
 26,
 91,
 64,
 104,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 17,
 91,
 51,
 51,
 36,
 51,
 94,
 46,
 87,
 51,
 94,
 64,
 64,
 31,
 64,
 40,
 104,
 64,
 107,
 107,
 107,
 107,
 107,
 107,
 37,
 37,
 37,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 107,
 51,
 51,
 36,
 10,
 10,
 65,
 65,
 97,
 64,
 64,
 40,
 38,
 54,
 61,
 37,
 61,
 51,
 51,
 36,
 10,
 10,
 65,
 65,
 97,
 85,
 54,
 45,
 8,
 41,
 11,
 46,
 51,
 99,
 10,
 104,
 43,
 7,
 7,
 91,
 91,
 43,
 86,
 55,
 107,
 107,
 107,
 107,
 107,
 29,
 99,
 10,
 84,
 43,
 89,
 102,
 43,
 54,
 46,
 19,
 19,
 50,
 10,
 10,
 60,
 2,
 27,
 89,
 2,
 44,
 99,
 44,
 99,
 92,
 7,
 7,
 51,
 51,
 23,
 8,
 46,
 106,
 99,
 43,
 7,
 64,
 64,
 26,
 91,
 107,
 107,
 107

In [86]:
# Display the distinct label values found in the pill annotations.
set(labels)

{0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107}

In [94]:
# Inspect the raw annotation records of a single prescription label file.
read_json_file('dataset/public_train/prescription/label/VAIPE_P_TRAIN_1151.json')

[{'id': 2, 'text': '1/1', 'label': 'other', 'box': [58, 28, 113, 58]},
 {'id': 3,
  'text': 'PK Số 5.1 [TĐ]',
  'label': 'other',
  'box': [26, 89, 137, 112]},
 {'id': 4,
  'text': 'Số phiếu 67626/2019',
  'label': 'other',
  'box': [372, 92, 547, 121]},
 {'id': 5, 'text': '1373137', 'label': 'other', 'box': [626, 101, 688, 118]},
 {'id': 6,
  'text': 'TOA THUỐC BHYT',
  'label': 'other',
  'box': [260, 122, 508, 152]},
 {'id': 9, 'text': 'Nam', 'label': 'other', 'box': [620, 160, 655, 177]},
 {'id': 10, 'text': 'Nữ', 'label': 'other', 'box': [685, 160, 708, 177]},
 {'id': 14, 'text': 'TQ', 'label': 'other', 'box': [187, 207, 214, 227]},
 {'id': 15, 'text': '9T', 'label': 'other', 'box': [300, 207, 321, 224]},
 {'id': 16, 'text': '97', 'label': 'other', 'box': [366, 207, 386, 224]},
 {'id': 17, 'text': '317', 'label': 'other', 'box': [436, 207, 466, 225]},
 {'id': 18, 'text': '32006', 'label': 'other', 'box': [509, 207, 561, 225]},
 {'id': 19,
  'text': 'MMHg Thân nhiệt:',
  'label': '

In [95]:
# Inspect a second prescription label file for comparison.
read_json_file('dataset/public_train/prescription/label/VAIPE_P_TRAIN_1152.json')

[{'id': 1, 'text': '1/1', 'label': 'other', 'box': [58, 28, 113, 58]},
 {'id': 3,
  'text': 'Số phiếu 907/2019',
  'label': 'other',
  'box': [372, 91, 525, 123]},
 {'id': 4, 'text': '4659566', 'label': 'other', 'box': [626, 100, 688, 118]},
 {'id': 5,
  'text': 'TOA THUỐC BHYT',
  'label': 'other',
  'box': [260, 122, 508, 152]},
 {'id': 8, 'text': 'Nam', 'label': 'other', 'box': [620, 160, 655, 177]},
 {'id': 9, 'text': 'Nữ', 'label': 'other', 'box': [685, 160, 708, 177]},
 {'id': 13, 'text': '35', 'label': 'other', 'box': [366, 207, 387, 224]},
 {'id': 14, 'text': '210', 'label': 'other', 'box': [436, 207, 468, 225]},
 {'id': 15, 'text': '00068', 'label': 'other', 'box': [509, 207, 560, 225]},
 {'id': 16, 'text': '2', 'label': 'other', 'box': [249, 208, 262, 224]},
 {'id': 17, 'text': '35', 'label': 'other', 'box': [300, 208, 322, 225]},
 {'id': 18,
  'text': 'MMHg Thân nhiệt:',
  'label': 'other',
  'box': [450, 231, 595, 255]},
 {'id': 19,
  'text': 'lần/phút Huyết áp:',
  'label'

In [1]:
# Check bbox detector
import cv2 
import numpy as np 
from pathlib import Path 
from lib.detection_engine import detection_engine
from lib.utils import utils

# Interactive check of the pill detector on the public test images.
# Keys: 'a' = previous image, 'd' = next image, Esc = quit.
detectionEngine = detection_engine.DetectionEngine()

SHOW_SIZE = (1000,1000)
# Sorted so that navigation order is stable between runs (rglob order is not).
img_files = sorted(p for p in Path('document/dataset/public_test/pill/image').rglob('*') if p.suffix in ('.jpg', '.png'))

i = 0
shown_index = None  # index of the image currently rendered into `frame`
frame = None

# `while img_files` rather than `while True`: with no images there is nothing
# to index, so the loop is skipped instead of raising IndexError.
while img_files:
    # Run the detector once per selected image, not on every 5 ms key-poll tick.
    if i != shown_index:
        imgfile = img_files[i]
        img = cv2.imread(str(imgfile))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Cannot read image: {}".format(imgfile))

        bboxes, scores = detectionEngine.predict(img)
        if bboxes is not None:
            # Each box is annotated with its score rounded to two decimals.
            texts = [str(round(score,2)) for score in scores]
            img = utils.draw_bboxes(img, bboxes, texts)

        frame = cv2.resize(img, SHOW_SIZE)
        shown_index = i

    cv2.imshow("PILL", frame)
    k = cv2.waitKey(5) & 0xff
    if k == 27:  # Esc
        break
    elif k == ord('a') and i > 0:
        i -= 1
    elif k == ord('d') and i < len(img_files)-1:
        i += 1
cv2.destroyAllWindows()

FileNotFoundError: [Errno 2] No such file or directory: 'None'

PosixPath('adas/asd/asd/df.jpg')

In [111]:
# NOTE(review): `a` is not defined in any visible cell; the PosixPath output above
# suggests it is a pathlib.Path. If so, .replace(target) renames the file on disk
# (it is not str.replace), which would explain the FileNotFoundError below.
# Looks like a leftover scratch cell — confirm and remove.
a.replace('d')

FileNotFoundError: [Errno 2] No such file or directory: 'adas/asd/asd/df.jpg' -> 'd'