In [1]:
import os.path as osp
import json
import numpy as np
from PIL import Image


### json 파일 로드 후 이미지 불러오기

In [2]:
# Notebook-wide configuration: which language subsets to read, which split,
# and the directory that contains the `<language>_receipt` folders.
_lang_list = [
    'chinese',
    'japanese',
    'thai',
    'vietnamese',
]
split = 'train'
root_dir = 'data'

In [3]:
# Merge the per-language `ufo/<split>.json` annotation files into a single
# dict whose 'images' mapping is keyed by image filename.
total_anno = {'images': {}}
for nation in _lang_list:
    anno_path = osp.join(root_dir, '{}_receipt/ufo/{}.json'.format(nation, split))
    with open(anno_path, 'r', encoding='utf-8') as f:
        anno = json.load(f)
    # On a filename collision the language read later overwrites the earlier entry.
    total_anno['images'].update(anno['images'])

In [4]:
# From here on `anno` refers to the merged annotations. Sorting the filenames
# gives every later loop a fixed iteration order.
anno = total_anno
image_fnames = sorted(anno['images'])

In [None]:
# Display the complete sorted list of image filenames.
image_fnames

In [6]:
# Inspect one filename: the second dot-separated field is the language code
# that `_infer_dir` reads to pick the image directory.
image_fnames[1]

'extractor.ja.in_house.appen_000029_page0001.jpg'

In [7]:
def _infer_dir(fname):
    """Return the image directory for a receipt image, based on its language code.

    The language code is the second dot-separated field of ``fname``
    (``'ja'`` in ``'extractor.ja.in_house.appen_000029_page0001.jpg'``).

    Args:
        fname: Image file name.

    Returns:
        ``osp.join(root_dir, '<language>_receipt', 'img', split)``, built from
        the notebook-level ``root_dir`` and ``split`` variables.

    Raises:
        ValueError: If ``fname`` has no second dot-separated field, or that
            field is not one of ``zh``, ``ja``, ``th``, ``vi``.
    """
    lang_by_code = {
        'zh': 'chinese',
        'ja': 'japanese',
        'th': 'thai',
        'vi': 'vietnamese',
    }
    fields = fname.split('.')
    # A name without any dot used to fail with an IndexError; report it the
    # same way as an unknown code so callers only deal with ValueError.
    lang_indicator = fields[1] if len(fields) > 1 else None
    if lang_indicator not in lang_by_code:
        raise ValueError(
            f'cannot infer language from file name {fname!r}: '
            f'expected one of {sorted(lang_by_code)} as the second '
            f'dot-separated field, got {lang_indicator!r}'
        )
    lang = lang_by_code[lang_indicator]
    return osp.join(root_dir, f'{lang}_receipt', 'img', split)

In [8]:
# Open only the first file of the sorted list as a sample image; the slice
# limits the loop to at most one iteration (and to none if the list is empty).
for idx, image_fname in enumerate(image_fnames[:1]):
    image_fpath = osp.join(_infer_dir(image_fname), image_fname)
    image = Image.open(image_fpath)

### 전체 이미지의 채널별 평균

In [25]:
# Rebind `image` to its NumPy array form so array statistics can be computed.
# NOTE(review): the result depends on whatever `image` held when this cell ran.
image = np.array(image)

In [26]:
# Dimensions of the sample image array.
image.shape

(3382, 1278, 3)

In [27]:
# Mean over every element of the array (all axes together), on the raw value
# scale, i.e. not divided by 255.
image.mean()

154.28883442583353

In [28]:
# Average over the first two axes, leaving one value per entry of the last
# axis, then divide by 255.0.
a = image.mean(axis=(0,1)) / 255.0
a.shape

(3,)

In [15]:
# Scratch check of how per-image mean vectors stack into a 2-D array:
# the same normalised mean vector is stacked twice.
np.stack([image.mean(axis=(0, 1)) / 255.0] * 2)

array([[0.63214903, 0.60755508, 0.54707624],
       [0.63214903, 0.60755508, 0.54707624]])

In [None]:
# Build `arr`, an (N, 3) array holding the per-channel mean (divided by 255)
# of every image in `image_fnames`.
#
# Rows are gathered in a list and joined once after the loop: calling
# np.concatenate inside the loop copies the whole accumulated array again on
# every iteration.
channel_mean_rows = []

for image_fname in image_fnames:
    image_fpath = osp.join(_infer_dir(image_fname), image_fname)
    # Image.open keeps the underlying file open; the context manager closes it
    # as soon as the pixels have been copied into the NumPy array.
    with Image.open(image_fpath) as pil_image:
        if pil_image.mode != 'RGB':
            # Report the unexpected mode, then force three channels so every
            # row has exactly three columns.
            print(pil_image.mode)
            rgb_image = pil_image.convert('RGB')
        else:
            rgb_image = pil_image
        image = np.array(rgb_image)
    img_mean = image.mean(axis=(0, 1)) / 255.0
    img_mean = np.expand_dims(img_mean, axis=0)  # (3,) -> (1, 3)
    channel_mean_rows.append(img_mean)

# Keep the original empty (0, 3) result when there are no images.
if channel_mean_rows:
    arr = np.concatenate(channel_mean_rows)
else:
    arr = np.empty((0, 3), dtype=float)


In [18]:
# One row per processed image, one column per channel.
arr.shape

(400, 3)

In [38]:
# Shape of the most recent per-image mean row.
# NOTE(review): the recorded output (1, 4) has four columns, which would not
# concatenate with the (0, 3) accumulator. It looks like it comes from a run
# made before the RGB conversion was added; confirm and re-run.
img_mean.shape

(1, 4)

In [39]:
# Filename that `image_fname` was left pointing at by the most recent loop run.
image_fname

'extractor.th.in_house.appen_000110_page0001.png'

In [41]:
# Re-read the file at `image_fpath` (whatever path the last executed cell left
# behind) without any mode conversion, to inspect its native channel count.
image = np.array(Image.open(image_fpath))


In [43]:
# Shape of the array loaded without mode conversion.
image.shape

(829, 465, 4)

### 전체 이미지의 채널별 평균의 mean, std

In [21]:
# Average of the per-image channel means over all images. Every image counts
# equally here, whatever its pixel dimensions.
arr.mean(axis=0)

array([0.67817392, 0.65295607, 0.62366413])

In [22]:
# Standard deviation of the per-image channel means.
# NOTE(review): this measures how much the image-level means vary across the
# dataset, not the spread of individual pixel values; confirm that this is
# the quantity the downstream use expects.
arr.std(axis=0)

array([0.16878251, 0.18058526, 0.20039655])

### binarization - adaptive threshold 적용한 mean, std 구하기

In [18]:
import cv2
import matplotlib.pyplot as plt

In [19]:
# Make sure `image` is a NumPy array.
# NOTE(review): this relies on whatever `image` an earlier cell left in the
# kernel; on a fresh top-to-bottom run it may be a different image.
image = np.array(image)

In [20]:
# Dimensions of the array currently bound to `image`.
image.shape

(3382, 1278, 3)

In [25]:
from tqdm import tqdm

# Parameters of cv2.adaptiveThreshold: the side length (odd, in pixels) of the
# neighbourhood used for the local threshold, and the constant subtracted from
# the Gaussian-weighted neighbourhood mean.
ADAPTIVE_BLOCK_SIZE = 11
ADAPTIVE_C = 2

# Build `arr`, an (N, 3) array holding, for every image, the mean of its
# adaptive-threshold binarisation (divided by 255) repeated for three channels.
# Rows are gathered in a list and joined once after the loop instead of
# re-copying the accumulated array with np.concatenate on every iteration.
binary_mean_rows = []

for image_fname in tqdm(image_fnames):
    image_fpath = osp.join(_infer_dir(image_fname), image_fname)

    # cv2.imread returns None instead of raising when a file cannot be read;
    # fail here with the offending path rather than inside cvtColor.
    image = cv2.imread(image_fpath)
    if image is None:
        raise OSError(f'cv2.imread could not read {image_fpath!r}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    adaptive_threshold = cv2.adaptiveThreshold(
        gray_image,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        ADAPTIVE_BLOCK_SIZE,
        ADAPTIVE_C,
    )

    # The binarised image has a single channel, so its mean is the same for
    # all three output columns. Compute it once and repeat it, rather than
    # stacking three copies of the image and averaging each of them.
    binary_mean = adaptive_threshold.mean() / 255.0
    adaptive_threshold_mean = np.full((1, 3), binary_mean)
    binary_mean_rows.append(adaptive_threshold_mean)

# Keep the original empty (0, 3) result when there are no images.
if binary_mean_rows:
    arr = np.concatenate(binary_mean_rows)
else:
    arr = np.empty((0, 3), dtype=float)

100%|██████████| 400/400 [01:16<00:00,  5.25it/s]


In [26]:
# Average of the per-image binarised means over all images. The three columns
# are identical because each row repeats one single-channel mean three times.
arr.mean(axis=0)

array([0.82856762, 0.82856762, 0.82856762])

In [27]:
# Standard deviation of the per-image binarised means.
# NOTE(review): this is the spread of image-level means across the dataset,
# not a pixel-level standard deviation; confirm this is the intended value.
arr.std(axis=0)

array([0.08780395, 0.08780395, 0.08780395])