In [1]:
import json
import shutil
import xml.etree.ElementTree as ET
from pathlib import Path
from IPython.display import Image

import cv2
import numpy as np

from tqdm import tqdm
from tqdm.contrib import tenumerate

In [None]:
from torchvision import transforms

transforms.Compose

In [None]:
BASE_DIR = Path('F:/dataset_raw_data/ILSVRC')
IMAGE_DIR = BASE_DIR / 'Data/CLS-LOC'
ANNOTATION_DIR = BASE_DIR / 'Annotations/CLS-LOC'

BASE_SAVE_DIR = Path('./dataset/ILSVRC_original_size')

In [None]:
data_type = 'train'

train_image_dir = IMAGE_DIR / data_type
train_annotation_dir = ANNOTATION_DIR / data_type

train_images = sorted(train_image_dir.glob('**/*.JPEG'))
train_annotations = sorted(train_annotation_dir.glob('**/*.xml'))

save_dir = BASE_SAVE_DIR / data_type
save_dir.mkdir(parents=True, exist_ok=True)

In [None]:
category_list = []
for data_type in ['train', 'val']:
    train_image_dir = IMAGE_DIR / data_type
    train_annotation_dir = ANNOTATION_DIR / data_type

    train_images = sorted(train_image_dir.glob('**/*.JPEG'))
    train_annotations = sorted(train_annotation_dir.glob('**/*.xml'))

    save_dir = BASE_SAVE_DIR / data_type
    save_dir.mkdir(parents=True, exist_ok=True)

    for data_no, annotation_path in tenumerate(train_annotations):
        tree = ET.parse(annotation_path)
        root = tree.getroot()
        object_tree = root.find('object')
        bbox_tree = object_tree.find('bndbox')

        xmin = int(bbox_tree.find('xmin').text)
        ymin = int(bbox_tree.find('ymin').text)
        xmax = int(bbox_tree.find('xmax').text)
        ymax = int(bbox_tree.find('ymax').text)

        category = object_tree.find('name').text
        if category not in category_list:
            category_list.append(category)
        category_id = category_list.index(category)

        bbox = {
            'xmin': xmin,
            'ymin': ymin,
            'width': xmax - xmin,
            'height': ymax - ymin
        }
        bbox_size = bbox['width'] * bbox['height']

        annotation_data = [
            {
                'category': category,
                'category_id': category_id,
                'bbox': [bbox['xmin'], bbox['ymin'], bbox['width'], bbox['height']],
                'bbox_size': bbox_size
            }
        ]

        if data_type == 'train':
            image_path = train_image_dir / f'{category}/{annotation_path.stem}.JPEG'
        elif data_type == 'val':
            image_path = train_image_dir / f'{annotation_path.stem}.JPEG'
        save_image_path = save_dir / f'{str(data_no).zfill(8)}.jpg'
        # shutil.copy(image_path, save_image_path)
        with open(save_image_path.with_suffix('.json'), 'w') as f:
            json.dump(annotation_data, f, indent=2)

In [None]:
len(category_list)

In [None]:
train_image_dir = IMAGE_DIR / 'train'
temp = sorted(train_image_dir.glob('*'))
temp

In [2]:
BASE_DIR = Path('./dataset/ILSVRC_original_size')
SAVE_DIR = Path('./dataset/ILSVRC')
(SAVE_DIR / 'train').mkdir(exist_ok=True, parents=True)
(SAVE_DIR / 'val').mkdir(exist_ok=True, parents=True)

TARGET_IMAGE_SIZE = 224

HALF_SIZE = int(TARGET_IMAGE_SIZE / 2)

image_paths = sorted(BASE_DIR.glob('**/*.jpg'))

for index, image_path in tenumerate(image_paths):
    # print(image_path)
    annotation_path = image_path.with_suffix('.json')
    with annotation_path.open() as f:
        annotation_data = json.load(f)
    xmin, ymin, width, height = annotation_data[0]['bbox']
    # print(f'bbox info: {ymin=}, {xmin=}, {width=}, {height=}')

    image = cv2.imread(str(image_path))
    if index == 399686 or index == 577747:
        image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)

    # 画像サイズが足りていなければパディング
    image_height, image_width, _ = image.shape
    if image_width < TARGET_IMAGE_SIZE:
        lack_num = TARGET_IMAGE_SIZE - image_width
        half_lack_num = int(lack_num / 2)

        image = cv2.copyMakeBorder(image, top=0, bottom=0, left=half_lack_num, right=lack_num - half_lack_num, borderType=cv2.BORDER_REPLICATE)
        x_min += half_lack_num

    if image_height < TARGET_IMAGE_SIZE:
        lack_num = TARGET_IMAGE_SIZE - image_height
        half_lack_num = int(lack_num / 2)
        image = cv2.copyMakeBorder(image, top=half_lack_num, bottom=lack_num - half_lack_num, left=0, right=0, borderType=cv2.BORDER_REPLICATE)
        ymin += half_lack_num

    image_height, image_width, _ = image.shape
    # print(f'Padding後: {image_width=}, {image_height=}')
    # print(f'bbox info: {ymin=}, {xmin=}, {width=}, {height=}')
    # bboxの横幅が224より大きければ、bboxの値を使用
    if width > TARGET_IMAGE_SIZE:
        x_min = xmin
        x_max = xmin + width
    # 小さい場合は、224になるように切り出し
    else:
        x_center = int(xmin + width / 2)

        x_min = x_center - HALF_SIZE
        x_max = x_center + HALF_SIZE
        if x_min < 0:
            x_min = 0
            x_max = 224
        elif x_max >= image_width:
            x_max = image_width
            x_min = x_max - 224

    # print(f'{x_min=}, {x_max=}')

    # bboxの縦幅が224より大きければ、bboxの値を使用
    if height > TARGET_IMAGE_SIZE:
        y_min = ymin
        y_max = ymin + height
    # 小さい場合は、224になるように切り出し
    else:
        y_center = int(ymin + height / 2)    
        y_min = y_center - HALF_SIZE
        y_max = y_center + HALF_SIZE
        if y_min < 0:
            y_min = 0
            y_max = 224
        elif y_max >= image_height:
            y_max = image_height
            y_min = y_max - 224

    # print(f'{y_min=}, {y_max=}')

    cropped_image = image[y_min: y_max, x_min: x_max]

    cropped_image_height, cropped_image_width, _ = cropped_image.shape
    # print(f'{cropped_image_height=}, {cropped_image_width=}')

    if cropped_image_width != TARGET_IMAGE_SIZE or cropped_image_height != TARGET_IMAGE_SIZE:
        if cropped_image_width >= cropped_image_height:
            target_height = round(cropped_image_height * (TARGET_IMAGE_SIZE / cropped_image_width))
            # print(f'{target_width=}, {cropped_image_height=}, {cropped_image_width=}')
            cropped_image = cv2.resize(image, (TARGET_IMAGE_SIZE, target_height))

            resized_height, _, _ = cropped_image.shape
            
            lack_num = TARGET_IMAGE_SIZE - resized_height
            half_lack_num = int(lack_num / 2)

            # print(f'{cropped_image_height=}, {cropped_image_width=}, resized: {cropped_image.shape[:2]}')

            cropped_image = cv2.copyMakeBorder(cropped_image, half_lack_num, lack_num - half_lack_num, 0, 0, borderType=cv2.BORDER_REPLICATE)
        else:
            target_width = round(cropped_image_width * (TARGET_IMAGE_SIZE / cropped_image_height))
            # print(f'{target_width=}, {cropped_image_height=}, {cropped_image_width=}')
            cropped_image = cv2.resize(image, (target_width, TARGET_IMAGE_SIZE))

            _, resized_width, _ = cropped_image.shape

            lack_num = TARGET_IMAGE_SIZE - resized_width
            half_lack_num = int(lack_num / 2)

            # print(f'{cropped_image_height=}, {cropped_image_width=}, resized: {cropped_image.shape[:2]}')
            # print(half_lack_num, lack_num - half_lack_num)

            cropped_image = cv2.copyMakeBorder(cropped_image, 0, 0, half_lack_num, lack_num - half_lack_num, borderType=cv2.BORDER_REPLICATE)


    save_image_path = SAVE_DIR / image_path.parent.stem / image_path.name
    assert cropped_image.shape == (224, 224, 3), f'Missing, {cropped_image.shape}'

    cv2.imwrite(str(save_image_path), cropped_image)

    with save_image_path.with_suffix('.json').open('w') as f:
        json.dump(annotation_data[0], f, indent=2)

  0%|          | 0/594546 [00:00<?, ?it/s]

In [4]:
from PIL import Image

In [5]:
image_paths = sorted(SAVE_DIR.glob('**/*.jpg'))
sum_img = np.zeros((224, 224, 3))
for image_path in tqdm(image_paths):
    image = cv2.imread(str(image_path))
    sum_img += image

avg_img = sum_img / len(image_paths)
Image.from_array(avg_img)

100%|██████████| 594546/594546 [42:58<00:00, 230.62it/s]  


AttributeError: module 'PIL.Image' has no attribute 'from_array'

In [35]:
np.save('dataset/ILSVRC/average.np', avg_img)

In [17]:
test = np.array(
    [
        [
            [1, 1, 1, 1],
            [1, 1, 1, 1],
            [1, 1, 1, 1],
            [1, 1, 1, 1],
        ],
        [
            [2, 2, 2, 2],
            [2, 2, 2, 2],
            [2, 2, 2, 2],
            [2, 2, 2, 2],
        ],
        [
            [3, 3, 3, 3],
            [3, 3, 3, 3],
            [3, 3, 3, 3],
            [3, 3, 3, 3],
        ],
    ]
)
test

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[2, 2, 2, 2],
        [2, 2, 2, 2],
        [2, 2, 2, 2],
        [2, 2, 2, 2]],

       [[3, 3, 3, 3],
        [3, 3, 3, 3],
        [3, 3, 3, 3],
        [3, 3, 3, 3]]])

In [27]:
test.mean(axis=2), test.mean(axis=2).mean(axis=1)

(array([[1., 1., 1., 1.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.]]),
 array([1., 2., 3.]))

In [None]:
_, buf = cv2.imencode(".jpg", cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE))
display(Image(data=buf.tobytes()))

In [None]:
cropped_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)[ymin: ymin + height, xmin: xmin + width]
_, buf = cv2.imencode(".jpg", cropped_image)
display(Image(data=buf.tobytes()))

In [None]:
image.shape

In [None]:
[i for i in range(100) if i % 10 == 0]

In [None]:
a = 200

(200 + 112) - (200 - 112)

In [47]:
IMAGE_DIR = Path('./dataset/ILSVRC/val')
image_paths = list(IMAGE_DIR.glob('*.jpg'))
image_path = image_paths[0]

In [48]:
for image_path in tqdm(image_paths):
    image = cv2.imread(str(image_path))
    np.save(image_path.with_suffix('.npy'), image)

100%|██████████| 50000/50000 [04:17<00:00, 193.96it/s]


In [39]:
image = cv2.imread(str(image_path))

In [40]:
np.save(image_path.with_suffix('.npy'), image)

In [43]:
mode = 'npy'
if mode == 'jpg':
    image_path = image_paths[0]
else:
    image_path = image_paths[0].with_suffix('.npy')
image_path = str(image_path)

In [44]:
%%time
for _ in range(10000):
    image = cv2.imread(image_path)

CPU times: total: 15.6 ms
Wall time: 315 ms
