[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/persimmon-persimmon/mnist-detection/blob/master/data_preparation.ipynb)

## マウント

In [None]:
from google.colab import drive
import glob
import os
# Mount Google Drive, then make the project folder the working directory so
# that the relative paths used below ("images/", "coco_*.json") land on Drive.
DRIVE_MOUNT_POINT = "/content/drive/"
drive.mount(DRIVE_MOUNT_POINT)

# An absolute path keeps this cell idempotent: a relative chdir only works on
# the first run, because afterwards the cwd is already the project folder.
PROJECT_DIR = os.path.join(DRIVE_MOUNT_POINT, "MyDrive", "mnist_detection")
try:
    os.chdir(PROJECT_DIR)
except OSError as e:
    # Best effort, as before: keep going in the current directory, but say so
    # instead of silently swallowing the failure.
    print(f"Could not change directory to {PROJECT_DIR}: {e}")
    print(f"Continuing in {os.getcwd()}")

## データ作成

In [None]:
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
import json
import numpy as np
import pandas as pd
import cv2
import random
from PIL import Image

In [None]:
# Seed for the train/validation split. Without it train_test_split shuffles
# differently on every run, so the generated dataset would not be reproducible.
SPLIT_SEED = 0


def group_by_digit(xs, ys):
    """Group images by their digit label.

    Args:
        xs: iterable of images.
        ys: iterable of labels in 0..9, aligned with ``xs``.

    Returns:
        dict mapping each digit 0..9 to the list of its images, in the same
        order as they appear in ``xs``.
    """
    grouped = {digit: [] for digit in range(10)}
    for x, y in zip(xs, ys):
        grouped[int(y)].append(x)
    return grouped


(_x_train_val, _y_train_val), (_x_test, _y_test) = mnist.load_data()
# Hold out 20% of the official training set for validation.
_x_train, _x_val, _y_train, _y_val = train_test_split(
    _x_train_val, _y_train_val, test_size=0.2, random_state=SPLIT_SEED
)

train = group_by_digit(_x_train, _y_train)
val = group_by_digit(_x_val, _y_val)
test = group_by_digit(_x_test, _y_test)
# split name -> {digit -> [images]}
image_set = {"train": train, "val": val, "test": test}

In [None]:
# ---------------------------------------------------------------------------
# Synthetic MNIST detection dataset.
#
# Every image is a (height x width) noise canvas split into four quadrants.
# Each quadrant independently receives either nothing (label -1) or one MNIST
# digit pasted at a random position inside that quadrant, so digits never
# overlap. Images are written to images/ and one COCO-style JSON file is
# written per split (coco_train.json, coco_val.json, coco_test.json).
# ---------------------------------------------------------------------------
n_image = {"train": 14000, "val": 4000, "test": 2000}  # images per split
SEED = 0
random.seed(SEED)     # drives label choice, digit choice and placement
np.random.seed(SEED)  # drives the background noise (it was unseeded before)
height = 224
width = 224
DIGIT_SIZE = 28  # side length of an MNIST digit in pixels
# Candidate labels for a quadrant: -1 means "leave empty", 0..9 is the digit.
ls = list(range(-1, 10))
# Sampling weights aligned with ls.
weight = [40, 10, 10, 15, 15, 20, 20, 25, 25, 30, 30]
os.makedirs("images", exist_ok=True)

# COCO output: the category list is shared by all splits.
DIGIT_NAMES = ["zero", "one", "two", "three", "four",
               "five", "six", "seven", "eight", "nine"]
# Ids are integers, as the COCO format expects (they used to be strings).
coco = {"categories": [{"id": digit, "name": name}
                       for digit, name in enumerate(DIGIT_NAMES)]}


def _quadrant_origin(quadrant, height, width, size):
    """Draw the top-left (row, col) of a size x size patch inside a quadrant.

    Quadrants are numbered 0..3: ``quadrant // 2`` selects the top/bottom half
    and ``quadrant % 2`` the left/right half. The row is drawn before the
    column so the ``random`` call order matches the original script.
    """
    if quadrant // 2 == 0:
        row = int(random.uniform(0, height // 2 - size))
    else:
        row = int(random.uniform(height // 2, height - size))
    if quadrant % 2 == 0:
        col = int(random.uniform(0, width // 2 - size))
    else:
        col = int(random.uniform(width // 2, width - size))
    return row, col


def _make_annotation(annotation_id, image_id, label, row, col, size):
    """Build one COCO annotation for a digit pasted at array position (row, col)."""
    # COCO boxes are [x, y, w, h] with x horizontal, i.e. the array column.
    x, y = col, row
    return {
        "segmentation": [[x, y, x + size, y, x + size, y + size, x, y + size]],
        "area": size * size,
        "iscrowd": False,
        "isbbox": True,
        "image_id": image_id,
        "bbox": [x, y, size, size],
        "category_id": label,
        "id": annotation_id,
    }


def _make_image_record(image_id, path, num_annotations, height, width):
    """Build the COCO image entry for one generated file."""
    return {
        "dataset_id": 1,  # was misspelled "dataet_id"
        "deleted": False,
        "file_name": path,
        "id": image_id,
        "num_annotations": num_annotations,  # was always left at 0
        "path": path,
        "width": width,
        "height": height,
    }


# Ids keep increasing across splits, so they are unique over the whole dataset.
image_id = 0
annotation_id = 0
for data_type in ["train", "val", "test"]:
    images = []
    annotations = []
    for t in range(1, n_image[data_type] + 1):
        # Noise background in 0..126; a digit adds at most 255 // 3 = 85, so
        # the sum stays within the uint8 range.
        canvas = np.int64(np.random.random((height, width, 3)) * 127)
        path = f"images/{data_type}_{str(t).zfill(6)}.jpg"
        image_id += 1
        num_annotations = 0
        for quadrant, label in enumerate(random.choices(ls, k=4, weights=weight)):
            if label == -1:
                continue  # this quadrant stays empty
            digit = random.choice(image_set[data_type][label])
            row, col = _quadrant_origin(quadrant, height, width, DIGIT_SIZE)
            # Add the dimmed digit to all three channels at once.
            canvas[row:row + DIGIT_SIZE, col:col + DIGIT_SIZE, :] += \
                np.int64(digit / 3)[:, :, np.newaxis]
            annotation_id += 1
            num_annotations += 1
            annotations.append(
                _make_annotation(annotation_id, image_id, label, row, col, DIGIT_SIZE)
            )
        # Write an explicit uint8 image and fail loudly: cv2.imwrite only
        # returns False when it cannot write the file.
        if not cv2.imwrite(path, canvas.astype(np.uint8)):
            raise IOError(f"failed to write {path}")
        images.append(_make_image_record(image_id, path, num_annotations, height, width))

    # One JSON file per split; `coco` itself keeps only the shared categories.
    with open(f"coco_{data_type}.json", "w") as f:
        json.dump({**coco, "images": images, "annotations": annotations}, f)

In [None]:
# Number of generated files under images/ (all three splits together).
sum(1 for _ in glob.iglob("images/*"))

20000

## データ確認

In [None]:
# Install detectron2 from the prebuilt wheel index that matches the installed
# torch build. pyyaml is pinned to 5.1 before detectron2 is installed.
!pip install pyyaml==5.1
import torch
# First two components of the torch version, e.g. "1.9" for "1.9.0+cu102".
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# Text after the "+" in the torch version, e.g. "cu102".
# NOTE(review): if the version string has no "+" suffix this is the whole
# version string, and the wheel URL below will not point at a valid index.
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
# $CUDA_VERSION and $TORCH_VERSION are interpolated from the Python variables above.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html

In [None]:
from google.colab.patches import cv2_imshow
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances

In [None]:
# Register each split's COCO file with detectron2 under "mnist_detection_<split>".
# The image root is "" because file_name in the JSON is already a path relative
# to the working directory.
for data_type in ["train","val","test"]:
    reg_name = f"mnist_detection_{data_type}"
    # Re-running this cell would otherwise fail because the name is already
    # registered; check for that explicitly instead of catching every
    # exception, so that genuine errors are not hidden.
    if reg_name in DatasetCatalog.list():
        print(f"{reg_name} is already registered; skipping")
        continue
    register_coco_instances(reg_name, {}, f"coco_{data_type}.json", "")

In [None]:
# Visual sanity check: for every split, draw the ground-truth annotations of
# three randomly chosen images.
for data_type in ("train", "val", "test"):
    reg_name = "mnist_detection_" + data_type
    mnist_detection_metadata = MetadataCatalog.get(reg_name)
    dataset_dicts = DatasetCatalog.get(reg_name)
    print(data_type)
    picked = random.sample(dataset_dicts, 3)
    for record in picked:
        image_path = record["file_name"]
        print(image_path)
        bgr = cv2.imread(image_path)
        # OpenCV loads BGR; the channel axis is reversed before handing the
        # image to the Visualizer, and reversed again for cv2_imshow.
        rgb = bgr[:, :, ::-1]
        drawer = Visualizer(rgb, metadata=mnist_detection_metadata, scale=1.0)
        rendered = drawer.draw_dataset_dict(record)
        cv2_imshow(rendered.get_image()[:, :, ::-1])

実行すると `Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.` という警告が出るが、無視してよい。  
これはカテゴリIDが 1 〜 カテゴリ数 の連番になっていない場合に出る警告で(このデータセットのIDは 0 〜 9)、メッセージのとおり detectron2 側で自動的にマッピングされる。  
該当箇所のソースコードは以下。  

https://detectron2.readthedocs.io/en/latest/_modules/detectron2/data/datasets/coco.html


---

