In this starter notebook, we aim to:

* Prepare the data for instance segmentation (convert to COCO format)
* Visualize the data (image and processing)

COCO format example:

* https://www.kaggle.com/mlwhiz/simpsons-main-characters
* https://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch
* https://www.kaggle.com/fmscole/convert-dataset-to-coco-format-tools

Augmentation:

* https://towardsdatascience.com/self-supervised-keypoint-learning-aade18081fc3
* https://github.com/qubvel/ttach

Insights:

* 6 images with multiple astronauts
* 68 annotations consist of a single polygon (~73%)
    

In [1]:
import os

# Every path used below is relative to the parent of the directory the kernel
# starts in. The target is remembered on the first run so that re-running this
# cell does not climb one more directory each time.
if "_PROJECT_ROOT" not in globals():
    _PROJECT_ROOT = os.path.abspath("../")
os.chdir(_PROJECT_ROOT)

In [2]:
import pandas as pd 
import numpy as np 
import glob
import json
import ast
from PIL import Image
from imantics import Annotation

In [3]:
# Directory prefix of the training images. It is joined to file names by plain
# string concatenation, so the trailing slash is required.
IMAGE_TRAIN_PATH = "data/raw/train/images/"

In [4]:
def generate_image_metadata(dirname):
    """
    Collect the file name, width and height of every image in a directory.

    Parameters
    ----------
    dirname : str
        Directory prefix. It is concatenated directly with the glob pattern
        ``*.*``, so it must end with a path separator.

    Returns
    -------
    pandas.DataFrame
        One row per matched file with the columns ``filename`` (base name),
        ``width`` and ``height``.
    """
    records = []
    for filename in glob.glob(dirname + '*.*'):
        # The context manager releases the file handle as soon as the size is read.
        with Image.open(filename) as img:
            width, height = img.size
        records.append(
            {"filename": os.path.basename(filename), "width": width, "height": height}
        )

    # Passing the columns explicitly keeps the schema stable for an empty directory.
    return pd.DataFrame(records, columns=["filename", "width", "height"])

def get_pose(x, level=1, values_per_pose=45):
    """
    Extract the values of one pose from a comma-separated pose string.

    Parameters
    ----------
    x : str
        Comma-separated values; several poses may be stored back to back.
    level : int
        1-based position of the pose to extract. A level below 1 returns the
        string unchanged.
    values_per_pose : int
        Number of values that make up one pose. The default of 45 matches the
        15 keypoints of 3 values each used in this notebook.

    Returns
    -------
    str
        The selected values joined by commas (empty if the string holds fewer
        poses than ``level``).
    """
    values = x.split(",")
    if level >= 1:
        # Slice the level-th chunk; works for any number of stacked poses.
        start = (int(level) - 1) * values_per_pose
        values = values[start:start + values_per_pose]
    return ",".join(values)

## Data Preparation

In [5]:
df_metadata = generate_image_metadata(IMAGE_TRAIN_PATH)
# Derive the numeric image id from the file name, e.g. "952799.jpg" -> 952799.
# regex=True is explicit because pandas >= 2.0 treats the pattern as a literal
# by default. The dot is escaped and the match is anchored to the end so only
# the extension is removed.
df_metadata["id"] = (
    df_metadata["filename"].str.replace(r"\.(?:png|jpg)$", "", regex=True).astype(int)
)

In [6]:
# Read the raw 2D ground-truth file; each line becomes one entry of `labels`.
with open(IMAGE_TRAIN_PATH + "annotations/truth2d.txt") as label_file:
    raw_labels = label_file.read()
labels = raw_labels.splitlines()

In [7]:
file_list = []
pose_list = []
polygon_list = []
for label in labels:
    # Skip blank lines; they would otherwise yield a row with an empty file name.
    if not label.strip():
        continue

    # The text before the first "[" is "<filename>,<pose values>"; every
    # following "[" chunk is a polygon group that becomes its own row.
    data = label.split("[")
    file_pose = data[0].split(",")

    if len(data) > 1:
        for polygon in data[1:]:
            file_list.append(file_pose[0])
            pose_list.append(",".join(file_pose[1:]))
            polygon_list.append(polygon.replace("]", ""))
    else:
        # No polygon on this line: keep the file with missing pose and polygon.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        file_list.append(file_pose[0])
        pose_list.append(np.nan)
        polygon_list.append(np.nan)

In [8]:
# One row per polygon group. polygon_cnt counts the ")(" boundaries plus one;
# pose_cnt is derived from the number of commas in the pose string. Rows
# without a polygon or pose get a count of 0.
df_label = pd.DataFrame(
    {"filename": file_list, "pose": pose_list, "polygon": polygon_list}
).assign(
    polygon_cnt=lambda d: (d["polygon"].str.count(r"\)\(") + 1).fillna(0),
    pose_cnt=lambda d: (d["pose"].str.count(",") * 2 / 3 / 2).fillna(0),
)
df_label

Unnamed: 0,filename,pose,polygon,polygon_cnt,pose_cnt
0,952799.jpg,,,0.0,0.0
1,626913.jpg,,,0.0,0.0
2,204037.jpg,,,0.0,0.0
3,771734.jpg,"238.0,191.0,2,428.0,207.0,2,334.0,291.0,2,237....","(242.8,130.0,264.0,111.8,268.5,111.8,271.5,107...",2.0,15.0
4,895632.jpg,"819.0,362.0,2,795.0,456.0,2,744.0,412.0,2,732....","(226.1,267.0,229.2,258.5,236.6,254.3,266.0,254...",1.0,15.0
...,...,...,...,...,...
87,140435.png,"515.0,199.0,2,604.0,185.0,2,556.0,263.0,2,0.0,...","(498.9,131.5,500.2,127.7,507.8,120.1,530.6,106...",1.0,30.0
88,140435.png,"515.0,199.0,2,604.0,185.0,2,556.0,263.0,2,0.0,...","(41.7,96.0,253.2,93.5,289.9,185.9,292.5,185.9,...",1.0,30.0
89,948061.png,"492.0,316.0,2,674.0,315.0,2,584.0,418.0,2,389....","(271.3,145.4,272.6,140.4,281.4,132.8,293.9,122...",1.0,15.0
90,949796.png,"0.0,0.0,0,416.0,268.0,1,383.0,341.0,2,0.0,0.0,...","(378.4,200.4,390.6,198.0,407.7,198.0,412.5,200...",1.0,15.0


In [9]:
# Distribution of the number of annotation rows per image. value_counts() is
# chained directly because the column names produced by
# value_counts().reset_index() differ between pandas 1.x and 2.x.
df_label["filename"].value_counts().value_counts()

1    80
2     6
Name: filename, dtype: int64

In [10]:
# How many rows have 0, 1, 2, ... polygons.
df_label["polygon_cnt"].value_counts()

1.0     68
2.0      9
3.0      6
4.0      4
0.0      3
5.0      1
11.0     1
Name: polygon_cnt, dtype: int64

In [11]:
# How many rows have each pose_cnt value.
df_label["pose_cnt"].value_counts()

15.0    77
30.0    12
0.0      3
Name: pose_cnt, dtype: int64

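Fix rows with `pose_cnt` > 15: they hold the keypoints of two astronauts in one string, so keep only the 15 keypoints that belong to each row.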

In [12]:
# Number the rows of each image (1, 2, ...); the number selects which chunk of
# the pose string belongs to the row.
df_label["r"] = df_label.groupby("filename").cumcount() + 1

# Only rows that actually carry a pose are rewritten.
rows_with_pose = df_label.loc[df_label["pose_cnt"] > 0, ["pose", "r"]]
for row_idx, pose_str, rank in rows_with_pose.itertuples(name=None):
    df_label.loc[row_idx, "pose"] = get_pose(pose_str, rank)


In [13]:
# Recount after the split: the number of values is commas + 1 here.
pose_value_cnt = df_label["pose"].str.count(",") + 1
df_label["pose_cnt"] = (pose_value_cnt * 2 / 3 / 2).fillna(0)
df_label["pose_cnt"].value_counts()

15.0    89
0.0      3
Name: pose_cnt, dtype: int64

In [14]:
# Flag rows whose annotation has more than one polygon (1) versus exactly one
# (0); rows without any polygon are not assigned a value.
has_polygon = df_label["polygon_cnt"] > 0
multi_polygon = df_label.loc[has_polygon, "polygon_cnt"] > 1
df_label.loc[has_polygon, "is_crowd"] = np.where(multi_polygon, 1, 0)

In [15]:
# Numeric image id taken from the file name, e.g. "771734.jpg" -> 771734.
# regex=True is explicit because pandas >= 2.0 treats the pattern as a literal
# by default. The dot is escaped and the match is anchored to the end so only
# the extension is removed.
df_label["image_id"] = (
    df_label["filename"].str.replace(r"\.(?:png|jpg)$", "", regex=True).astype(int)
)
# Expose the row position as an "index" column (used as the annotation id when
# the COCO annotations are built). The guard keeps a re-run of this cell from
# adding a second "level_0" column.
if "index" not in df_label.columns:
    df_label = df_label.reset_index()

In [16]:
# Show the prepared label table.
df_label

Unnamed: 0,index,filename,pose,polygon,polygon_cnt,pose_cnt,r,is_crowd,image_id
0,0,952799.jpg,,,0.0,0.0,1,,952799
1,1,626913.jpg,,,0.0,0.0,1,,626913
2,2,204037.jpg,,,0.0,0.0,1,,204037
3,3,771734.jpg,"238.0,191.0,2,428.0,207.0,2,334.0,291.0,2,237....","(242.8,130.0,264.0,111.8,268.5,111.8,271.5,107...",2.0,15.0,1,1.0,771734
4,4,895632.jpg,"819.0,362.0,2,795.0,456.0,2,744.0,412.0,2,732....","(226.1,267.0,229.2,258.5,236.6,254.3,266.0,254...",1.0,15.0,1,0.0,895632
...,...,...,...,...,...,...,...,...,...
87,87,140435.png,"515.0,199.0,2,604.0,185.0,2,556.0,263.0,2,0.0,...","(498.9,131.5,500.2,127.7,507.8,120.1,530.6,106...",1.0,15.0,1,0.0,140435
88,88,140435.png,"0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,277.0,287.0,2,38...","(41.7,96.0,253.2,93.5,289.9,185.9,292.5,185.9,...",1.0,15.0,2,0.0,140435
89,89,948061.png,"492.0,316.0,2,674.0,315.0,2,584.0,418.0,2,389....","(271.3,145.4,272.6,140.4,281.4,132.8,293.9,122...",1.0,15.0,1,0.0,948061
90,90,949796.png,"0.0,0.0,0,416.0,268.0,1,383.0,341.0,2,0.0,0.0,...","(378.4,200.4,390.6,198.0,407.7,198.0,412.5,200...",1.0,15.0,1,0.0,949796


### Convert to COCO format

Images require these values:

* file_name
* height, width
* id

Annotation consists of:

* segmentation
* num_keypoints
* area
* iscrowd
* keypoints
* image_id
* bbox
* category_id
* id

In [17]:
# Load the template config; its "images" and "annotations" lists are appended
# to by the following cells.
with open("data/cfg/cfg_2d_v1.json") as cfg_file:
    cfg = json.loads(cfg_file.read())

In [18]:
# Register every image (name, size, id) in the config.
cfg["images"].extend(
    {
        "file_name": image_row["filename"],
        "height": image_row["height"],
        "width": image_row["width"],
        "id": image_row["id"],
    }
    for _, image_row in df_metadata.iterrows()
)

In [19]:
# Build one COCO annotation per labelled row (rows without a pose are skipped).
for idx, row in df_label[~df_label["pose"].isnull()].iterrows():
    # "(x1,y1,...)(x2,y2,...)" -> [[x1, y1, ...], [x2, y2, ...]]
    seg = json.loads("[" + row["polygon"].replace("(", "[").replace(")", "]").replace("][", "],[") + "]")
    # Flat list of x, y, visibility triplets.
    kpt = json.loads("[" + row["pose"] + "]")

    # Read area and bbox while the image is open, then release the file handle.
    with Image.open(IMAGE_TRAIN_PATH + row["filename"]) as img:
        ann = Annotation(polygons=seg, image=img)
        area = ann.area
        # NOTE(review): COCO expects [x, y, width, height]; confirm that
        # iterating imantics' bbox yields that layout.
        bbox = list(ann.bbox)

    cfg["annotations"].append(
        {
            "segmentation": seg,
            # COCO defines num_keypoints as the number of labelled keypoints
            # (visibility > 0), not the length of the skeleton.
            "num_keypoints": sum(1 for visibility in kpt[2::3] if visibility > 0),
            "area": area,
            # Stored as a float column because of missing values; COCO expects 0/1.
            # NOTE(review): iscrowd=1 is set for multi-polygon annotations --
            # confirm this is the intended meaning for downstream tools.
            "iscrowd": int(row["is_crowd"]),
            "keypoints": kpt,
            "image_id": row["image_id"],
            "bbox": bbox,
            "category_id": 1,
            "id": row["index"]
        }
    )

In [20]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types."""

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        NumPy booleans, integers, floats and arrays are converted to ``bool``,
        ``int``, ``float`` and ``list``; anything else is delegated to the base
        class, which raises ``TypeError``.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [21]:
# Write the assembled config to disk; NpEncoder converts the NumPy values
# collected from the DataFrames.
with open("data/annotation_2d.json", "w") as out_file:
    json.dump(cfg, out_file, cls=NpEncoder, indent=4)

## EDA