In [1]:
import os
import re
import json
import errno
import pandas as pd

from shutil import rmtree

from tqdm import tqdm
from glob import iglob
from ast import literal_eval
from os.path import join as pjoin

In [2]:
SRC_DIR = "/home/jhkim980112/workspace/code/CV_project/fire/playground"

DATA_DIR = "/home/jhkim980112/workspace/dataset/CUB_200_2011" 
CUB_DATA_DIR = pjoin(DATA_DIR, "CUB_200_2011")

CUB_IMAGE_DIR = pjoin(CUB_DATA_DIR, "images")
CUB_ATTR_DIR = pjoin(CUB_DATA_DIR, "attributes")
CUB_PART_DIR = pjoin(CUB_DATA_DIR, "parts")

In [3]:
def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise
def del_folder(path):
    try:
        rmtree(path)
    except:
        pass

def read_json(json_path):
    with open(json_path, 'r') as f:
        return json.load(f)

def write_json(save_path, json_obj):
    with open(save_path, 'w', encoding='utf-8') as make_file:
        json.dump(json_obj, make_file, indent="\t")
    return

In [4]:
def read_text(text_path):
    with open(text_path, 'r') as f:
        lines = f.readlines()
    return list(map(lambda x: x.strip(), lines))

In [5]:
parts_lines = read_text(pjoin(CUB_PART_DIR, "parts.txt"))
parts_map = {int(p.split()[0]): ' '.join(p.split()[1:]) for p in parts_lines}

parts_map

{1: 'back',
 2: 'beak',
 3: 'belly',
 4: 'breast',
 5: 'crown',
 6: 'forehead',
 7: 'left eye',
 8: 'left leg',
 9: 'left wing',
 10: 'nape',
 11: 'right eye',
 12: 'right leg',
 13: 'right wing',
 14: 'tail',
 15: 'throat'}

In [6]:
image_path_df = pd.read_csv(pjoin(CUB_DATA_DIR, "images.txt"), sep=" ", \
    names=['id', 'path'])
image_split_df = pd.read_csv(pjoin(CUB_DATA_DIR, "train_test_split.txt"), sep=" ", \
    names=['id', 'split'])

In [7]:
image_df = pd.merge(image_path_df, image_split_df, how='inner', on='id')
image_df

Unnamed: 0,id,path,split
0,1,001.Black_footed_Albatross/Black_Footed_Albatr...,0
1,2,001.Black_footed_Albatross/Black_Footed_Albatr...,1
2,3,001.Black_footed_Albatross/Black_Footed_Albatr...,0
3,4,001.Black_footed_Albatross/Black_Footed_Albatr...,1
4,5,001.Black_footed_Albatross/Black_Footed_Albatr...,1
...,...,...,...
11783,11784,200.Common_Yellowthroat/Common_Yellowthroat_00...,1
11784,11785,200.Common_Yellowthroat/Common_Yellowthroat_00...,0
11785,11786,200.Common_Yellowthroat/Common_Yellowthroat_00...,0
11786,11787,200.Common_Yellowthroat/Common_Yellowthroat_00...,1


In [15]:
image_df['split'] = image_df['split'].astype("str")
image_df

Unnamed: 0,id,path,split
0,1,001.Black_footed_Albatross/Black_Footed_Albatr...,0
1,2,001.Black_footed_Albatross/Black_Footed_Albatr...,1
2,3,001.Black_footed_Albatross/Black_Footed_Albatr...,0
3,4,001.Black_footed_Albatross/Black_Footed_Albatr...,1
4,5,001.Black_footed_Albatross/Black_Footed_Albatr...,1
...,...,...,...
11783,11784,200.Common_Yellowthroat/Common_Yellowthroat_00...,1
11784,11785,200.Common_Yellowthroat/Common_Yellowthroat_00...,0
11785,11786,200.Common_Yellowthroat/Common_Yellowthroat_00...,0
11786,11787,200.Common_Yellowthroat/Common_Yellowthroat_00...,1


In [17]:
for idx in image_df.index:
    bird_class = int(image_df.at[idx, "path"].split(".")[0])
    if bird_class <= 100:
        image_df.at[idx, "split"] = "train"
    elif bird_class <= 200:
        image_df.at[idx, "split"] = "test"
        
image_df['split'].value_counts()

test     5924
train    5864
Name: split, dtype: int64

In [18]:
part_locs_lines = read_text(pjoin(CUB_PART_DIR, "part_locs.txt"))
part_click_locs_lines = read_text(pjoin(CUB_PART_DIR, "part_click_locs.txt"))
len(part_locs_lines), len(part_click_locs_lines)

(176820, 794258)

In [19]:
part_locs = list(map(lambda x: tuple(map(float, x.split())), part_locs_lines))

cub_part_annotations = []
for i, row in tqdm(image_df.iterrows(), total=len(image_df)):
    parts_for_img = list(filter(lambda x: int(x[0] - 1) == i, part_locs))
    parts_annot = []
    for (_, part_id, x, y, visible) in parts_for_img:
        parts_annot += [dict(part_id=str(part_id),
                            part_name=parts_map[part_id],
                            x=x,
                            y=y,
                            visible=bool(visible))]
        
    cub_part_annotations += [dict(image_path=row['path'],
                                image_id=str(i + 1),
                                split=row['split'],
                                parts=parts_annot)]

100%|██████████| 11788/11788 [03:09<00:00, 62.34it/s]


In [22]:
cub_part_annotations

[{'image_path': '001.Black_footed_Albatross/Black_Footed_Albatross_0046_18.jpg',
  'image_id': '1',
  'split': True,
  'parts': [{'part_id': '1.0',
    'part_name': 'back',
    'x': 0.0,
    'y': 0.0,
    'visible': False},
   {'part_id': '2.0',
    'part_name': 'beak',
    'x': 312.0,
    'y': 182.0,
    'visible': True},
   {'part_id': '3.0',
    'part_name': 'belly',
    'x': 0.0,
    'y': 0.0,
    'visible': False},
   {'part_id': '4.0',
    'part_name': 'breast',
    'x': 0.0,
    'y': 0.0,
    'visible': False},
   {'part_id': '5.0',
    'part_name': 'crown',
    'x': 186.0,
    'y': 45.0,
    'visible': True},
   {'part_id': '6.0',
    'part_name': 'forehead',
    'x': 247.0,
    'y': 79.0,
    'visible': True},
   {'part_id': '7.0',
    'part_name': 'left eye',
    'x': 0.0,
    'y': 0.0,
    'visible': False},
   {'part_id': '8.0',
    'part_name': 'left leg',
    'x': 0.0,
    'y': 0.0,
    'visible': False},
   {'part_id': '9.0',
    'part_name': 'left wing',
    'x': 0.0,
 

In [20]:
train_annot = list(filter(lambda x: x['split'] == "train", cub_part_annotations))
test_annot = list(filter(lambda x: x['split'] == "test", cub_part_annotations))

In [21]:
len(train_annot), len(test_annot)

(0, 0)

In [20]:
#from sklearn.model_selection import train_test_split

#train_annot, val_annot = train_test_split(entire_annot, test_size=0.1, random_state=42, shuffle=True)

In [21]:
len(train_annot), len(test_annot)

(5394, 600, 5794)

In [22]:
save_dir = pjoin(DATA_DIR, "annotations")

mkdir_p(save_dir)
#write_json(save_path=pjoin(save_dir, "data.json"), json_obj=entire_annot)
write_json(save_path=pjoin(save_dir, "train.json"), json_obj=train_annot)
#write_json(save_path=pjoin(save_dir, "val.json"), json_obj=val_annot)
write_json(save_path=pjoin(save_dir, "test.json"), json_obj=test_annot)