In [1]:
""" 
Create the image_id, part location visible dataset
Simply map that to the list of images in the dataset 
"""

' \nCreate the image_id, part location visible dataset\nSimply map that to the list of images in the dataset \n'

In [2]:
import pandas as pd 

In [3]:
# Read part location annotations

parts_annotation_df = pd.read_csv('/data/CUB/CUB-Original/CUB_200_2011/parts/part_locs.txt', sep=' ', header=None, names=['image_id', 'part_id', 'x', 'y', 'visible'])

parts_annotation_df = parts_annotation_df[parts_annotation_df.visible == 1]

In [4]:
# create a dictionary of part_id to part_name
id_parts_dict = {}

# Read the file and populate the dictionary
with open('/data/CUB/CUB-Original/CUB_200_2011/parts/parts.txt', 'r') as file:
    for line in file:
        key, value = line.strip().split(' ', 1)
        id_parts_dict[int(key)] = value

print(id_parts_dict)

{1: 'back', 2: 'beak', 3: 'belly', 4: 'breast', 5: 'crown', 6: 'forehead', 7: 'left eye', 8: 'left leg', 9: 'left wing', 10: 'nape', 11: 'right eye', 12: 'right leg', 13: 'right wing', 14: 'tail', 15: 'throat'}


In [5]:
parts_annotation_df['part_name'] = parts_annotation_df['part_id'].map(id_parts_dict)

In [6]:
parts_annotation_df

Unnamed: 0,image_id,part_id,x,y,visible,part_name
1,1,2,312.0,182.0,1,beak
4,1,5,186.0,45.0,1,crown
5,1,6,247.0,79.0,1,forehead
9,1,10,100.0,221.0,1,nape
10,1,11,183.0,101.0,1,right eye
...,...,...,...,...,...,...
176815,11788,11,136.0,133.0,1,right eye
176816,11788,12,102.0,316.0,1,right leg
176817,11788,13,94.0,224.0,1,right wing
176818,11788,14,36.0,329.0,1,tail


In [7]:
unique_image_ids = parts_annotation_df['image_id'].drop_duplicates().sample(n=100, random_state=6804)

In [8]:
selected_df = parts_annotation_df[parts_annotation_df['image_id'].isin(unique_image_ids)]

In [22]:
# Read in the image_id vs image_path file
from tqdm import tqdm
with open('/data/CUB/CUB-Original/CUB_200_2011/images.txt', 'r') as f:
    images = f.readlines()

image_id_path_dict = {}
for img in tqdm(images):
    image_id, path = img.strip().split()
    image_id_path_dict[int(image_id)] = path

100%|██████████| 11788/11788 [00:00<00:00, 2039032.31it/s]


In [23]:
# Add the image path to the selected_df
selected_df['image_path'] = selected_df['image_id'].map(image_id_path_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_df['image_path'] = selected_df['image_id'].map(image_id_path_dict)


In [25]:
selected_df

Unnamed: 0,image_id,part_id,x,y,visible,part_name,image_path
5851,391,2,276.0,178.0,1,beak,008.Rhinoceros_Auklet/Rhinoceros_Auklet_0051_7...
5852,391,3,215.0,197.0,1,belly,008.Rhinoceros_Auklet/Rhinoceros_Auklet_0051_7...
5853,391,4,234.0,190.0,1,breast,008.Rhinoceros_Auklet/Rhinoceros_Auklet_0051_7...
5854,391,5,253.0,168.0,1,crown,008.Rhinoceros_Auklet/Rhinoceros_Auklet_0051_7...
5855,391,6,265.0,170.0,1,forehead,008.Rhinoceros_Auklet/Rhinoceros_Auklet_0051_7...
...,...,...,...,...,...,...,...
175060,11671,11,314.0,166.0,1,right eye,199.Winter_Wren/Winter_Wren_0117_189999.jpg
175061,11671,12,210.0,290.0,1,right leg,199.Winter_Wren/Winter_Wren_0117_189999.jpg
175062,11671,13,210.0,194.0,1,right wing,199.Winter_Wren/Winter_Wren_0117_189999.jpg
175063,11671,14,169.0,120.0,1,tail,199.Winter_Wren/Winter_Wren_0117_189999.jpg


In [10]:
selected_df.part_name.unique()

array(['beak', 'belly', 'breast', 'crown', 'forehead', 'left wing',
       'nape', 'right eye', 'right wing', 'tail', 'throat', 'back',
       'left eye', 'left leg', 'right leg'], dtype=object)

In [None]:
""" 
crown, forehead and nape should be the same part: head 
left_wing and right_wing should be the same part: wing
left_eye and right_eye should be the same part: eye
left_leg and right_leg should be the same part: leg

"""

In [None]:
"""
Create a dictionary
{
    image_path: {
        part_name: [(x, y), ...]
    }
} 
"""

In [28]:
cub_part_dataset_dict = {}
for i, row in tqdm(selected_df.iterrows()):
    image_path = row['image_path']
    if image_path not in cub_part_dataset_dict:
        cub_part_dataset_dict[image_path] = {}
    part_name = row['part_name'].lower().strip()
    if part_name == 'crown' or part_name == 'forehead' or part_name == 'nape':
        part_name = 'head'
    elif part_name == 'left wing' or part_name == 'right wing':
        part_name = 'wing'
    elif part_name == 'left eye' or part_name == 'right eye':
        part_name = 'eye'
    elif part_name == 'left leg' or part_name == 'right leg':
        part_name = 'leg'
    else:
        part_name = part_name
    
    if part_name not in cub_part_dataset_dict[image_path].keys():
        cub_part_dataset_dict[image_path][part_name] = []
    
    cub_part_dataset_dict[image_path][part_name].append((row['x'], row['y']))

1223it [00:00, 36023.22it/s]


In [29]:
cub_part_dataset_dict

{'008.Rhinoceros_Auklet/Rhinoceros_Auklet_0051_797510.jpg': {'beak': [(276.0,
    178.0)],
  'belly': [(215.0, 197.0)],
  'breast': [(234.0, 190.0)],
  'head': [(253.0, 168.0), (265.0, 170.0), (240.0, 170.0)],
  'wing': [(170.0, 130.0), (215.0, 148.0)],
  'eye': [(259.0, 172.0)],
  'tail': [(154.0, 200.0)],
  'throat': [(254.0, 185.0)]},
 '015.Lazuli_Bunting/Lazuli_Bunting_0041_15152.jpg': {'back': [(293.0, 110.0)],
  'beak': [(235.0, 70.0)],
  'belly': [(239.0, 140.0)],
  'breast': [(239.0, 129.0)],
  'head': [(274.0, 67.0), (257.0, 61.0), (282.0, 91.0)],
  'eye': [(259.0, 68.0)],
  'leg': [(279.0, 215.0), (257.0, 212.0)],
  'wing': [(291.0, 135.0)],
  'tail': [(305.0, 286.0)],
  'throat': [(242.0, 87.0)]},
 '016.Painted_Bunting/Painted_Bunting_0096_15233.jpg': {'beak': [(226.0,
    155.0)],
  'belly': [(275.0, 247.0)],
  'breast': [(261.0, 205.0)],
  'head': [(266.0, 130.0), (239.0, 139.0), (311.0, 159.0)],
  'eye': [(253.0, 149.0)],
  'leg': [(339.0, 288.0), (287.0, 283.0)],
  'tail

In [30]:
import json
with open('/home/ksmehrab/AttentionGrounding/DatasetProcessing/cub_part_dataset_sampled.json', 'w') as f:
    json.dump(cub_part_dataset_dict, f, indent=4)