In [1]:
import os
import cv2
import math
import json
import glob
import random
import numpy as np
import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt

# Class names used throughout this notebook.
# The list position is significant: get_classes_names() looks names up by
# indexing this list with the pixel values found in a label image, so index 0
# is "background", index 1 is "aeroplane", and so on (21 entries in total).
# The strings are also used verbatim as DataFrame column names further down.
VOC_CLASSES = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "potted plant",
    "sheep",
    "sofa",
    "train",
    "tv/monitor",
]


# 21 three-component colour triples, one per entry of VOC_CLASSES.
# NOTE(review): presumably entry i is the RGB colour of VOC_CLASSES[i] in the
# standard PASCAL VOC segmentation palette -- confirm the channel order before
# using it with OpenCV (which works in BGR). Not referenced by the cells
# visible in this part of the notebook.
VOC_COLORMAP = [
    [0, 0, 0],
    [128, 0, 0],
    [0, 128, 0],
    [128, 128, 0],
    [0, 0, 128],
    [128, 0, 128],
    [0, 128, 128],
    [128, 128, 128],
    [64, 0, 0],
    [192, 0, 0],
    [64, 128, 0],
    [192, 128, 0],
    [64, 0, 128],
    [192, 0, 128],
    [64, 128, 128],
    [192, 128, 128],
    [0, 64, 0],
    [128, 64, 0],
    [0, 192, 0],
    [128, 192, 0],
    [0, 64, 128],
]

In [2]:
# get classes list from image path        
def get_classes_names(path, class_names):
    """Return the names of the classes that occur in a label image.

    The image at ``path`` is read as a single-channel grayscale image and
    every distinct pixel value is treated as an index into ``class_names``.

    Args:
        path: Path of the label (segmentation mask) image.
        class_names: Sequence of class names, indexed by pixel value.

    Returns:
        List of class names, ordered by ascending pixel value.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imread does not raise on failure; it returns None, which would
        # otherwise surface later as an obscure indexing error.
        raise FileNotFoundError(
            "Could not read label image: {}".format(path)
        )

    n_classes = len(class_names)
    # Pixel values without an entry in class_names (e.g. a 255 "void"/border
    # label, if the masks carry one) are skipped instead of raising IndexError.
    return [
        class_names[int(value)]
        for value in np.unique(gray)
        if int(value) < n_classes
    ]

In [39]:
# Annotation list in .odgt format: a text file with one JSON object per line.
filename = '../supervised_odgts/VOCtrain_1000.odgt'

with open(filename, 'r') as f:
    data = f.readlines()

# Parse one record per line. Whitespace-only lines (such as a trailing empty
# line at the end of the file) are skipped, since json.loads would otherwise
# raise JSONDecodeError on them.
dict_data = [json.loads(val) for val in data if val.strip()]
df = pd.DataFrame(dict_data)

In [40]:
# Per-class presence flags, one list per class name with one entry per row of
# df: 1 means the class occurs in that row's label image, 0 means it does not.
image_ids = []
classes = {name: [] for name in VOC_CLASSES}

for segm_path in df['fpath_segm']:
    # Image id = label file name without its directory and without anything
    # after the first dot.
    file_name = segm_path.split('/')[-1]
    image_ids.append(file_name.split('.')[0])

    present = get_classes_names(segm_path, VOC_CLASSES)
    for name, flags in classes.items():
        flags.append(1 if name in present else 0)

# Attach the id column first, then one 0/1 indicator column per class.
df['id'] = image_ids
for name, flags in classes.items():
    df[name] = flags

In [42]:
# Preview the first five rows, including the new id and per-class columns.
df.head(n=5)

Unnamed: 0,fpath_img,fpath_segm,width,height,id,background,aeroplane,bicycle,bird,boat,...,diningtable,dog,horse,motorbike,person,potted plant,sheep,sofa,train,tv/monitor
0,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,281,500,2007_000032,1,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,375,500,2007_000039,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,375,500,2007_000063,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,375,500,2007_000068,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,375,500,2007_000121,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [43]:
# df.to_csv('../data/voc_train_data.csv')

In [45]:
# First rows whose label image contains the "aeroplane" class.
df.loc[df['aeroplane'] == 1].head()

Unnamed: 0,fpath_img,fpath_segm,width,height,id,background,aeroplane,bicycle,bird,boat,...,diningtable,dog,horse,motorbike,person,potted plant,sheep,sofa,train,tv/monitor
0,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,281,500,2007_000032,1,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
7,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,333,500,2007_000243,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,343,500,2007_000256,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,375,500,2007_000480,1,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
21,/data/sara/semantic-segmentation-pytorch/datas...,/data/sara/semantic-segmentation-pytorch/datas...,333,500,2007_000648,1,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
