__конвертор датасета PASCAL для семантического сегментатора изображений__

Евгений Борисов borisov.e@solarl.ru

In [None]:
# PASCAL VOC2007, VOC2012 http://host.robots.ox.ac.uk/pascal/VOC/
#
# !wget -с http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
# !wget -с http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
# !wget -с http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar

In [None]:
# VOC           # path:  /home/yang/dataset/VOC
# ├── test
# |    └──VOCdevkit
# |        └──VOC2007 (from VOCtest_06-Nov-2007.tar)
# └── train
#      └──VOCdevkit
#          └──VOC2007 (from VOCtrainval_06-Nov-2007.tar)
#          └──VOC2012 (from VOCtrainval_11-May-2012.tar)

https://github.com/YunYang1994/TensorFlow2.0-Examples/tree/master/5-Image_Segmentation/FCN

In [1]:
from os.path import join as pjoin
from os import listdir

import numpy as np
from PIL import Image
import pickle
import pandas as pd

from tqdm import tqdm
    
# from utils import colormap

In [2]:
DATA_ROOT_FOLDER = 'data/pascal/VOC'
RESULT_FOLDER = 'data/pascal_my'
IM_W, IM_H = 256,256
# IM_W, IM_H = 224,224

In [3]:
# !mkdir data/pascal_my

In [4]:
# !ls data/pascal/VOC/train/VOCdevkit/VOC2007/JPEGImages/*.jpg > data/pascal/files_input_train_2007.txt
# !ls data/pascal/VOC/train/VOCdevkit/VOC2012/JPEGImages/*.jpg > data/pascal/files_input_train_2012.txt
# !ls data/pascal/VOC/train/VOCdevkit/VOC2007/SegmentationClass/*.png > data/pascal/files_output_train_2007.txt
# !ls data/pascal/VOC/train/VOCdevkit/VOC2012/SegmentationClass/*.png > data/pascal/files_output_train_2012.txt

In [5]:
def get_files(d):
    return [
            [f,f.split('.')[0]] for f in listdir(d) 
        ]

def make_file_pair(d):
    fi = get_files( pjoin(d,'JPEGImages') )
    fo = get_files( pjoin(d,'SegmentationClass') )
    
    df = pd.DataFrame(fi).merge(pd.DataFrame(fo), on=[1] )
    df.columns = ['fname_img','dname','fname_msk']
    
    df['dname'] = d
    df['fname_img'] = df['fname_img'].apply(lambda s: '%s/%s'%('JPEGImages',s) )
    df['fname_msk'] = df['fname_msk'].apply(lambda s: '%s/%s'%('SegmentationClass',s) )
        
    return df[['dname','fname_img','fname_msk']]


def pascal_segmentation_files(d):
    df = pd.concat([
            make_file_pair( pjoin(d,'train/VOCdevkit/VOC2007') ),
            make_file_pair( pjoin(d,'train/VOCdevkit/VOC2012') )
        ])

    df['is_train'] = True
    
    df = pd.concat([df,make_file_pair( pjoin(d,'test/VOCdevkit/VOC2007') ) ])

    return df.fillna(False).reset_index(drop=True)


files = pascal_segmentation_files(DATA_ROOT_FOLDER)

In [6]:
# files

In [7]:
files = pd.concat([
        files,
        pd.DataFrame([
                Image.open( pjoin(p,fi) ).size + \
                Image.open( pjoin(p,fo) ).size
                for p,fi,fo,_ in files.values.tolist()
            ],columns=['wi','hi','wm','hm'])
    ],axis=1)

In [8]:
assert len( files.query('wi != wm') )==0
assert len( files.query('hi != hm') )==0

In [9]:
CLASS_NAME = ['background','aeroplane','bicycle','bird','boat',
           'bottle','bus','car','cat','chair','cow','diningtable',
           'dog','horse','motorbike','person','potted plant',
           'sheep','sofa','train','tv/monitor']


# RGB color for each class
COLORS = [[0,0,0],[128,0,0],[0,128,0], [128,128,0], [0,0,128],
            [128,0,128],[0,128,128],[128,128,128],[64,0,0],[192,0,0],
            [64,128,0],[192,128,0],[64,0,128],[192,0,128],
            [64,128,128],[192,128,128],[0,64,0],[128,64,0],
            [0,192,0],[128,192,0],[0,64,128]]

CLASS_IDX = { i:n for i,n in enumerate(CLASS_NAME)}

COLORMAP = { tuple(c):i for i,c in  enumerate(COLORS) }

RGB_MEAN = np.array([0.485, 0.456, 0.406])

RGB_STD = np.array([0.229, 0.224, 0.225])

In [10]:
files.to_csv(pjoin(RESULT_FOLDER,'files.csv'),index=False,sep='\t')

with open(pjoin(RESULT_FOLDER,'class_idx.pkl'),'wb') as f: pickle.dump(CLASS_IDX,f)
with open(pjoin(RESULT_FOLDER,'colormap.pkl'),'wb') as f: pickle.dump(COLORMAP,f)
    
np.save(pjoin(RESULT_FOLDER,'rgb_mean.npy'), RGB_MEAN, )
np.save(pjoin(RESULT_FOLDER,'rgb_std.npy'), RGB_STD, )

In [11]:
# import json
# json.dumps(CLASS_IDX)
# json.dumps(COLORMAP)

In [12]:
def load_img(f,resize=(IM_W,IM_H)):
    return np.array(
        Image.open(f)\
            .resize(size=resize,resample=Image.NEAREST )\
            .convert('RGB')
        )

def augmentate_data(img):
    # randomly shift contrast
    x = np.clip(img**np.random.uniform(0.8,1.2), 0, 255)
    # randomly shift brightness
    x = np.clip( x*np.random.uniform(0.5, 2.0), 0, 255)
    return x

def normalize_data(x,rgb_mean=RGB_MEAN,rgb_std=RGB_STD):
    return (x/255.-rgb_mean)/rgb_std


def nparray_as_tuple(x):
    s = x.shape
    return [ tuple(p) for p in x.reshape(s[0]*s[1],s[2])  ]

def colors_as_index(x,colormap=COLORMAP):
    s = x.shape
    return np.array( list(map( 
        lambda c: colormap[c] if c in colormap else 0, 
        nparray_as_tuple(x) 
    ))).reshape(s[:2])

In [13]:
x_test_aug = np.vstack([
        normalize_data(
            augmentate_data(
                load_img(pjoin(p,fi))
            )
        )[np.newaxis,:]
        for p,fi in tqdm( files.query('~is_train')[['dname','fname_img']].values.tolist() )
    ]).astype(np.float32)

100%|██████████| 210/210 [00:01<00:00, 130.81it/s]


In [14]:
np.save(pjoin(RESULT_FOLDER,'x_test_aug.npy'), x_test_aug, )
x_test_aug.shape

(210, 256, 256, 3)

In [15]:
x_test = np.vstack([
        normalize_data( load_img(pjoin(p,fi)) )[np.newaxis,:]
        for p,fi in tqdm( files.query('~is_train')[['dname','fname_img']].values.tolist() )
    ]).astype(np.float32)

100%|██████████| 210/210 [00:00<00:00, 382.66it/s]


In [16]:
np.save(pjoin(RESULT_FOLDER,'x_test.npy'), x_test, )
x_test.shape

(210, 256, 256, 3)

In [17]:
%xdel x_test
%xdel x_test_aug

In [18]:
y_test = np.vstack([
        colors_as_index(load_img(pjoin(p,fo)))[np.newaxis,:]
        for p,fo in tqdm( files.query('~is_train')[['dname','fname_msk']].values.tolist() )
    ])

100%|██████████| 210/210 [01:37<00:00,  2.15it/s]


In [19]:
np.save(pjoin(RESULT_FOLDER,'y_test.npy'), y_test, )
y_test.shape

(210, 256, 256)

In [20]:
%xdel y_test

---

In [21]:
x_train_aug = np.vstack([
        normalize_data(
            augmentate_data(
                load_img(pjoin(p,fi))
            )
        )[np.newaxis,:]
        for p,fi in tqdm( files.query('is_train')[['dname','fname_img']].values.tolist() )
    ]).astype(np.float32)

100%|██████████| 3335/3335 [00:21<00:00, 157.97it/s]


In [22]:
np.save(pjoin(RESULT_FOLDER,'x_train_aug.npy'), x_train_aug, )
x_train_aug.shape

(3335, 256, 256, 3)

In [23]:
%xdel x_train_aug

In [24]:
x_train = np.vstack([
        normalize_data( load_img(pjoin(p,fi)) )[np.newaxis,:]
        for p,fi in tqdm( files.query('is_train')[['dname','fname_img']].values.tolist() )
    ]).astype(np.float32)

100%|██████████| 3335/3335 [00:08<00:00, 377.63it/s]


In [25]:
np.save(pjoin(RESULT_FOLDER,'x_train.npy'), x_train, )
x_train.shape

(3335, 256, 256, 3)

In [26]:
%xdel x_train

---

In [27]:
y_train = np.vstack([
        colors_as_index(load_img(pjoin(p,fo)))[np.newaxis,:]
        for p,fo in tqdm( files.query('is_train')[['dname','fname_msk']].values.tolist() )
    ])

100%|██████████| 3335/3335 [25:53<00:00,  2.15it/s]


In [28]:
np.save(pjoin(RESULT_FOLDER,'y_train.npy'), y_train, )
y_train.shape

(3335, 256, 256)

In [29]:
%xdel y_train

In [30]:
# # files
# f='data/pascal/VOC/test/VOCdevkit/VOC2007/SegmentationClass/006303.png'
# x = load_img(f)

# x.shape

# from matplotlib import pyplot as plt
# plt.imshow(xc)