Permalink
Fetching contributors…
Cannot retrieve contributors at this time
91 lines (77 sloc) 2.67 KB
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the PASCAL VOC 2012 dataset.
#
# Usage:
# bash ./download_and_convert_voc2012.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_voc2012_data.py
# - download_and_convert_voc2012.sh
# - remove_gt_colormap.py
# + pascal_voc_seg
# + VOCdevkit
# + VOC2012
# + JPEGImages
# + SegmentationClass
#
# Exit immediately if a command exits with a non-zero status.
set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./pascal_voc_seg"
mkdir -p "${WORK_DIR}"
cd "${WORK_DIR}"
# Helper function to download and unpack VOC 2012 dataset.
download_and_uncompress() {
local BASE_URL=${1}
local FILENAME=${2}
if [ ! -f "${FILENAME}" ]; then
echo "Downloading ${FILENAME} to ${WORK_DIR}"
wget -nd -c "${BASE_URL}/${FILENAME}"
fi
echo "Uncompressing ${FILENAME}"
tar -xf "${FILENAME}"
}
# Download the images.
BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
FILENAME="VOCtrainval_11-May-2012.tar"
download_and_uncompress "${BASE_URL}" "${FILENAME}"
cd "${CURRENT_DIR}"
# Root path for PASCAL VOC 2012 dataset.
PASCAL_ROOT="${WORK_DIR}/VOCdevkit/VOC2012"
# Remove the colormap in the ground truth annotations.
SEG_FOLDER="${PASCAL_ROOT}/SegmentationClass"
SEMANTIC_SEG_FOLDER="${PASCAL_ROOT}/SegmentationClassRaw"
echo "Removing the color map in ground truth annotations..."
python ./remove_gt_colormap.py \
--original_gt_folder="${SEG_FOLDER}" \
--output_dir="${SEMANTIC_SEG_FOLDER}"
# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"
IMAGE_FOLDER="${PASCAL_ROOT}/JPEGImages"
LIST_FOLDER="${PASCAL_ROOT}/ImageSets/Segmentation"
echo "Converting PASCAL VOC 2012 dataset..."
python ./build_voc2012_data.py \
--image_folder="${IMAGE_FOLDER}" \
--semantic_segmentation_folder="${SEMANTIC_SEG_FOLDER}" \
--list_folder="${LIST_FOLDER}" \
--image_format="jpg" \
--output_dir="${OUTPUT_DIR}"