In [1]:
import json
import os
import pprint
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from PIL import Image

print(tf.__version__)

2.5.1


In [2]:
# --- Local layout -----------------------------------------------------------
# Everything downloaded below is stored under root_dir; TFRecords go to tfrecords_dir.
root_dir = "datasets"
tfrecords_dir = "tfrecords"

train_images_dir, val_images_dir = (
    os.path.join(root_dir, split_dir) for split_dir in ("train2017", "val2017")
)
train_annotation_file, val_annotation_file = (
    os.path.join(root_dir, json_name)
    for json_name in ("lvis_v1_train.json", "lvis_v1_val.json")
)

# --- Remote sources ---------------------------------------------------------
# COCO 2017 image archives.
train_images_url = "http://images.cocodataset.org/zips/train2017.zip"
val_images_url = "http://images.cocodataset.org/zips/val2017.zip"

# LVIS v1 annotation archives.
train_annotations_url = "https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip"
val_annotations_url = "https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_val.json.zip"

# COCO annotation archive (not referenced by the cells visible in this notebook section).
coco_annotations_url = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

In [3]:
# Download image files: fetch and unpack each image archive whose target
# folder does not exist yet (train first, then val).
for images_dir, images_url in (
    (train_images_dir, train_images_url),
    (val_images_dir, val_images_url),
):
    if os.path.exists(images_dir):
        continue  # already extracted on a previous run
    image_zip = tf.keras.utils.get_file(
        "images.zip",
        origin=images_url,
        cache_dir=os.path.abspath("."),
        extract=True,
    )
    # get_file was asked to extract the archive, so the zip itself can go.
    os.remove(image_zip)

In [4]:
# Download LVIS annotation files: fetch and unpack each annotation archive
# whose JSON file does not exist yet (train first, then val).
for annotation_file, annotations_url in (
    (train_annotation_file, train_annotations_url),
    (val_annotation_file, val_annotations_url),
):
    if os.path.exists(annotation_file):
        continue  # already extracted on a previous run
    annotation_zip = tf.keras.utils.get_file(
        "captions.zip",
        origin=annotations_url,
        cache_dir=os.path.abspath("."),
        extract=True,
    )
    # get_file was asked to extract the archive, so the zip itself can go.
    os.remove(annotation_zip)

print("The LVIS dataset has been downloaded and extracted successfully.")

The LVIS dataset has been downloaded and extracted successfully.


In [5]:
# Load the "annotations" list from each LVIS JSON file. Every entry describes a
# single object instance (it carries its own 'bbox', 'category_id' and
# 'image_id'), so the lengths printed below count annotations, not images;
# several annotations can share one image_id.
with open(train_annotation_file, "r", encoding="utf-8") as f:
    train_annotations = json.load(f)["annotations"]

with open(val_annotation_file, "r", encoding="utf-8") as f:
    val_annotations = json.load(f)["annotations"]

print(f"Number of train annotations: {len(train_annotations)}")
print(f"Number of validation annotations: {len(val_annotations)}")

Number of train images: 1270141
Number of validation images: 244707


In [6]:
# Inspect one raw annotation record (index 30) to see which fields are available.
# The 'segmentation' polygon makes this output long.
pprint.pprint(train_annotations[30])

{'area': 82.38,
 'bbox': [46.97, 459.24, 16.92, 10.84],
 'category_id': 911,
 'id': 31,
 'image_id': 356358,
 'segmentation': [[48.16,
                   466.38,
                   50.4,
                   466.51,
                   52.26,
                   466.51,
                   54.37,
                   464.93,
                   56.09,
                   464.0,
                   57.02,
                   462.02,
                   56.88,
                   459.24,
                   58.87,
                   460.17,
                   60.72,
                   461.49,
                   62.44,
                   462.81,
                   63.89,
                   464.13,
                   63.5,
                   466.78,
                   60.45,
                   467.83,
                   58.21,
                   468.1,
                   56.75,
                   468.76,
                   54.64,
                   469.69,
                   52.12,
                   47

In [7]:
# Number of annotation records per split, derived from the loaded lists so the
# values cannot drift from the data (the recorded run reported 1270141 train
# and 244707 validation annotations).
train_num_samples = len(train_annotations)
val_num_samples = len(val_annotations)

# Create the TFRecords output folder; exist_ok avoids the check-then-create
# race and makes the cell safe to re-run.
os.makedirs(tfrecords_dir, exist_ok=True)

In [8]:
def image_feature(value):
    """Wrap a single bytes value in a bytes_list Feature.

    ``value`` is placed in a one-element list, so pass the raw bytes
    themselves (here: the encoded image), not a list of them.
    """
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)


def bytes_feature(value):
    """Wrap a sequence of bytes values in a bytes_list Feature.

    Unlike ``image_feature``, ``value`` is passed through unchanged, so it
    must already be a sequence (possibly empty) of bytes objects.
    """
    payload = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=payload)


def float_feature(value):
    """Wrap a sequence of floats in a float_list Feature.

    ``value`` is passed through unchanged, so it must already be a sequence
    (possibly empty) of numbers rather than a single scalar.
    """
    payload = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=payload)

def create_example(image, classes_text, bbox):
    """Build a tf.train.Example holding one image and its object annotations.

    Args:
        image: bytes stored under "image/encoded" via ``image_feature``.
        classes_text: sequence of bytes stored under "image/object/class/text".
        bbox: iterable of exactly four sequences, unpacked in the order
            (xmin, ymin, xmax, ymax); each is stored as a float list.

    Returns:
        A ``tf.train.Example`` containing the six features above.
    """
    xmin, ymin, xmax, ymax = bbox
    features = tf.train.Features(
        feature=dict(
            [
                ("image/encoded", image_feature(image)),
                ("image/object/bbox/xmin", float_feature(xmin)),
                ("image/object/bbox/ymin", float_feature(ymin)),
                ("image/object/bbox/xmax", float_feature(xmax)),
                ("image/object/bbox/ymax", float_feature(ymax)),
                ("image/object/class/text", bytes_feature(classes_text)),
            ]
        )
    )
    return tf.train.Example(features=features)

In [44]:
# Map the 0-based line index to the class name found on that line of the names
# file. The file is opened in a `with` block so the handle is closed promptly.
with open("./data/lvis.names", encoding="utf-8") as names_file:
    class_map = dict(enumerate(names_file.read().splitlines()))
print(class_map[0])

aerosol_can


In [57]:
# since LVIS val instance includes train data, create val first
# Group the validation annotations by the image they belong to.
samples = defaultdict(list)
for annots in val_annotations:
    samples[annots["image_id"]].append(annots)

with tf.io.TFRecordWriter(tfrecords_dir + "/LVIS_val.tfrecord") as writer:
    for img_id, sample in samples.items():
        # bbox holds four parallel lists: xmin, ymin, xmax, ymax.
        bbox = [[] for _ in range(4)]
        classes_text = []

        # Look for the image in the train folder first, then fall back to val.
        image_path = f"{train_images_dir}/{img_id:012d}.jpg"
        if not os.path.exists(image_path):
            image_path = f"{val_images_dir}/{img_id:012d}.jpg"

        # Read the encoded bytes and the pixel size, closing both handles
        # right away instead of leaving them to the garbage collector.
        with open(image_path, "rb") as image_file:
            image = image_file.read()
        with Image.open(image_path) as pil_image:
            width, height = pil_image.size

        for instance in sample:
            # instance["bbox"] is unpacked as (x, y, w, h) in pixels; the
            # coordinates are normalized by the image size.
            x, y, w, h = instance["bbox"]
            xmin = x / width
            ymin = y / height
            xmax = (x + w) / width
            ymax = (y + h) / height
            norm_w, norm_h = w / width, h / height
            # Keep only boxes whose normalized width and height both lie
            # strictly between 0.05 and 1.
            if 0.05 < norm_w < 1 and 0.05 < norm_h < 1:
                bbox[0].append(xmin)
                bbox[1].append(ymin)
                bbox[2].append(xmax)
                bbox[3].append(ymax)
                # class_map is 0-based (built with enumerate); the "-1" assumes
                # category_id starts at 1 -- TODO confirm against lvis.names.
                classes_text.append(class_map[instance["category_id"] - 1].encode("utf8"))

        # An example is written for every image, even when no box passed the filter.
        example = create_example(image, classes_text, np.array(bbox))
        writer.write(example.SerializeToString())

In [61]:
# Group the validation annotations by the image they belong to.
samples = defaultdict(list)
for annotation in val_annotations:
    samples[annotation["image_id"]].append(annotation)

# Largest per-image annotation count among images that have at least 100
# annotations; stays 0 when no image reaches that threshold.
max_num = max(
    (len(group) for group in samples.values() if len(group) >= 100),
    default=0,
)
print(max_num)

759


In [58]:
# Create train tfrecord file
# Group the train annotations by the image they belong to.
samples = defaultdict(list)
for annots in train_annotations:
    samples[annots["image_id"]].append(annots)

with tf.io.TFRecordWriter(tfrecords_dir + "/LVIS_train.tfrecord") as writer:
    for img_id, sample in samples.items():
        # bbox holds four parallel lists: xmin, ymin, xmax, ymax.
        bbox = [[] for _ in range(4)]
        classes_text = []
        image_path = f"{train_images_dir}/{img_id:012d}.jpg"

        # Read the encoded bytes and the pixel size, closing both handles
        # right away instead of leaving them to the garbage collector.
        with open(image_path, "rb") as image_file:
            image = image_file.read()
        with Image.open(image_path) as pil_image:
            width, height = pil_image.size

        for instance in sample:
            # instance["bbox"] is unpacked as (x, y, w, h) in pixels; the
            # coordinates are normalized by the image size.
            x, y, w, h = instance["bbox"]
            xmin = x / width
            ymin = y / height
            xmax = (x + w) / width
            ymax = (y + h) / height
            norm_w, norm_h = w / width, h / height
            # Keep only boxes whose normalized width and height both lie
            # strictly between 0.05 and 1.
            if 0.05 < norm_w < 1 and 0.05 < norm_h < 1:
                bbox[0].append(xmin)
                bbox[1].append(ymin)
                bbox[2].append(xmax)
                bbox[3].append(ymax)
                # class_map is 0-based (built with enumerate); the "-1" assumes
                # category_id starts at 1 -- TODO confirm against lvis.names.
                classes_text.append(class_map[instance["category_id"] - 1].encode("utf8"))

        # An example is written for every image, even when no box passed the filter.
        example = create_example(image, classes_text, np.array(bbox))
        writer.write(example.SerializeToString())