In [23]:
import os
import json
import pprint
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Show the installed TensorFlow version at the top of the notebook.
print(tf.__version__)

2.9.1


In [2]:
# Local layout: downloads live under `root_dir`, TFRecord output goes to `tfrecords_dir`.
root_dir = "datasets"
tfrecords_dir = "tfrecords"

# Image folders (one per COCO 2017 split) inside `root_dir`.
train_images_dir, val_images_dir = (
    os.path.join(root_dir, folder) for folder in ("train2017", "val2017")
)

# LVIS v1 annotation JSON files inside `root_dir`.
train_annotation_file, val_annotation_file = (
    os.path.join(root_dir, filename)
    for filename in ("lvis_v1_train.json", "lvis_v1_val.json")
)

# Remote archives: COCO 2017 images and annotations.
train_images_url = "http://images.cocodataset.org/zips/train2017.zip"
val_images_url = "http://images.cocodataset.org/zips/val2017.zip"
coco_annotations_url = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

# Remote archives: LVIS v1 annotations.
train_annotations_url = "https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip"
val_annotations_url = "https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_val.json.zip"

In [3]:
# Fetch the image archives, skipping any split whose folder already exists.
for images_dir, images_url in (
    (train_images_dir, train_images_url),
    (val_images_dir, val_images_url),
):
    if os.path.exists(images_dir):
        continue
    image_zip = tf.keras.utils.get_file(
        "images.zip",
        origin=images_url,
        cache_dir=os.path.abspath("."),
        extract=True,
    )
    # get_file was asked to extract the archive, so the zip itself can go.
    os.remove(image_zip)

In [4]:
# Fetch the LVIS annotation archives, skipping any JSON file that is already present.
for annotation_file, annotations_url in (
    (train_annotation_file, train_annotations_url),
    (val_annotation_file, val_annotations_url),
):
    if os.path.exists(annotation_file):
        continue
    annotation_zip = tf.keras.utils.get_file(
        "captions.zip",
        origin=annotations_url,
        cache_dir=os.path.abspath("."),
        extract=True,
    )
    # get_file was asked to extract the archive, so the zip itself can go.
    os.remove(annotation_zip)

print("The LVIS dataset has been downloaded and extracted successfully.")

The LVIS dataset has been downloaded and extracted successfully.


In [5]:
# Keep only the "annotations" list from each LVIS JSON file.
with open(train_annotation_file, "r") as f:
    train_annotations = json.load(f)["annotations"]

with open(val_annotation_file, "r") as f:
    val_annotations = json.load(f)["annotations"]

# Each list entry is one annotated object instance carrying its own `image_id`,
# so these lengths are annotation counts, not image counts.
print(f"Number of train annotations: {len(train_annotations)}")
print(f"Number of validation annotations: {len(val_annotations)}")

Number of train images: 1270141
Number of validation images: 244707


In [6]:
# Pretty-print one annotation record to see which fields it carries.
pprint.pprint(train_annotations[60])

{'area': 15.25,
 'bbox': [235.4, 291.18, 22.18, 1.65],
 'category_id': 1037,
 'id': 61,
 'image_id': 402711,
 'segmentation': [[236.27,
                   291.27,
                   235.4,
                   291.73,
                   244.38,
                   292.65,
                   245.66,
                   292.28,
                   245.48,
                   291.54,
                   239.06,
                   291.18,
                   237.23,
                   291.18,
                   236.27,
                   291.27],
                  [251.35,
                   291.91,
                   251.71,
                   292.83,
                   257.03,
                   292.65,
                   257.58,
                   291.91,
                   256.48,
                   291.54,
                   255.38,
                   291.54,
                   251.35,
                   291.91]]}


In [7]:
# Number of annotation records per split; these match the lengths printed
# when the annotation JSON files were loaded.
train_num_samples = 1270141
val_num_samples = 244707

# Create the TFRecords output folder. `exist_ok=True` makes this a no-op when
# the folder is already there and avoids a separate check-then-create step.
os.makedirs(tfrecords_dir, exist_ok=True)

In [20]:
def image_feature(value):
    """Wrap one bytes value in a single-element BytesList feature."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)


def bytes_feature(value):
    """Build a BytesList feature from `value`, which is passed through as-is.

    Unlike `image_feature`, `value` is not wrapped in a list here, so it must
    already be a sequence of bytes items.
    """
    payload = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=payload)


def float_feature(value):
    """Build a FloatList feature from `value`, which is passed through as-is.

    `value` is not wrapped in a list here, so it must already be a sequence
    of numbers.
    """
    payload = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=payload)

def create_example(image, classes_text, bbox):
    """Build a tf.train.Example for one image and all of its annotated objects.

    Args:
        image: Encoded image file contents (bytes).
        classes_text: Sequence of bytes labels, one per object.
        bbox: Array-like with four rows ``x, y, w, h`` (shape ``(4, N)`` for
            N objects), where ``(x, y)`` is the TOP-LEFT corner of each box
            and ``w``/``h`` are its width and height. This is the layout of
            the ``bbox`` field in the annotation records.

    Returns:
        A tf.train.Example holding the encoded image, the per-object corner
        coordinates (xmin, ymin, xmax, ymax) and the per-object class text.
        Coordinates are kept in the same units as the input; no normalization
        is applied here.
    """
    x, y, w, h = np.asarray(bbox, dtype=np.float64)

    # (x, y) is already the top-left corner, so the opposite corner is reached
    # by adding the full width/height (not half of it, which would only be
    # right for a centre-based box).
    xmin = x
    ymin = y
    xmax = x + w
    ymax = y + h

    feature = {
        "image/encoded": image_feature(image),
        "image/object/bbox/xmin": float_feature(xmin),
        "image/object/bbox/ymin": float_feature(ymin),
        "image/object/bbox/xmax": float_feature(xmax),
        "image/object/bbox/ymax": float_feature(ymax),
        "image/object/class/text": bytes_feature(classes_text),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

In [37]:
# Images referenced by the LVIS val annotations may sit in either image folder
# (train2017 is tried first, then val2017), so the val TFRecord is written
# while both folders are still on disk.
from collections import defaultdict

# Group the per-instance annotations by the image they belong to.
samples = defaultdict(list)
for annots in val_annotations:
    samples[annots["image_id"]].append(annots)

with tf.io.TFRecordWriter(tfrecords_dir + "/LVIS_val.tfrecord") as writer:
    for img_id, instances in samples.items():
        # Transpose to four rows (x, y, w, h), one column per instance.
        bbox = np.array([instance["bbox"] for instance in instances]).T
        classes_text = [
            str(instance["category_id"]).encode('utf8') for instance in instances
        ]

        image_path = f"{train_images_dir}/{img_id:012d}.jpg"
        if not os.path.exists(image_path):
            image_path = f"{val_images_dir}/{img_id:012d}.jpg"
        # Use a context manager so the image file handle is closed right away.
        with open(image_path, 'rb') as image_file:
            image = image_file.read()

        example = create_example(image, classes_text, bbox)
        writer.write(example.SerializeToString())

In [27]:
# Run after the val TFRecord has been written: the val2017 images are no
# longer needed, so remove them to free disk space. The train2017 folder must
# stay, because the train TFRecord is built from it.
import shutil

if os.path.exists(val_images_dir):
    shutil.rmtree(val_images_dir)

The history saving thread hit an unexpected error (OperationalError('database or disk is full')).History will not be written to the database.


In [38]:
# Create train tfrecord file

# Group the per-instance annotations by the image they belong to.
samples = defaultdict(list)
for annots in train_annotations:
    samples[annots["image_id"]].append(annots)

with tf.io.TFRecordWriter(tfrecords_dir + "/LVIS_train.tfrecord") as writer:
    for img_id, instances in samples.items():
        # Transpose to four rows (x, y, w, h), one column per instance.
        bbox = np.array([instance["bbox"] for instance in instances]).T
        classes_text = [
            str(instance["category_id"]).encode('utf8') for instance in instances
        ]

        # Train images are read from the train2017 folder only.
        image_path = f"{train_images_dir}/{img_id:012d}.jpg"
        # Use a context manager so the image file handle is closed right away.
        with open(image_path, 'rb') as image_file:
            image = image_file.read()

        example = create_example(image, classes_text, bbox)
        writer.write(example.SerializeToString())