In [1]:
!pip install waymo-open-dataset-tf-2-11-0==1.5.1
!pip install Pillow==9.0.0

# Imports
import os
import tarfile
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tqdm

from waymo_open_dataset.wdl_limited.sim_agents_metrics import metric_features
from waymo_open_dataset.wdl_limited.sim_agents_metrics import metrics

from waymo_open_dataset.protos import scenario_pb2
from waymo_open_dataset.protos import sim_agents_metrics_pb2
from waymo_open_dataset.protos import sim_agents_submission_pb2
from google.protobuf import text_format

from waymo_open_dataset.utils.sim_agents import submission_specs
from waymo_open_dataset.utils.sim_agents import test_utils as sim_agents_test_utils
from waymo_open_dataset.utils.sim_agents import visualizations
from waymo_open_dataset.utils import trajectory_utils

# Set matplotlib to jshtml so animations work with colab.
from matplotlib import rc
rc('animation', html='jshtml')

import os
from google.cloud import storage
from tqdm import tqdm
import re

# mount drive
from google.colab import drive
drive.mount('/content/drive')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting waymo-open-dataset-tf-2-11-0==1.5.1
  Downloading waymo_open_dataset_tf_2_11_0-1.5.1-py3-none-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (4.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.0/4.0 MB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
Collecting dask[dataframe]==2023.3.1 (from waymo-open-dataset-tf-2-11-0==1.5.1)
  Downloading dask-2023.3.1-py3-none-any.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einsum==0.3.0 (from waymo-open-dataset-tf-2-11-0==1.5.1)
  Downloading einsum-0.3.0-py3-none-any.whl (5.1 kB)
Collecting google-auth==2.16.2 (from waymo-open-dataset-tf-2-11-0==1.5.1)
  Downloading google_auth-2.16.2-py2.py3-none-any.whl (177 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.2/177.2 kB[0m [31m10.7 MB/s[0m eta

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Pillow==9.0.0
  Downloading Pillow-9.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.3/4.3 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Pillow
  Attempting uninstall: Pillow
    Found existing installation: Pillow 9.2.0
    Uninstalling Pillow-9.2.0:
      Successfully uninstalled Pillow-9.2.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
scikit-image 0.20.0 requires pillow>=9.0.1, but you have pillow 9.0.0 which is incompatible.
seaborn 0.12.2 requires matplotlib!=3.6.1,>=3.1, but you have matplotlib 3.6.1 which is incompatible.
waymo-open-dataset-tf-2-11-0 1.5.1 requires pillow==9.2.0, but you have pillow 9.0.0 which 

Mounted at /content/drive


# Downloader

In [2]:
def download_from_gcs(gcs_path):
    # Extract the filename from the file URL
    filename = re.search('/(training|testing|validation)/(.+\.tfrecord-\d+-of-\d+)', gcs_path).group(2)
    # local path
    local_path = '/content/drive/MyDrive/Colab Notebooks/waymo_open_dataset_/' + filename
    # check if the file already exists 
    if os.path.isfile(local_path):
        print(f'{filename} already exists in {local_path}')
    else:
        # Set up the storage client
        client = storage.Client()
        # Get a reference to the bucket and object
        bucket = client.bucket('waymo_open_dataset_motion_v_1_2_0')
        blob = bucket.blob(gcs_path)

        # Download the object to a file with a progress bar
        with tqdm.wrapattr(open(local_path, 'wb'), 'write', miniters=1,
                          total=blob.size, desc=f'Downloading {gcs_path}') as file_obj:
            blob.download_to_file(file_obj)

        print(f'Object downloaded to {local_path}')

In [3]:
# Download samples
download_from_gcs('uncompressed/scenario/validation/validation.tfrecord-00000-of-00150')
download_from_gcs('uncompressed/scenario/training/training.tfrecord-00000-of-01000')
download_from_gcs('uncompressed/scenario/testing/testing.tfrecord-00000-of-00150')

validation.tfrecord-00000-of-00150 already exists in /content/drive/MyDrive/Colab Notebooks/waymo_open_dataset_/validation.tfrecord-00000-of-00150
training.tfrecord-00000-of-01000 already exists in /content/drive/MyDrive/Colab Notebooks/waymo_open_dataset_/training.tfrecord-00000-of-01000
testing.tfrecord-00000-of-00150 already exists in /content/drive/MyDrive/Colab Notebooks/waymo_open_dataset_/testing.tfrecord-00000-of-00150


# Loader

In [4]:
DATASET_FOLDER = '/content/drive/MyDrive/Colab Notebooks/waymo_open_dataset_'

TRAIN_FILES = os.path.join(DATASET_FOLDER, 'training.tfrecord*')
VALIDATION_FILES = os.path.join(DATASET_FOLDER, 'validation.tfrecord*')
TEST_FILES = os.path.join(DATASET_FOLDER, 'testing.tfrecord*')

In [5]:
def Prepare_train_dataset():
    filenames = tf.io.matching_files(TRAIN_FILES)
    dataset = tf.data.TFRecordDataset(filenames)
    return dataset

def Prepare_validation_dataset():
    filenames = tf.io.matching_files(VALIDATION_FILES)
    dataset = tf.data.TFRecordDataset(filenames)
    return dataset

def Prepare_test_dataset():
    filenames = tf.io.matching_files(TEST_FILES)
    dataset = tf.data.TFRecordDataset(filenames)
    return dataset

In [6]:
# scenario exemple
dataset_iterator = Prepare_train_dataset().as_numpy_iterator()
bytes_example = next(dataset_iterator)
scenario = scenario_pb2.Scenario.FromString(bytes_example)
print(f'Checking type: {type(scenario)}')
print(f'Loaded scenario with ID: {scenario.scenario_id}')

Checking type: <class 'waymo_open_dataset.protos.scenario_pb2.Scenario'>
Loaded scenario with ID: 4b60f9400a30ceaf


In [11]:
# get the list of all the possible functions of the scenario object
print(f'List of all the fields of the scenario object: {[f for f in dir(scenario) if not callable(f)]}')

List of all the fields of the scenario object: ['ByteSize', 'Clear', 'ClearExtension', 'ClearField', 'CopyFrom', 'DESCRIPTOR', 'DiscardUnknownFields', 'Extensions', 'FindInitializationErrors', 'FromString', 'HasExtension', 'HasField', 'IsInitialized', 'ListFields', 'MergeFrom', 'MergeFromString', 'ParseFromString', 'RegisterExtension', 'SerializePartialToString', 'SerializeToString', 'SetInParent', 'UnknownFields', 'WhichOneof', '_CheckCalledFromGeneratedFile', '_SetListener', '__class__', '__deepcopy__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__unicode__', '_extensions_by_name', '_extensions_by_number', 'compressed_frame_laser_data', 'current_time_index', 'dynamic_map_states', 'm