In [16]:
from google.cloud import storage
import os
import tensorflow as tf
import math
import numpy as np
import itertools

# Turn on eager execution before any other TensorFlow call in this notebook.
# NOTE(review): `tf.enable_eager_execution` looks like a TF 1.x entry point —
# confirm against the installed TensorFlow version.
tf.enable_eager_execution()

## Reading TFRecords from Google Cloud Storage

In [None]:
def list_blobs_with_prefix(bucket_name, prefix, delimiter=None):
    """List the objects of a GCS bucket whose names start with ``prefix``.

    Args:
        bucket_name: Name of the bucket to list (without the ``gs://`` scheme).
        prefix: Only blobs whose name begins with this string are returned.
        delimiter: Optional delimiter forwarded unchanged to
            ``Bucket.list_blobs``.

    Returns:
        A list of ``gs://<bucket_name>/<blob name>`` strings, in the order the
        listing yields them.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=prefix, delimiter=delimiter)

    # Build each URI from the bucket that was actually listed; a hard-coded
    # bucket name here would return wrong paths for any other bucket.
    return ['gs://{}/{}'.format(bucket_name, blob.name) for blob in blobs]


In [10]:
def _parse_function(proto):
    """Parse one serialized example and decode its JPEG image.

    Args:
        proto: One serialized example record, as yielded by a
            ``tf.data.TFRecordDataset``.

    Returns:
        A tuple ``(image, class_text)`` where ``image`` is the result of
        ``tf.io.decode_jpeg`` on the ``'image/encoded'`` feature and
        ``class_text`` is the parsed ``'image/object/class/text'`` feature
        (declared as a ``VarLenFeature``, so it is returned in sparse form).
        Adjust the return statement if other features are needed.
    """
    # Schema the records were written with. The image is stored as an encoded
    # byte string; per-object annotations are variable-length.
    image_feature_description = {
        'image/filename': tf.io.VarLenFeature(tf.string),
        'image/format': tf.io.FixedLenFeature([], tf.string),
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/object/bbox/center_x': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/center_y': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/width': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/height': tf.io.VarLenFeature(tf.float32),
        'image/object/class/text': tf.io.VarLenFeature(tf.string),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
    }

    # Use the tf.io namespace like every other call in this function.
    parsed_features = tf.io.parse_single_example(proto, image_feature_description)

    # Decode the stored JPEG bytes into an image tensor.
    image = tf.io.decode_jpeg(parsed_features['image/encoded'])

    return image, parsed_features['image/object/class/text']

In [11]:
def read_tfrecords(filenames, batch_size=8, shuffle_buffer_size=3,
                   num_parallel_calls=8):
    """Build an input pipeline over TFRecord files and return one batch.

    Args:
        filenames: TFRecord path(s) passed to ``tf.data.TFRecordDataset``.
        batch_size: Number of parsed records per batch (default 8).
        shuffle_buffer_size: Size of the shuffle buffer (default 3).
        num_parallel_calls: Number of records parsed in parallel by
            ``_parse_function`` (default 8).

    Returns:
        A tuple ``(image, label)`` holding the result of a single
        ``get_next()`` call on the pipeline's one-shot iterator.
    """
    dataset = tf.data.TFRecordDataset(filenames)

    # Parse every serialized record; parsing runs in parallel.
    dataset = dataset.map(_parse_function, num_parallel_calls=num_parallel_calls)

    # Repeat indefinitely. Shuffling is applied after repeat(), so the shuffle
    # buffer may mix records across epoch boundaries (original ordering kept).
    dataset = dataset.repeat()
    dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.batch(batch_size)

    iterator = dataset.make_one_shot_iterator()

    # Only one batch is pulled here; the iterator itself is not returned.
    image, label = iterator.get_next()
    return image, label

In [None]:
# Collect the gs:// URIs of the validation TFRecords, then read a batch from them.
validation_filenames = list_blobs_with_prefix(
    bucket_name='waymo_validation',
    prefix='validation_dataset',
)
images, labels = read_tfrecords(filenames=validation_filenames)

## Upload a local file to GCS

In [18]:
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Copy a local file into a bucket and report the upload on stdout.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to create inside the bucket.
    """
    target = (
        storage.Client()
        .get_bucket(bucket_name)
        .blob(destination_blob_name)
    )
    target.upload_from_filename(source_file_name)

    message = 'File {} uploaded to {}.'.format(source_file_name, destination_blob_name)
    print(message)

In [20]:
upload_blob(
    bucket_name='waymo_validation',
    source_file_name='xyz.png',
    destination_blob_name='abc/xsx',
)

File xyz.png uploaded to abc/xsx.


## Upload in-memory data (the contents of a variable) to GCS

In [24]:
def upload_blob2(bucket_name, source_file_name, destination_blob_name,
                 content_type='image/png'):
    """Upload in-memory data (not a file on disk) to the bucket.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: The data to upload. Despite its name (kept so
            existing keyword callers keep working), this is the payload
            itself, passed straight to ``Blob.upload_from_string``.
        destination_blob_name: Object name to create inside the bucket.
        content_type: Content type recorded for the object. Defaults to
            ``'image/png'``, the value that used to be hard-coded.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_string(source_file_name, content_type=content_type)

    # Report the payload length, not the payload: formatting the data into
    # the message would dump the whole (possibly binary) content to the output.
    print('Data of length {} uploaded to {}.'.format(
        len(source_file_name),
        destination_blob_name))