In [1]:
! pip install -q tensorflow-io

[K     |████████████████████████████████| 25.4MB 159kB/s 
[?25h

In [56]:
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import tensorflow_io as tfio
import tensorflow as tf
import numpy as np
import importlib
import yaml
import cv2 
import os


from google.colab import auth
from pathlib import Path
from tqdm import tqdm 

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/content/drive/My Drive/KTH/DD2424/tran-unet.json' 
!echo $GOOGLE_APPLICATION_CREDENTIALS

project='dd2424-308314' # change to your project name here
os.environ['GCP_PROJECT'] = project 
os.environ['GCP_ACCOUNT'] = 'trans-unet@' + project + '.iam.gserviceaccount.com'

!gcloud auth activate-service-account "$GCP_ACCOUNT" --key-file="$GOOGLE_APPLICATION_CREDENTIALS" --project="$GCP_PROJECT"

/content/drive/My Drive/KTH/DD2424/tran-unet.json
Activated service account credentials for: [trans-unet@dd2424-308314.iam.gserviceaccount.com]


In [9]:
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  2537  100  2537    0     0   103k      0 --:--:-- --:--:-- --:--:--  103k
OK
85 packages can be upgraded. Run 'apt list --upgradable' to see them.
gcsfuse is already the newest version (0.35.0).
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 85 not upgraded.


In [10]:
!mkdir DataSet
!gcsfuse --implicit-dirs aga_bucket DataSet

mkdir: cannot create directory ‘DataSet’: File exists
2021/05/04 19:11:15.819947 Using mount point: /content/DataSet
2021/05/04 19:11:15.828446 Opening GCS connection...
2021/05/04 19:11:16.578298 Mounting file system "aga_bucket"...
2021/05/04 19:11:16.578844 File system has been successfully mounted.


In [12]:
!ls /content/DataSet

synapse-tfrecords  test_vol_h5	train_npz


## Write image to tfrecord

In [17]:
data = np.load('DataSet/train_npz/case0005_slice000.npz')

In [92]:
def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))): # if value ist tensor
        value = value.numpy() # get value of tensor
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
  """Returns a floast_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
  """Returns an int64_list from a bool / enum / int / uint."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def serialize_array(array):
  array = tf.io.serialize_tensor(array)
  return array

In [93]:
def parse_single_image(image, label):
  
  #define the dictionary -- the structure -- of our single example
  data = {
        'height' : _int64_feature(image.shape[0]),
        'width' : _int64_feature(image.shape[1]),
        'depth' : _int64_feature(image.shape[2]),
        'image' : _bytes_feature(serialize_array(image)),
        'label' : _bytes_feature(serialize_array(label))
    }
  #create an Example, wrapping the single features
  out = tf.train.Example(features=tf.train.Features(feature=data))

  return out

In [103]:
def write_image_to_tfr(image, label, filename:str="images"):
  
  filename= filename+".tfrecords"
  writer = tf.io.TFRecordWriter(filename) #create a writer that'll store our data to disk

  image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
  print(image_rgb.shape)
  out = parse_single_image(image=image_rgb, label=label)
  writer.write(out.SerializeToString())

  writer.close()
  print(f"Wrote {filename} elements to TFRecord")

In [104]:
write_image_to_tfr_short(data['image'], data['label'], filename='case0005_slice000')

Wrote case0005_slice000.tfrecords elements to TFRecord


In [83]:
!du -sh case0005_slice000.tfrecords

4.1M	case0005_slice000.tfrecords


In [96]:
!du -sh 'DataSet/train_npz/case0005_slice000.npz'

2.1M	DataSet/train_npz/case0005_slice000.npz


## Read a TFRecord file

In [97]:
def parse_tfr_element(element):
  #use the same structure as above; it's kinda an outline of the structure we now want to create
  data = {
      'height': tf.io.FixedLenFeature([], tf.int64),
      'width':tf.io.FixedLenFeature([], tf.int64),
      'depth':tf.io.FixedLenFeature([], tf.int64),
      'label':tf.io.FixedLenFeature([], tf.string),
      'image' : tf.io.FixedLenFeature([], tf.string),
    }

    
  content = tf.io.parse_single_example(element, data)
  
  height = content['height']
  width = content['width']
  depth = content['depth']
  raw_label = content['label']
  raw_image = content['image']
  
  
  image = tf.io.parse_tensor(raw_image, out_type=tf.float32)
  image = tf.reshape(image, shape=[height,width,depth])

  label = tf.io.parse_tensor(raw_label, out_type=tf.float32)
  image = tf.reshape(label, shape=[height,width])
  return (image, label)


In [67]:
data['label'].dtype

dtype('float32')

In [64]:
filename="case0005_slice000.tfrecords"

In [98]:
def get_dataset_small(filename):
  #create the dataset
  dataset = tf.data.TFRecordDataset(filename)

  #pass every single feature through our mapping function
  dataset = dataset.map(
      parse_tfr_element
  )
    
  return dataset

In [100]:
dataset_small = get_dataset_small(filename)

for sample in dataset_small.take(1):
  print(sample)

(<tf.Tensor: shape=(512, 512), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, <tf.Tensor: shape=(512, 512), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>)
