In [18]:
"""CT_Lymph_Nodes dataset."""

import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow_datasets.core import utils
import numpy as np
import os
import io
import pydicom

# BibTeX citation
_CITATION = """
\@misc{CT_Lymph_Nodes_Citation,
  doi = {10.1007/978-3-319-10404-1_65},
  url = {https://wiki.cancerimagingarchive.net/display/Public/CT+Lymph+Nodes#12d41e510fe547b59000cd90afb8dbf2},
  author = {Roth, Holger R., Lu, Le, Seff, Ari, Cherry, Kevin M., Hoffman, Joanne, Wang, Shijun, Liu, Jiamin, Turkbey, Evrim and Summers, Ronald M.},
  title = {A New 2.5D Representation for Lymph Node Detection Using Random Sets of Deep Convolutional Neural Network Observations},
  publisher = {Springer International Publishing},
  year = {2014},
}
@article{TCIA_Citation,
  author = {
    K. Clark and B. Vendt and K. Smith and J. Freymann and J. Kirby and
    P. Koppel and S. Moore and S. Phillips and D. Maffitt and M. Pringle and
    L. Tarbox and F. Prior
  },
  title = {{The Cancer Imaging Archive (TCIA): Maintaining and Operating a
  Public Information Repository}},
  journal = {Journal of Digital Imaging},
  volume = {26},
  month = {December},
  year = {2013},
  pages = {1045-1057},
}
"""

# Data Description
_DESCRIPTION = """
This dataset contains 110,013 Computed Tomography (CT) images of the mediastinum 
and abdomen in which lymph node positions are marked by radiologists at the 
National Institutes of Health, Clinical Center. These 10,013 images consist of 
388 mediastinal lymph nodes that come from 90 patients and a total of 595 
abdominal lymph nodes in 86 patients. All images are of 512*512 pixel arrays. 
"""

#TODO DOWNLOAD LINKS

class CT_Lymph_Nodes(tfds.core.GeneratorBasedBuilder):
  """Dataset builder for CT scans of lymph nodes from NIH.

  Each example pairs a 512x512 int16 CT image with a 512x512 int16 mask.
  The files are not fetched automatically; they must be placed in
  `manual_dir` by the user (see MANUAL_DOWNLOAD_INSTRUCTIONS).
  """

  # Dataset version.
  VERSION = tfds.core.Version('1.0.0')

  MANUAL_DOWNLOAD_INSTRUCTIONS = """\
  You can download the images from
  https://console.cloud.google.com/storage/browser/bme590/jingjing
  Please put all files in manual_dir.
  """

  def _info(self):
    """Returns the tfds.core.DatasetInfo describing features and metadata."""
    return tfds.core.DatasetInfo(
        builder=self,
        # This is the description that will appear on the datasets page.
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            # The CT image.
            'image': tfds.features.Tensor(shape=(512, 512), dtype=tf.int16),
            # The mask.
            'mask': tfds.features.Tensor(shape=(512, 512), dtype=tf.int16),
        }),
        # (input, target) pair used when as_supervised=True in
        # builder.as_dataset.
        supervised_keys=('image', 'mask'),
        # Homepage of the dataset for documentation (same collection page
        # that the dataset citation points to).
        homepage='https://wiki.cancerimagingarchive.net/display/Public/CT+Lymph+Nodes',
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager):
    """Returns SplitGenerators.

    Args:
        dl_manager: download manager; only its `manual_dir` is used.
    """
    filepath = dl_manager.manual_dir
    # There is no predefined train/val/test split for this dataset, so
    # everything goes into a single TRAIN split.
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            # These kwargs will be passed to _generate_examples.
            gen_kwargs={'filepath': filepath},
        ),
    ]

  def _generate_examples(self, filepath):
    """Yields examples for the CT lymph nodes dataset.

    Args:
        filepath: path to the CT lymph nodes files
    Yields:
        (key, example) pairs; examples are dictionaries with images and masks.
    """
    # NOTE(review): placeholder. `filepath` is not read yet and the yielded
    # dict has neither the 'image' nor the 'mask' key declared in `_info`,
    # so this must be replaced with real file reading before the dataset
    # can be built. TODO: implement once the layout of manual_dir is fixed.
    yield 'key', {}



In [1]:
CT_Lymph_Nodes()._info   

In [19]:
tfds.load('CT_Lymph_Nodes')

[1mDownloading and preparing dataset ct__lymph__nodes/1.0.0 (download: Unknown size, generated: Unknown size, total: Unknown size) to /home/jupyter/tensorflow_datasets/ct__lymph__nodes/1.0.0...[0m


TypeError: <class '__main__.CT_Lymph_Nodes'> is a built-in class