In [None]:
import bioio

In [None]:
# Build a loader from the dataspec file; the last expression's value is
# echoed as the cell output.
loader = bioio.load_dataspec('dataspec.yml')
loader.datastruct()

In [None]:
# Fetch the item at index 42 from the loader and display it.
x = loader[42]
x

In [None]:
import importlib

# Import numpy by its string name at runtime.
# NOTE(review): the variable `abc` shares its name with the stdlib `abc`
# module; a more descriptive name would avoid confusion.
abc = importlib.import_module('numpy')

In [None]:
# Show the name of the module object returned by the dynamic import.
abc.__name__

In [None]:
import yaml
import importlib

import tensorflow as tf

# class YAMLImportStatement(yaml.YAMLObject):
#     yaml_loader = yaml.SafeLoader
#     yaml_tag = 'import'

#     @classmethod
#     def from_yaml(cls, loader, node):
#         module_name = loader.construct_scalar(node)
#         imported_module = importlib.import_module(loader.construct_scalar(node))
#         print(module_name)
#         return module_name

class ConnectorWrapper:
    """Pair a data connector with the spec its outputs must satisfy.

    Calling the wrapper forwards all arguments to the connector, casts the
    result to ``spec.dtype`` and validates it against ``spec``.

    Args:
        spec: Object exposing ``dtype`` and ``is_compatible_with(x)``
            (a ``TensorSpec`` in this notebook).
        connector: Callable producing the raw data, or ``None`` when only the
            spec is of interest.
    """

    def __init__(self, spec, connector=None):
        self.spec = spec
        self.connector = connector

    def __repr__(self):
        return f'{type(self).__name__}({self.connector}, {self.spec})'

    def __call__(self, *args, **kwargs):
        """Run the connector and return its output cast to the spec dtype.

        Raises:
            ValueError: If no connector is attached.
            AssertionError: If the cast output is incompatible with the spec.
        """
        if self.connector is None:
            raise ValueError('No connector attached!')

        outputs = self._cast_dtype(self.connector(*args, **kwargs))

        # The dtype already matches after the cast, so in practice this
        # validates the shape.
        self._assert_spec(outputs)

        return outputs

    def _cast_dtype(self, x):
        """Cast ``x`` to the dtype declared by the spec."""
        return tf.cast(x, dtype=self.spec.dtype)

    def _assert_spec(self, x):
        """Raise ``AssertionError`` unless ``x`` is compatible with the spec."""
        # Explicit raise instead of a bare ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not self.spec.is_compatible_with(x):
            raise AssertionError(
                f'Connector output is not compatible with {self.spec}'
            )

class TensorSpec(tf.TensorSpec):
    """``tf.TensorSpec`` built from plain values, with a compact repr.

    Args:
        shape: Iterable of dimensions; converted to a tuple.
        dtype: Anything accepted by ``tf.dtypes.as_dtype``.
    """

    def __init__(self, shape, dtype):
        shape_tuple = tuple(shape)
        tf_dtype = tf.dtypes.as_dtype(dtype)
        super().__init__(shape=shape_tuple, dtype=tf_dtype)

    def __repr__(self):
        cls_name = type(self).__name__
        return f'{cls_name}(shape={self.shape}, dtype={self.dtype.name})'

class Fasta:
    """Minimal connector stand-in that only stores the file path it is given."""

    def __init__(self, filepath):
        self.filepath = filepath

    def __repr__(self):
        # Only the class name is shown; the path is intentionally omitted.
        return type(self).__name__

class MyLoader(yaml.SafeLoader):
    """``yaml.SafeLoader`` that understands ``!Loader:<name>`` and ``!Connector:<name>`` tags.

    ``<name>`` (the tag suffix) is looked up in ``REGISTERED_LOADERS`` or
    ``REGISTERED_CONNECTORS``; an unregistered name raises ``KeyError``.
    """

    REGISTERED_CONNECTORS = {'Fasta': Fasta}
    # NOTE(review): 'Bed' maps to Fasta; this looks like a placeholder, confirm.
    REGISTERED_LOADERS = {'Bed': Fasta}
    # Keys permitted inside a ``!Connector:`` mapping.
    _CONNECTOR_FIELDS = ('module', 'args', 'spec')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # add_multi_constructor is a classmethod, so these calls store this
        # instance's bound methods on type(self). Binding through the instance
        # is what lets subclass overrides (e.g. _make_connector) take effect.
        self.add_multi_constructor('!Loader:', self.loader_constructor)
        self.add_multi_constructor('!Connector:', self.connector_constructor)

    def loader_constructor(self, loader, tag_suffix, node):
        """Build the registered loader named ``tag_suffix`` from the node's ``args``.

        Raises:
            KeyError: If ``tag_suffix`` is not registered or ``args`` is missing.
        """
        fields = loader.construct_mapping(node, deep=True)
        return self.REGISTERED_LOADERS[tag_suffix](**fields['args'])

    def connector_constructor(self, loader, tag_suffix, node):
        """Build a ``ConnectorWrapper`` from a ``!Connector:<name>`` mapping.

        The mapping may contain ``module``, ``args`` and ``spec``; ``spec`` is
        mandatory and ``args`` is read unconditionally.

        Raises:
            ValueError: On an unexpected field or a missing ``spec``.
            KeyError: If ``args`` is missing or ``tag_suffix`` is unregistered.
        """
        # Construct the mapping once; a second deep construction would build
        # every nested object again.
        fields = loader.construct_mapping(node, deep=True)
        for field in fields:
            if field not in self._CONNECTOR_FIELDS:
                raise ValueError(f'Unexpected field \'{field}\'')

        # Debug trace of what is being parsed.
        print(type(loader).__name__)
        print(tag_suffix)
        print(fields)

        # Check the mandatory spec before instantiating the connector, so an
        # incomplete entry fails without creating the connector first.
        if 'spec' not in fields:
            raise ValueError('Missing mandatory field \'spec\'. ')

        connector = self._make_connector(tag_suffix, **fields['args'])
        spec = self._make_spec(**fields['spec'])

        return ConnectorWrapper(spec, connector)

    def _make_connector(self, tag_suffix, **kwargs):
        """Instantiate the registered connector for ``tag_suffix``."""
        return self.REGISTERED_CONNECTORS[tag_suffix](**kwargs)

    def _make_spec(self, **kwargs):
        """Build the ``TensorSpec`` described by the ``spec`` mapping."""
        return TensorSpec(**kwargs)
    
class EmptyMyLoader(MyLoader):
    """``MyLoader`` variant that never builds a connector (spec-only parsing)."""

    def _make_connector(self, tag_suffix, **kwargs):
        """Ignore the tag and arguments; the wrapper receives no connector."""
        return None
    

def load_dataspec(dataspec_yaml, spec_only=False):
    """Parse a dataspec YAML file using the custom loader classes.

    Args:
        dataspec_yaml: Path of the YAML file to read.
        spec_only: If true, parse with ``EmptyMyLoader`` (connectors are left
            as ``None``); otherwise parse with ``MyLoader``.

    Returns:
        The object produced by ``yaml.load`` for the file.
    """
    loader_cls = EmptyMyLoader if spec_only else MyLoader

    # Open the path that was passed in rather than a fixed file name.
    with open(dataspec_yaml, 'r') as f:
        return yaml.load(f, loader_cls)

# Parse the test file in spec-only mode (EmptyMyLoader: no connectors built).
data = load_dataspec('test.yml', spec_only=True)
# Separator between anything printed while parsing and the parsed result.
print('---')
print(data)

In [None]:
# Display the BaseLoader attribute of bioio.loaders.
bioio.loaders.BaseLoader

In [None]:
def load_dataspec(dataspec_yaml, spec_only=False):
    """Parse a dataspec YAML file using the custom loader classes.

    This cell redefines the function of the same name from the earlier
    class-definition cell.

    Args:
        dataspec_yaml: Path of the YAML file to read.
        spec_only: If true, parse with ``EmptyMyLoader``; otherwise parse with
            ``MyLoader``.

    Returns:
        The object produced by ``yaml.load`` for the file.
    """
    loader_cls = EmptyMyLoader if spec_only else MyLoader

    # Open the path that was passed in rather than a fixed file name.
    with open(dataspec_yaml, 'r') as f:
        return yaml.load(f, loader_cls)

In [None]:
import yaml

import bioio
# print(bioio.REGISTERED_LOADERS)
# print(bioio.REGISTERED_CONNECTORS)

print('---')
# NOTE(review): the meaning of `dry` is defined inside bioio and is not
# visible from this notebook.
loader = bioio.load_dataspec('dataspec.yml', dry=False)
print('---')
print(loader)
loader.summary()

In [None]:
# Call the loader's summary, then print the item at index 42.
loader.summary()
print(loader[42])

In [5]:
import bioio

# Load the TFRecord file(s) together with the dataspec and show the
# resulting element spec.
dataset = bioio.load_tfrecords(['data.tfrecord'], 'dataspec.yml')
print(dataset.element_spec)

print('\n---\n')

# Take a single element and print its keys, then the element itself.
for s in dataset.take(1):
    print(s.keys())
    print(s)

{'version': '0.1.0', 'loader': None, 'data_structure': {'meta': ConnectorWrapper(NoneType, TensorSpec(shape=(), dtype=string)), 'inputs': ConnectorWrapper(NoneType, TensorSpec(shape=(None, 4), dtype=int8)), 'outputs': {'TaskOne': {'total': ConnectorWrapper(NoneType, TensorSpec(shape=(None,), dtype=float32)), 'control': ConnectorWrapper(NoneType, TensorSpec(shape=(None,), dtype=float32))}}}}
{'inputs': TensorSpec(shape=<unknown>, dtype=tf.int8, name=None), 'meta': TensorSpec(shape=<unknown>, dtype=tf.string, name=None), 'outputs': {'TaskOne': {'control': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'total': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None)}}}

---

dict_keys(['inputs', 'meta', 'outputs'])
{'inputs': <tf.Tensor: shape=(201, 4), dtype=int8, numpy=
array([[0, 0, 0, 1],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 1, 0, 0],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 1, 0],
       [0, 1, 0, 0]

In [None]:
import tensorflow as tf

# Cast a Python list to a float16 tensor.
s = tf.cast([1,2,3], tf.float16)
# NOTE: this bare expression is not echoed; only a cell's final expression
# is, and this cell ends with the assignment below.
s

# Keep the tensor's dtype object for inspection in a later cell.
s_d = s.dtype

In [None]:
# Display the dtype object stored in `s_d`.
s_d

In [None]:
# Call the loader's summary again (relies on `loader` from an earlier cell).
loader.summary()

In [None]:
# Display the concrete class of the `loader` object.
type(loader)

In [None]:
# Read the raw text of the test YAML file and print it.
# NOTE: this rebinds the global `s`, which other cells use for other values.
with open('test.yml', 'r') as f:
    s = f.read()
print(s)

In [None]:
# Display the current value of `s` via its repr (unlike print, escape
# sequences such as newlines stay visible when `s` is a string).
s

In [None]:
# Call datastruct() on the loader; the return value is echoed as cell output.
loader.datastruct()

In [None]:
# Insert a dummy entry into the registry dict.
# NOTE(review): in this notebook REGISTERED_LOADERS is only imported in a
# later cell, so on a fresh kernel run top-to-bottom this raises NameError.
REGISTERED_LOADERS['a'] = 2

In [None]:
# Display the registry as exposed on the bioio package.
bioio.REGISTERED_LOADERS

In [None]:
from bioio.engine import REGISTERED_LOADERS, REGISTERED_CONNECTORS

In [None]:
# Display the registry imported from bioio.engine.
REGISTERED_LOADERS

In [None]:
# Parse the test file with PyYAML's UnsafeLoader.
# NOTE(review): UnsafeLoader can construct arbitrary Python objects from the
# document; only use it on trusted files.
with open('test.yml', 'r') as f:
    data = yaml.load(f, yaml.UnsafeLoader)

In [None]:
import yaml

In [None]:
class CustomLoader(yaml.SafeLoader):
    """SafeLoader subclass whose constructor deliberately does nothing."""

    def __init__(self):
        # The parent initializer is not called, so no stream is attached.
        return None

my_loader = CustomLoader()

In [None]:
# Display the yaml_constructors attribute reachable from the loader instance.
my_loader.yaml_constructors

In [None]:
# Take the loader's dataset and group it into batches of 4.
# NOTE: `dataset` is rebound here; re-running only the second line would
# batch an already batched dataset.
dataset = loader.dataset
dataset = dataset.batch(4)

In [None]:
# Print only the first element of the dataset, then stop iterating.
for x in dataset:
    print(x)
    break

In [None]:
import tensorflow as tf

# Serialize every sample yielded by the loader into one TFRecord file.
with tf.io.TFRecordWriter('abc.tfrecord') as writer:
    for sample in iter(loader):
        # loader.serialize is expected to return the record payload; confirm in bioio.
        writer.write(loader.serialize(sample))

In [None]:
# for s in io.load_tfrecords(['abc.tfrecord'], 'dataspec.yml'):
#     print(s)

In [None]:
# Rebind `dataset` to the loader's dataset and show its element spec.
dataset = loader.dataset
print(dataset.element_spec)

In [None]:
# Batch by 8 and print a single batch.
# NOTE: this rebinds `dataset`, so re-running the cell batches again.
dataset = dataset.batch(8)
for b in dataset.take(1):
    print(b)