In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Two boolean labels used to try out the dtype conversions in the next cells.
ys = np.asarray([True, False], dtype=bool)

In [3]:
# Cast the boolean labels to integers (True -> 1, False -> 0) for display.
ys.astype(dtype=int)

array([1, 0])

In [4]:
# `np.float` was only an alias for the builtin `float` (i.e. float64) and was
# removed in NumPy 1.24; `np.float64` yields the same float64 tensor.
tf.convert_to_tensor(ys.astype(np.float64))

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([1., 0.])>

In [None]:
class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of one-hot encoded DNA strings.

    Each sample in ``xs`` is a nucleotide string. It is encoded as one row per
    character (columns in ``A, C, G, T`` order) and zero-padded at the end to
    ``input_size[0]`` rows, so every sample in a batch has the same shape.
    Labels in ``ys`` are returned as ``float64``.

    Args:
        xs: Array-like of nucleotide strings (``str`` or ``bytes``).
        ys: Array-like of labels, same length as ``xs``.
        batch_size: Number of samples per batch; must be positive.
        input_size: ``(max_sequence_length, n_channels)``. Only the first
            entry is used, as the padded sequence length.
        shuffle: If true, the sample order is reshuffled at every epoch end.

    Raises:
        ValueError: If ``batch_size`` is not positive, or if ``xs`` and
            ``ys`` differ in length.
    """

    # Row ``i`` of the table is the one-hot vector of ``_ALPHABET[i]``. The
    # extra last row (all ones) marks any other character: ``str.find``
    # returns -1 for those, and index -1 selects exactly that last row.
    _ALPHABET = "ACGT"
    _ONE_HOT_TABLE = np.vstack(
        [np.eye(4, dtype=np.float32), np.ones((1, 4), dtype=np.float32)]
    )

    def __init__(self, xs, ys, batch_size,
                 input_size=(906, 4), shuffle=True):
        super().__init__()
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        # Store the data first: everything below is derived from it.
        self.xs = np.asarray(xs)
        self.ys = np.asarray(ys)
        if len(self.xs) != len(self.ys):
            raise ValueError(
                f"xs and ys must have the same length, "
                f"got {len(self.xs)} and {len(self.ys)}"
            )

        self.n = len(self.xs)
        self.index = np.arange(self.n)
        self.shuffled_xs = self.xs[self.index]
        self.shuffled_ys = self.ys[self.index]
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.index)
            self.shuffled_xs = self.xs[self.index]
            self.shuffled_ys = self.ys[self.index]

    def __get_input(self, x):
        """Return ``x`` one-hot encoded and zero-padded to the target length.

        The original ``tfio.genome.sequences_to_onehot`` call is replaced by
        a NumPy lookup because ``tfio`` is never imported in this file.
        NOTE(review): the mapping (A, C, G, T columns; any other character
        -> [1, 1, 1, 1]) is intended to match that function's documented
        behaviour - confirm against the tensorflow-io docs. Matching is
        case-sensitive.

        Args:
            x: One nucleotide sequence as ``str`` or ``bytes``.

        Returns:
            ``float32`` array of shape ``(input_size[0], 4)``.

        Raises:
            ValueError: If the sequence is longer than ``input_size[0]``.
        """
        if isinstance(x, bytes):
            x = x.decode("ascii", errors="replace")

        target_len = self.input_size[0]
        seq_len = len(x)
        if seq_len > target_len:
            raise ValueError(
                f"sequence of length {seq_len} exceeds "
                f"input_size[0]={target_len}"
            )

        rows = np.fromiter(
            (self._ALPHABET.find(ch) for ch in x), dtype=np.intp, count=seq_len
        )
        # Pad with zeros to match the length of the longest sequence.
        padded_one_hot = np.zeros(
            (target_len, len(self._ALPHABET)), dtype=np.float32
        )
        padded_one_hot[:seq_len] = self._ONE_HOT_TABLE[rows]
        return padded_one_hot

    def __get_output(self, label):
        """Return ``label`` converted to ``float64``."""
        return np.asarray(label, dtype=np.float64)

    def __get_data(self, batches):
        """Build one ``(x_batch, y_batch)`` pair for the sample indices given.

        Args:
            batches: Integer index array selecting samples from ``xs``/``ys``.

        Returns:
            Tuple of the stacked encoded inputs and the stacked labels.
        """
        raw_x_batch = self.xs[batches]
        raw_y_batch = self.ys[batches]

        x_batch = np.asarray([self.__get_input(x) for x in raw_x_batch])
        y_batch = np.asarray([self.__get_output(y) for y in raw_y_batch])

        return x_batch, y_batch

    def __getitem__(self, idx):
        """Return batch number ``idx`` as ``(X, y)``.

        Raises:
            IndexError: If ``idx`` is outside ``range(len(self))``.
        """
        if not 0 <= idx < len(self):
            raise IndexError(
                f"batch index {idx} out of range for {len(self)} batches"
            )
        start = idx * self.batch_size
        batches = self.index[start:start + self.batch_size]
        return self.__get_data(batches)

    def __len__(self):
        """Return the number of full batches per epoch.

        Floor division means a trailing partial batch is dropped.
        """
        return self.n // self.batch_size