In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Test and training data are loaded with two separate calls, each one taking
# an index slice of a single split of the 'genomics_ood' dataset.
#
# Options shared by both calls:
#   with_info=True     -> load() returns a (dataset, info) pair
#   as_supervised=True -> elements are (input, label) tuples; with
#                         as_supervised=False they would be dictionaries
#                         holding all the features
#   shuffle_files=True -> the input files are shuffled automatically
ds_test, ds_test_info = tfds.load(
    name='genomics_ood',
    split='test[:1000]',
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
)

ds_train, ds_train_info = tfds.load(
    name='genomics_ood',
    split='train[:100000]',
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
)



In [3]:
def transformFunction(seq, label):
    """Turn one (sequence, label) example into one-hot encoded tensors.

    Each base letter in ``seq`` is replaced by a digit (A->1, C->2, G->3,
    T->0), the string is split into single bytes, and every digit is one-hot
    encoded with depth 4. The result is flattened into a 1-D tensor.
    ``label`` is one-hot encoded with depth 10.

    Args:
        seq: string tensor holding the sequence.
            NOTE(review): assumed to contain only A, C, G and T; any other
            character would reach ``tf.strings.to_number`` unchanged - confirm.
        label: integer class index.

    Returns:
        Tuple ``(flat_one_hot_sequence, one_hot_label)``.
    """
    base_to_digit = {"A": "1", "C": "2", "G": "3", "T": "0"}
    for base, digit in base_to_digit.items():
        seq = tf.strings.regex_replace(seq, base, digit)

    # One string per character, parsed as a number and cast to a uint8 index.
    digits = tf.strings.to_number(tf.strings.bytes_split(seq))
    encoded = tf.one_hot(tf.cast(digits, tf.uint8), 4)

    # Flatten the (length, 4) encoding into a single vector.
    flat_seq = tf.reshape(encoded, (-1,))
    return flat_seq, tf.one_hot(label, 10)


In [4]:
# --- training pipeline ---
ds_train = ds_train.map(transformFunction)
# Cache the already-transformed elements so later epochs do not have to
# re-read the files and re-run the mapping.
ds_train = ds_train.cache()
# The buffer size matches the number of training examples that were loaded.
ds_train = ds_train.shuffle(buffer_size=100000)
ds_train = ds_train.batch(32)
# Prefetching overlaps input preparation with training: while the model
# executes training step s, the pipeline already reads the data for step s+1.
# AUTOTUNE lets tf.data choose the buffer size; alternatively pass an explicit
# buffer_size (the maximum number of elements buffered while prefetching),
# which should be at least the number of batches consumed by a single
# training step.
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)

# --- test pipeline ---
ds_test = ds_test.map(transformFunction)
ds_test = ds_test.cache()
# Fix: shuffle the *test* data. This line used to read `ds_train.shuffle(...)`,
# which threw away the mapped/cached test set and turned the already batched
# training pipeline into the validation data.
ds_test = ds_test.shuffle(buffer_size=1000)
ds_test = ds_test.batch(32)
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)



In [5]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Layer

# Fully connected classifier: two sigmoid hidden layers with 256 units each
# and a 10-way softmax output.
model = tf.keras.Sequential(name="myModel")
# `shape` has to be a tuple. `(1000)` is just the integer 1000, so the
# trailing comma is required.
# NOTE(review): 1000 is presumably the length of the flattened one-hot
# sequence produced by the input pipeline - confirm.
model.add(tf.keras.Input(shape=(1000,), name="Input"))
model.add(tf.keras.layers.Dense(256, activation=tf.keras.activations.sigmoid, name="hidden_1"))
model.add(tf.keras.layers.Dense(256, activation=tf.keras.activations.sigmoid, name="hidden_2"))
# The layer name (including its spelling) is left untouched because it is a
# runtime identifier that shows up in the summary and in weight names.
model.add(tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax, name="otuput0"))

# Plain SGD with categorical cross-entropy, which expects one-hot labels.
opt = tf.keras.optimizers.SGD(learning_rate=0.1, name='SGD')
loss_fn = tf.keras.losses.categorical_crossentropy

model.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy'])
model.summary()

Model: "myModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_1 (Dense)             (None, 256)               256256    
_________________________________________________________________
hidden_2 (Dense)             (None, 256)               65792     
_________________________________________________________________
otuput0 (Dense)              (None, 10)                2570      
Total params: 324,618
Trainable params: 324,618
Non-trainable params: 0
_________________________________________________________________


### Run with GPU

In [6]:
# Pick the training device once and train exactly once.
# Previously the no-GPU branch called model.fit and the unconditional
# `with tf.device("/device:GPU:0")` block then called it a second time, so a
# machine without a GPU trained for twice the intended number of epochs and
# overwrote `history`.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Restrict the first GPU to a single virtual device with 2048 MB.
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)],
        )
    except RuntimeError as e:
        # Best effort: report the problem and keep going with the GPU as it
        # is (e.g. when the device had already been initialised).
        print(e)
    device = "/device:GPU:0"
else:
    device = "/device:CPU:0"

with tf.device(device):
    history = model.fit(ds_train, epochs=10, validation_data=ds_test)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

KeyboardInterrupt: 

In [7]:
# Plot the loss and accuracy curves recorded by model.fit.
# The x axis is derived from the number of recorded epochs instead of being
# hard-coded to 10, so the plot keeps working when the epoch count changes.
# NOTE: `history` only exists if the training cell ran to completion.
metrics = history.history
x_axis = np.arange(1, len(metrics["loss"]) + 1)

fig, ax = plt.subplots(1, 2)

ax[0].plot(x_axis, metrics["loss"], label="training")
ax[0].plot(x_axis, metrics["val_loss"], label="validation")
ax[0].set_title("Loss")
ax[0].set_xlabel("epoch")
ax[0].set_ylabel("loss")
ax[0].legend()

ax[1].plot(x_axis, metrics["accuracy"], label="training")
ax[1].plot(x_axis, metrics["val_accuracy"], label="validation")
ax[1].set_title("Accuracy")
ax[1].set_xlabel("epoch")
ax[1].set_ylabel("accuracy")
ax[1].legend()

fig.tight_layout()


NameError: name 'history' is not defined