In [2]:
import tensorflow as tf

# Example dataset: five sentences, each paired by position with a binary label.
sentences = [
    "TensorFlow is great for machine learning",
    "Natural language processing is fun",
    "I love creating deep learning models",
    "Transformers have revolutionized NLP",
    "TensorFlow Hub provides pre-trained models"
]
labels = [1, 0, 1, 0, 1]  # Example binary labels

# Create a tf.data Dataset that yields one (sentence, label) pair per element.
data = tf.data.Dataset.from_tensor_slices((sentences, labels))

# Text vectorization layer: learns a vocabulary (capped at 1000 tokens) and
# maps each sentence to a sequence of integer token ids.
vectorizer = tf.keras.layers.TextVectorization(output_mode='int', max_tokens=1000)
vectorizer.adapt(sentences)  # Fit the vocabulary on the example sentences

# Pipeline: Shuffle, batch, tokenize, and prefetch.
# Tokenizing happens after batching, so each batch is padded with 0 only up to
# the length of its own longest sentence; batch widths can therefore differ.
pipeline = (
    data
    .shuffle(buffer_size=5)  # Shuffle the sentences
    .batch(2)                # Batch data in groups of 2
    .map(lambda x, y: (vectorizer(x), y))  # Tokenize sentences
    .prefetch(buffer_size=tf.data.AUTOTUNE)  # Prefetch for efficiency
)

# Print the output from each batch.
# The label loop variable is deliberately not called `labels`: reusing that
# name would rebind the `labels` list defined above to the last batch's tensor,
# silently corrupting state for any later code that still expects the list.
for batch, batch_labels in pipeline:
    print("Batch (tokenized):", batch.numpy())
    print("Labels:", batch_labels.numpy())

Batch (tokenized): [[ 6 18  7 11  0  0]
 [ 2  5 19 21 13  4]]
Labels: [0 1]
Batch (tokenized): [[12 15  9  5 20  0]
 [16 14 23 22  4  3]]
Labels: [0 1]
Batch (tokenized): [[ 2 17  8 10  3]]
Labels: [1]


In [3]:
import tensorflow as tf

# Toy corpus: each entry is a (sentence, binary label) pair.
labelled_sentences = [
    ("TensorFlow is great for machine learning", 1),
    ("Natural language processing is fun", 0),
    ("I love creating deep learning models", 1),
    ("Transformers have revolutionized NLP", 0),
    ("TensorFlow Hub provides pre-trained models", 1),
]

# Split the pairs back into the two parallel lists.
sentences = [text for text, _ in labelled_sentences]
labels = [label for _, label in labelled_sentences]

# Dataset yielding one (sentence, label) element per pair; shown as the cell output.
data = tf.data.Dataset.from_tensor_slices((sentences, labels))
data

<_TensorSliceDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int32, name=None))>

In [4]:
# Integer-encoding text vectorizer with a vocabulary capped at 1000 tokens.
vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=1000,
    output_mode='int',
)
# Learn the vocabulary from the example sentences, then show the layer.
vectorizer.adapt(sentences)
vectorizer

<TextVectorization name=text_vectorization_2, built=False>

In [5]:
# Input pipeline: shuffle -> batch -> tokenize -> prefetch
pipeline = (
    data
    .shuffle(5)                  # shuffle buffer of 5 elements
    .batch(batch_size=2)         # group elements into batches of 2
    .map(lambda text, label: (vectorizer(text), label))  # token ids replace raw text
    .prefetch(tf.data.AUTOTUNE)  # let tf.data pick the prefetch buffer size
)
pipeline

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, None), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [1]:
import tensorflow as tf

# Source dataset: the integers 0 through 99
data = tf.data.Dataset.range(100)

# Input pipeline: shuffle -> batch -> double -> prefetch
pipeline = (
    data
    .shuffle(100)                # shuffle buffer of 100 elements
    .batch(batch_size=10)        # group elements into batches of 10
    .map(lambda values: values * 2)  # double every value in the batch
    .prefetch(tf.data.AUTOTUNE)  # let tf.data pick the prefetch buffer size
)

# Show each batch as a NumPy array
for doubled_batch in pipeline:
    print(doubled_batch.numpy())

[ 62 198  24  30 148  44  12  72 164 118]
[150  46 172  80   8  90  64   4 138 192]
[ 40  22 102 196  18  78  82  88 176  54]
[ 36 106 180  58  14 158 130 162 146  66]
[170 194  94 110 116 122  34 184  16 108]
[132  20 190  92  38   0  26 128  52  74]
[ 98 182  56 100  76  28 174 152 166 160]
[ 96 120   6 144  84 112 188  10 168 156]
[ 50  68  70 104  48  60 142  42 134 154]
[114 124 136 178  86 126   2  32 140 186]


In [2]:
# Toy corpus and its position-aligned binary labels
sentences = [
    "TensorFlow is great for machine learning",
    "Natural language processing is fun",
    "I love creating deep learning models",
    "Transformers have revolutionized NLP",
    "TensorFlow Hub provides pre-trained models",
]
labels = [1, 0, 1, 0, 1]

# Dataset yielding one (sentence, label) element per example
data = tf.data.Dataset.from_tensor_slices((sentences, labels))

# Walk the dataset element by element, turning each tensor into a plain value
for sentence_tensor, label_tensor in data:
    sentence_text = sentence_tensor.numpy().decode('utf-8')  # string tensors hold bytes
    label_value = label_tensor.numpy()
    print("Sentence:", sentence_text)
    print("Label:", label_value)

Sentence: TensorFlow is great for machine learning
Label: 1
Sentence: Natural language processing is fun
Label: 0
Sentence: I love creating deep learning models
Label: 1
Sentence: Transformers have revolutionized NLP
Label: 0
Sentence: TensorFlow Hub provides pre-trained models
Label: 1
