# All

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import tensorflow_datasets as tfds
import os

## Data Prep

In [None]:
# Download the Tiny Shakespeare corpus and load it into memory as one string.
shakespeare_url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
filepath = keras.utils.get_file("shakespeare.txt", shakespeare_url)
# Decode explicitly: without `encoding`, open() falls back to the platform's
# locale encoding, which makes the loaded text machine-dependent.
with open(filepath, encoding="utf-8") as f:
    shakespeare_text = f.read()

Downloading data from https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Character-level tokenizer (char_level=True) fitted on the Shakespeare corpus.
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)
# The corpus string is passed as-is (not wrapped in a list).
tokenizer.fit_on_texts(shakespeare_text)

In [None]:
# Encode the whole corpus as one id sequence. The tokenizer's ids are shifted
# down by one here, and the first 90% of the sequence feeds the training dataset.
encoded = np.array(tokenizer.texts_to_sequences([shakespeare_text]))[0] - 1
train_size = (90 * len(encoded)) // 100
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

In [None]:
# Slice the id stream into overlapping windows of n_steps + 1 ids, advancing
# one id at a time; each nested window dataset is flattened into one tensor.
n_steps = 100
window_length = n_steps + 1
dataset = (
    dataset
    .window(window_length, shift=1, drop_remainder=True)
    .flat_map(lambda chars: chars.batch(window_length))
)

In [None]:
# Shuffle the windows, group them into full batches, then split every window
# into inputs (all but the last id) and targets (all but the first id).
batch_size = 256
dataset = (
    dataset
    .shuffle(10000)
    .batch(batch_size, drop_remainder=True)
    .map(lambda batch: (batch[:, :-1], batch[:, 1:]))
)

In [None]:
# Load the IMDB reviews dataset in supervised (input, label) form, along with
# its metadata, and pick out the train and test splits.
datasets, info = tfds.load("imdb_reviews", as_supervised=True, with_info=True)
train_set = datasets["train"]
test_set = datasets["test"]



Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.2R0ROY_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.2R0ROY_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.2R0ROY_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


## Preprocessing Data

In [None]:
def preprocess(X_batch, y_batch):
    """Clean a batch of review strings; labels pass through untouched.

    Steps, in order:
      1. keep only the first 300 positions of each string
         (tf.strings.substr counts bytes unless told otherwise),
      2. replace HTML line-break tags with a space,
      3. replace everything that is not an ASCII letter or apostrophe with a space.

    Returns:
        A `(cleaned_strings, y_batch)` tuple.
    """
    truncated = tf.strings.substr(X_batch, 0, 300)
    without_breaks = tf.strings.regex_replace(truncated, b"<br\\s*/?>", b" ")
    letters_only = tf.strings.regex_replace(without_breaks, b"[^a-zA-Z']", b" ")
    return letters_only, y_batch

In [None]:
# Early stopping on the validation loss (the callback's default monitor, spelled
# out here): stop after 5 epochs without improvement and restore the best weights.
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True,
    )
]

## Feature Engineering

In [None]:
# One-hot encode the input ids (targets stay as integer ids), using the number
# of distinct tokens known to the tokenizer as depth, then prefetch one batch.
max_id = len(tokenizer.word_index)
dataset = (
    dataset
    .map(lambda chars, targets: (tf.one_hot(chars, depth=max_id), targets))
    .prefetch(1)
)

## Building Model stateful GRU

In [None]:
# Two-layer GRU that predicts the next character at every time step.
#
# The layers are deliberately NOT stateful and the batch size is not pinned:
# the input pipeline shuffles overlapping windows, so consecutive batches are
# unrelated and any hidden state carried from one batch to the next would be
# noise. A stateful model is also locked to the training batch size, which
# prevents generate_text from predicting on a single prompt.
model_char_rnn = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=[None, max_id]),
    keras.layers.GRU(128, return_sequences=True, dropout=0.2),
    keras.layers.GRU(128, return_sequences=True, dropout=0.2),
    keras.layers.TimeDistributed(keras.layers.Dense(max_id, activation="softmax"))
])
model_char_rnn.compile(loss="sparse_categorical_crossentropy", optimizer="adam")

# This fit has no validation data, so the default `val_loss` monitor is never
# available and early stopping would silently do nothing. Monitor the training
# loss instead, with a callback dedicated to this model.
char_rnn_callbacks = [
    keras.callbacks.EarlyStopping(monitor="loss", patience=5, restore_best_weights=True)
]
history_char_rnn = model_char_rnn.fit(dataset, epochs=10, callbacks=char_rnn_callbacks)



Epoch 1/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m464s[0m 116ms/step - loss: 2.0810
Epoch 2/10


  current = self.get_monitor_value(logs)


[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 116ms/step - loss: 1.7004
Epoch 3/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m460s[0m 117ms/step - loss: 1.6564
Epoch 4/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m511s[0m 119ms/step - loss: 1.6380
Epoch 5/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m487s[0m 116ms/step - loss: 1.6266
Epoch 6/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m512s[0m 118ms/step - loss: 1.6195
Epoch 7/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m506s[0m 119ms/step - loss: 1.6140
Epoch 8/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 119ms/step - loss: 1.6103
Epoch 9/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m494s[0m 117ms/step - loss: 1.6064
Epoch 10/10
[1m3920/3920[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m510s[0m 119ms/step - loss: 1.6038


In [None]:
def generate_text(model, tokenizer, text, n_chars=50, temperature=1):
    """Extend `text` by sampling `n_chars` characters from `model`, one at a time.

    At each step the whole current text is encoded with `tokenizer`, one-hot
    encoded, and fed to `model`; the next character is sampled from the
    probabilities predicted for the last position.

    Args:
        model: model returning per-position character probabilities.
        tokenizer: the tokenizer used to encode the training text; its
            vocabulary size sets the one-hot depth.
        text: seed text to extend.
        n_chars: number of characters to append.
        temperature: divisor applied to the log-probabilities before sampling;
            must be strictly positive.

    Returns:
        The seed text followed by the generated characters.

    Raises:
        ValueError: if `temperature` is not strictly positive.
    """
    if temperature <= 0:
        # Dividing log-probabilities by 0 (or a negative value) produces
        # inf/nan or inverted logits rather than a usable distribution.
        raise ValueError("temperature must be > 0")
    # Derive the one-hot depth from the tokenizer itself instead of relying on
    # a notebook-level global being defined and in sync.
    depth = len(tokenizer.word_index)
    for _ in range(n_chars):
        # Ids are shifted down by one before encoding and back up after sampling.
        X_new = np.array(tokenizer.texts_to_sequences([text])) - 1
        X_one_hot = tf.one_hot(X_new, depth=depth)
        # verbose=0: avoid one progress bar per generated character.
        y_proba = model.predict(X_one_hot, verbose=0)[0, -1:, :]
        rescaled_logits = tf.math.log(y_proba) / temperature
        char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
        text += tokenizer.sequences_to_texts(char_id.numpy())[0]
    return text

## Pipeline

In [None]:
vocab_size = 1000
batch_size = 32

# Clean each split once, so that vocabulary building and model input are
# derived from exactly the same text.
train_set_clean = train_set.batch(batch_size).map(preprocess)
test_set_clean = test_set.batch(batch_size).map(preprocess)

text_vec_layer = keras.layers.TextVectorization(max_tokens=vocab_size, output_sequence_length=100)
# Adapt on the *preprocessed* training text. Adapting on the raw reviews would
# build the vocabulary from untruncated text that still contains HTML tags,
# i.e. from tokens that differ from what the layer is later asked to encode.
text_vec_layer.adapt(train_set_clean.map(lambda X, y: X))

# Preprocessing pipeline: cleaned text -> integer token ids, labels unchanged.
train_set_proc = train_set_clean.map(lambda X, y: (text_vec_layer(X), y)).prefetch(1)
test_set_proc = test_set_clean.map(lambda X, y: (text_vec_layer(X), y)).prefetch(1)

## Building Model Sentiment Analysis

In [None]:
# Binary sentiment classifier: embedding (id 0 masked) -> two stacked GRUs ->
# a single sigmoid unit.
# NOTE(review): the test split doubles as validation data for early stopping.
embed_size = 128
model_sentiment = keras.models.Sequential()
model_sentiment.add(keras.layers.Embedding(vocab_size, embed_size, mask_zero=True))
model_sentiment.add(keras.layers.GRU(128, return_sequences=True))
model_sentiment.add(keras.layers.GRU(128))
model_sentiment.add(keras.layers.Dense(1, activation="sigmoid"))
model_sentiment.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
history_sentiment = model_sentiment.fit(
    train_set_proc,
    epochs=20,
    validation_data=test_set_proc,
    callbacks=callbacks,
)

Epoch 1/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 26ms/step - accuracy: 0.5981 - loss: 0.6427 - val_accuracy: 0.7453 - val_loss: 0.5022
Epoch 2/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 23ms/step - accuracy: 0.7525 - loss: 0.4975 - val_accuracy: 0.7522 - val_loss: 0.4915
Epoch 3/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 0.7719 - loss: 0.4647 - val_accuracy: 0.7542 - val_loss: 0.4893
Epoch 4/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 0.7883 - loss: 0.4395 - val_accuracy: 0.7529 - val_loss: 0.4937
Epoch 5/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 17ms/step - accuracy: 0.8023 - loss: 0.4143 - val_accuracy: 0.7476 - val_loss: 0.5082
Epoch 6/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 17ms/step - accuracy: 0.8160 - loss: 0.3904 - val_accuracy: 0.7418 - val_loss: 0.5239
Epoch 7/20
[1m7

## Block Transformer

In [None]:
class PositionalEncoding(keras.layers.Layer):
    """Adds a fixed sinusoidal position table to its inputs.

    The table is precomputed once in the constructor with shape
    `(1, max_steps, max_dims)`: even feature columns hold sines, odd columns
    hold cosines. An odd `max_dims` is rounded up to the next even number.
    """
    def __init__(self, max_steps, max_dims, dtype=tf.float32, **kwargs):
        super().__init__(dtype=dtype, **kwargs)
        # Round an odd width up so sine and cosine columns pair off evenly.
        max_dims += max_dims % 2
        positions, pair_idx = np.meshgrid(np.arange(max_steps), np.arange(max_dims // 2))
        # Transposed so that rows are positions and columns are sin/cos pairs.
        angles = (positions / 10000**(2 * pair_idx / max_dims)).T
        table = np.empty((1, max_steps, max_dims))
        table[0, :, 0::2] = np.sin(angles)
        table[0, :, 1::2] = np.cos(angles)
        self.positional_embedding = tf.constant(table.astype(self.dtype))
    def call(self, inputs):
        # Crop the table to the input's last two dimensions before adding.
        dims = tf.shape(inputs)
        steps_in, width_in = dims[-2], dims[-1]
        return inputs + self.positional_embedding[:, :steps_in, :width_in]

In [None]:
class MultiHeadAttention(keras.layers.Layer):
    """Multi-head scaled dot-product attention built from per-head Dense layers.

    Every head has its own query/key/value Dense projections whose width equals
    the last dimension of the first input. Head outputs are concatenated along
    the last axis and mixed by a final Dense layer with `d_model` units.

    Args:
        n_heads: number of attention heads.
        d_model: number of units of the output projection.
        causal: when True, a query position can only attend to key positions at
            or before it; later positions are masked out before the softmax.

    Call input:
        A list of three rank-3 tensors in `(query, value, key)` order.
    """
    def __init__(self, n_heads, d_model, causal=False, **kwargs):
        super().__init__(**kwargs)
        self.n_heads = n_heads
        self.d_model = d_model
        self.causal = causal
    def build(self, input_shapes):
        # Each head keeps the full feature width of the first input.
        self.d_keys = input_shapes[0][-1]
        self.d_values = input_shapes[0][-1]
        self.wq = [keras.layers.Dense(self.d_keys) for _ in range(self.n_heads)]
        self.wk = [keras.layers.Dense(self.d_keys) for _ in range(self.n_heads)]
        self.wv = [keras.layers.Dense(self.d_values) for _ in range(self.n_heads)]
        self.wo = keras.layers.Dense(self.d_model)
    def call(self, inputs):
        q, v, k = inputs
        heads = []
        for i in range(self.n_heads):
            q_head, k_head, v_head = self.wq[i](q), self.wk[i](k), self.wv[i](v)
            k_transposed = tf.transpose(k_head, [0, 2, 1])
            attention_scores = q_head @ k_transposed
            # Scale in the scores' own dtype so non-float32 compute dtypes work too.
            attention_scores /= tf.math.sqrt(tf.cast(self.d_keys, attention_scores.dtype))
            if self.causal:
                # Lower-triangular matrix of ones: entry (i, j) is 1 iff j <= i.
                allowed = tf.linalg.band_part(
                    tf.ones(tf.shape(attention_scores)[-2:], dtype=attention_scores.dtype),
                    -1, 0)
                # Push disallowed (future) positions to the most negative value
                # of the dtype so they get zero weight after the softmax.
                attention_scores += (1.0 - allowed) * attention_scores.dtype.min
            attention_weights = tf.nn.softmax(attention_scores)
            heads.append(attention_weights @ v_head)
        heads_concatenated = tf.concat(heads, axis=-1)
        return self.wo(heads_concatenated)

In [None]:
class TransformerBlock(keras.layers.Layer):
    """Self-attention followed by a two-layer feed-forward network.

    Both sub-layers are wrapped the same way: the sub-layer output is added to
    its input and the sum is layer-normalized. The feed-forward network widens
    to `4 * d_model` units with ReLU, then projects back to `d_model`.
    """
    def __init__(self, n_heads, d_model, **kwargs):
        super().__init__(**kwargs)
        self.attention = MultiHeadAttention(n_heads, d_model)
        self.norm1 = keras.layers.LayerNormalization()
        self.norm2 = keras.layers.LayerNormalization()
        widen = keras.layers.Dense(d_model * 4, activation="relu")
        project = keras.layers.Dense(d_model)
        self.feed_forward = keras.models.Sequential([widen, project])
    def call(self, inputs):
        # Self-attention: the same tensor is passed three times to the attention layer.
        attended = self.norm1(inputs + self.attention([inputs] * 3))
        return self.norm2(attended + self.feed_forward(attended))

In [None]:
# Hyper-parameters for the Transformer block demo.
vocab_size = 1000
embed_size = 128
n_heads = 8
d_model = embed_size

# Seed the Python, NumPy and backend random generators so the random token ids
# below and the layers' initial weights -- and therefore the printed demo
# output -- are the same on every "Restart & Run All".
keras.utils.set_random_seed(42)

# Demo batch: 2 sequences of 10 random token ids in [0, vocab_size).
input_sequences = np.random.randint(vocab_size, size=(2, 10))

In [None]:
# Build the three demo stages: token embedding, positional encoding
# (max_steps=10 matches the length of the demo sequences), Transformer block.
embedding_layer = keras.layers.Embedding(input_dim=vocab_size, output_dim=embed_size)
pos_encoding_layer = PositionalEncoding(10, embed_size)
transformer_block = TransformerBlock(n_heads, d_model)

In [None]:
# Run the demo batch through the three stages in order. Each intermediate
# result keeps its own name because the following cell prints two of them.
embedding_output = embedding_layer(input_sequences)
pos_encoded_output = pos_encoding_layer(embedding_output)
transformer_output = transformer_block(pos_encoded_output)

In [None]:
# Sanity check: shapes before and after the Transformer block, then the first
# ten features of the first position of the first sequence.
print(pos_encoded_output.shape, transformer_output.shape, sep="\n")
print(transformer_output[0, 0, :10])

(2, 10, 128)
(2, 10, 128)
tf.Tensor(
[-3.0446513   1.6366378  -0.3542529   0.41864032  0.10543233  0.26620412
  0.9393467   0.46621236 -0.9112601   2.4177976 ], shape=(10,), dtype=float32)
