In [1]:
# imports

import logging
import time

import numpy as np
import matplotlib.pyplot as plt

import tensorflow_datasets as tfds
import tensorflow as tf

import tensorflow_text


  from .autonotebook import tqdm as notebook_tqdm
2023-03-17 05:02:46.090590: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-17 05:02:46.191478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.6/lib64:
2023-03-17 05:02:46.191495: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-17 05:02:46.835093: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.

In [2]:
# The dataset preparatoin code

In [3]:
def positional_encoding(length, depth):
  depth = depth/2

  positions = np.arange(length)[:, np.newaxis]     # (seq, 1)
  depths = np.arange(depth)[np.newaxis, :]/depth   # (1, depth)

  angle_rates = 1 / (10000**depths)         # (1, depth)
  angle_rads = positions * angle_rates      # (pos, depth)

  pos_encoding = np.concatenate(
      [np.sin(angle_rads), np.cos(angle_rads)],
      axis=-1)

  return tf.cast(pos_encoding, dtype=tf.float32)


In [4]:
class PositionalEmbedding(tf.keras.layers.Layer):
  def __init__(self, vocab_size, d_model):
    super().__init__()
    self.d_model = d_model
    self.embedding = tf.keras.layers.Embedding(
        vocab_size, d_model, mask_zero=True)
    self.pos_encoding = positional_encoding(length=2048, depth=d_model)

  def compute_mask(self, *args, **kwargs):
    return self.embedding.compute_mask(*args, **kwargs)

  def call(self, x):
    length = tf.shape(x)[1]
    x = self.embedding(x)
    # This factor sets the relative scale of the embedding and positonal_encoding.
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x = x + self.pos_encoding[tf.newaxis, :length, :]
    return x


In [5]:
class BaseAttention(tf.keras.layers.Layer):
  def __init__(self, **kwargs):
    super().__init__()
    self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
    self.layernorm = tf.keras.layers.LayerNormalization()
    self.add = tf.keras.layers.Add()

class CrossAttention(BaseAttention):

  def call(self, x, context):
    attn_output, attn_scores = self.mha(
        query=x,
        key=context,
        value=context,
        return_attention_scores=True)

    # Cache the attention scores for plotting later.
    self.last_attn_scores = attn_scores

    x = self.add([x, attn_output])
    x = self.layernorm(x)

    return x


class GlobalSelfAttention(BaseAttention):
  def call(self, x):
    attn_output = self.mha(
        query=x,
        value=x,
        key=x)
    x = self.add([x, attn_output])
    x = self.layernorm(x)
    return x


class CausalSelfAttention(BaseAttention):
  def call(self, x):
    attn_output = self.mha(
        query=x,
        value=x,
        key=x,
        use_causal_mask=True)
    x = self.add([x, attn_output])
    x = self.layernorm(x)
    return x


In [6]:
class FeedForward(tf.keras.layers.Layer):
  def __init__(self, d_model, dff, dropout_rate=0.1):
    super().__init__()
    self.seq = tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),
        tf.keras.layers.Dense(d_model),
        tf.keras.layers.Dropout(dropout_rate)
    ])
    self.add = tf.keras.layers.Add()
    self.layer_norm = tf.keras.layers.LayerNormalization()

  def call(self, x):
    x = self.add([x, self.seq(x)])
    x = self.layer_norm(x)
    return x


In [7]:
class EncoderLayer(tf.keras.layers.Layer):
  def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.1):
    super().__init__()

    self.self_attention = GlobalSelfAttention(
        num_heads=num_heads,
        key_dim=d_model,
        dropout=dropout_rate)

    self.ffn = FeedForward(d_model, dff)

  def call(self, x):
    x = self.self_attention(x)
    x = self.ffn(x)
    return x


In [8]:
class Encoder(tf.keras.layers.Layer):
  def __init__(self, *, num_layers, d_model, num_heads,
               dff, vocab_size, dropout_rate=0.1):
    super().__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    self.pos_embedding = PositionalEmbedding(
        vocab_size=vocab_size, d_model=d_model)

    self.enc_layers = [
        EncoderLayer(d_model=d_model,
                     num_heads=num_heads,
                     dff=dff,
                     dropout_rate=dropout_rate)
        for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(dropout_rate)

  def call(self, x):
    # `x` is token-IDs shape: (batch, seq_len)
    x = self.pos_embedding(x)  # Shape `(batch_size, seq_len, d_model)`.

    # Add dropout.
    x = self.dropout(x)

    for i in range(self.num_layers):
      x = self.enc_layers[i](x)

    return x  # Shape `(batch_size, seq_len, d_model)`.


In [9]:
class DecoderLayer(tf.keras.layers.Layer):
  def __init__(self,
               *,
               d_model,
               num_heads,
               dff,
               dropout_rate=0.1):
    super(DecoderLayer, self).__init__()

    self.causal_self_attention = CausalSelfAttention(
        num_heads=num_heads,
        key_dim=d_model,
        dropout=dropout_rate)

    self.cross_attention = CrossAttention(
        num_heads=num_heads,
        key_dim=d_model,
        dropout=dropout_rate)

    self.ffn = FeedForward(d_model, dff)

  def call(self, x, context):
    x = self.causal_self_attention(x=x)
    x = self.cross_attention(x=x, context=context)

    # Cache the last attention scores for plotting later
    self.last_attn_scores = self.cross_attention.last_attn_scores

    x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
    return x


In [10]:
class Decoder(tf.keras.layers.Layer):
  def __init__(self, *, num_layers, d_model, num_heads, dff, vocab_size,
               dropout_rate=0.1):
    super(Decoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size,
                                             d_model=d_model)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)
    self.dec_layers = [
        DecoderLayer(d_model=d_model, num_heads=num_heads,
                     dff=dff, dropout_rate=dropout_rate)
        for _ in range(num_layers)]

    self.last_attn_scores = None

  def call(self, x, context):
    # `x` is token-IDs shape (batch, target_seq_len)
    x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)

    x = self.dropout(x)

    for i in range(self.num_layers):
      x = self.dec_layers[i](x, context)

    self.last_attn_scores = self.dec_layers[-1].last_attn_scores

    # The shape of x is (batch_size, target_seq_len, d_model).
    return x


In [11]:
class Transformer(tf.keras.Model):
  def __init__(self, *, num_layers, d_model, num_heads, dff,
               input_vocab_size, target_vocab_size, dropout_rate=0.1):
    super().__init__()
    self.encoder = Encoder(num_layers=num_layers, d_model=d_model,
                           num_heads=num_heads, dff=dff,
                           vocab_size=input_vocab_size,
                           dropout_rate=dropout_rate)

    self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
                           num_heads=num_heads, dff=dff,
                           vocab_size=target_vocab_size,
                           dropout_rate=dropout_rate)

    self.final_layer = tf.keras.layers.Dense(target_vocab_size)

  def call(self, inputs):
    # To use a Keras model with `.fit` you must pass all your inputs in the
    # first argument.
    context, x = inputs

    context = self.encoder(context)  # (batch_size, context_len, d_model)

    x = self.decoder(x, context)  # (batch_size, target_len, d_model)

    # Final linear layer output.
    logits = self.final_layer(x)  # (batch_size, target_len, target_vocab_size)

    try:
      # Drop the keras mask, so it doesn't scale the losses/metrics.
      # b/250038731
      del logits._keras_mask
    except AttributeError:
      pass

    # Return the final output and the attention weights.
    return logits


In [12]:
import string
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.model_selection import train_test_split


In [13]:

df = pd.read_csv("IMDB Dataset.csv")

# Pre-processing fucntions


def clear_html(text):
    return BeautifulSoup(text, "html.parser").get_text()


def clear_punctuations(text):
    return text.translate(str.maketrans("", "", string.punctuation))


def sentiment_converter(text):
    if text == "positive":
        return 1
    else:
        return 0
# %% Pre-processing part 1


df["review"] = df["review"].apply(clear_html)
df["review"] = df["review"].apply(clear_punctuations)
df["sentiment"] = df["sentiment"].apply(sentiment_converter)
df.head()

# %% Pre-processing part2 tokenzing and spliting

tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=10_000, oov_token='<OOV>')
tokenizer.fit_on_texts(df["review"])
sequences = tokenizer.texts_to_sequences(df["review"])
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    sequences, maxlen=128, padding="post", truncating="post")

X_train, X_val, y_train, y_val = train_test_split(
    padded_sequences, df["sentiment"], test_size=0.2, random_state=42)

X_train[0:5],y_train[0:5]

  return BeautifulSoup(text, "html.parser").get_text()


(array([[ 176,   47,   10,  760, 2089,  518,  294,    2,  104, 1823, 1987,
         4449, 6593,    3,  805, 5243,   12,    1,    2,    1,  228,    2,
         6095,   83,  864,   54,   52,   22,  102,    5,    2, 4348,  100,
           35,   25,   38,  114, 1135,   12,    9,    7, 2287, 1150,    6,
          449,   47,  537,    6,   93,   34,   23,   40,  935,  427,    1,
           16,    2,  173,    6, 3009,   24,    1, 4660,   20,    4, 2940,
           12,   43,   74,  223,   71,  123,    8,   78, 3122,  190,   20,
          238,    3,    2,    1,  208, 4930,  144,   21,   62,   28,   16,
            1,   81,  354,  294,    4,   19,   18,    9,  208,   26,  293,
           12,    1,    1,   14,    2, 8617,    1,  116,  437,    3,    1,
            1,   14,    2, 1570,    1,  640,   65,  405,  377,   10,   89,
          117,   47,  111,  374,   34,    1,   36],
        [  10,  112,   21,  103,    2,  418,   17,   10,   96,   21,  103,
            2,  418,   17,   10, 2248,    2,  27

In [17]:
num_layers = 4
d_model = 128
dff = 512
num_heads = 8
dropout_rate = 0.1


transformer = Transformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=128,
    target_vocab_size=1,
    dropout_rate=dropout_rate)

In [42]:
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(1e-4)
# print(X_train.shape)
batch_size = 128
# makae transformer.build to work

# transformer.summary()
# transformer.compile(optimizer=optimizer, loss=loss_object)

In [27]:
num_epochs = 10
batch_size = 64

train_dataset = tf.data.Dataset.from_tensor_slices(
    (X_train, y_train)).shuffle(buffer_size=10000).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices(
    (X_val, y_val)).batch(batch_size)


@tf.function
def train_step(model, input, target):
    # Forward pass
    with tf.GradientTape() as tape:
        predictions = model(input)
        loss = loss_object(target, predictions)

    # Backward pass
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss


for epoch in range(num_epochs):
    for (batch, (inputs, targets)) in enumerate(train_dataset):
        train_step(inputs, targets)


TypeError: in user code:


    TypeError: outer_factory.<locals>.inner_factory.<locals>.tf__train_step() missing 1 required positional argument: 'target'


In [21]:
batch_size = 64
num_epochs = 10

In [22]:
train_dataset = tf.data.Dataset.from_tensor_slices(
    (X_train, y_train)).shuffle(buffer_size=10000).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices(
    (X_val, y_val)).batch(batch_size)

for epoch in range(num_epochs):
    train_loss = 0.0
    val_loss = 0.0
    train_batches = 0
    val_batches = 0

    # Train loop
    for batch, (input, target) in enumerate(train_dataset):
        loss = train_step(transformer, input, target)
        train_loss += loss
        train_batches += 1

        if batch % 100 == 0:
            print(f"Epoch {epoch + 1}, Batch {batch}, Train Loss {loss:.4f}")

    # Validation loop
    for batch, (input, target) in enumerate(val_dataset):
        predictions = transformer(input, training=False)
        loss = loss_object(target, predictions)
        val_loss += loss
        val_batches += 1

    train_loss /= train_batches
    val_loss /= val_batches

    print(
        f"Epoch {epoch + 1}, Train Loss {train_loss:.4f}, Val Loss {val_loss:.4f}")


OperatorNotAllowedInGraphError: in user code:

    File "/tmp/ipykernel_3880262/2395128574.py", line 5, in train_step  *
        predictions = model(input)
    File "/home/d3vil-server/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None

    OperatorNotAllowedInGraphError: Exception encountered when calling layer 'transformer_3' (type Transformer).
    
    in user code:
    
        File "/tmp/ipykernel_3880262/3136393357.py", line 20, in call  *
            context, x = inputs
    
        OperatorNotAllowedInGraphError: Iterating over a symbolic `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
    
    
    Call arguments received by layer 'transformer_3' (type Transformer):
      • inputs=tf.Tensor(shape=(64, 128), dtype=int32)
