In [12]:
import numpy as np
import random
import tensorflow as tf
from keras.backend import softmax

### Mini Layers

In [2]:
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_ff, d_model, **kwargs):
        super(FeedForward, self).__init__(**kwargs)
        self.fully_connected1 = tf.keras.layers.Dense(d_ff)
        self.fully_connected2 = tf.keras.layers.Dense(d_model)
        self.activation = tf.keras.layers.ReLU()

    def call(self, x):
        x_fc1 = self.fully_connected1(x)
        return self.fully_connected2(self.activation(x_fc1))

In [3]:
class AddNormalization(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AddNormalization, self).__init__(**kwargs)
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x, sublayer_x):
        add = x + sublayer_x
        return self.layer_norm(add)

In [9]:
class PositionEmbeddingFixedWeights(tf.keras.layers.Layer):
    def __init__(self, sequence_length, vocab_size, output_dim, **kwargs):
        super(PositionEmbeddingFixedWeights, self).__init__(**kwargs)
        word_embedding_matrix = self.get_position_encoding(vocab_size, output_dim)   
        position_embedding_matrix = self.get_position_encoding(sequence_length, output_dim)                                          
        self.word_embedding_layer = tf.keras.layers.Embedding(
            input_dim=vocab_size, output_dim=output_dim,
            weights=[word_embedding_matrix],
            trainable=False
        )
        self.position_embedding_layer = tf.keras.layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim,
            weights=[position_embedding_matrix],
            trainable=False
        )
             
    def get_position_encoding(self, seq_len, d, n=10000):
        P = np.zeros((seq_len, d))
        for k in range(seq_len):
            for i in np.arange(int(d/2)):
                denominator = np.power(n, 2*i/d)
                P[k, 2*i] = np.sin(k/denominator)
                P[k, 2*i+1] = np.cos(k/denominator)
        return P
 
 
    def call(self, inputs):        
        position_indices = tf.range(tf.shape(inputs)[-1])
        embedded_words = self.word_embedding_layer(inputs)
        embedded_indices = self.position_embedding_layer(position_indices)
        return embedded_words + embedded_indices

### Multi-Head Attention

In [6]:
class DotProductAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(DotProductAttention, self).__init__(**kwargs)
    
    def call(self, queries, keys, values, d_k, mask=None):
        scores = tf.matmul(queries, keys, transpose_b=True) / tf.math.sqrt(cast(d_k, tf.float32))

        if mask is not None:
            scores += -1e-9 * mask

        weights = softmax(scores)

        return tf.matmul(weights, values)


class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, h, d_k, d_v, d_model, **kwargs):
        super(MultiHeadAttention, self).__init__(**kwargs)
        self.attention = DotProductAttention()
        self.heads = h
        self.d_k = d_k
        self.d_v = d_v
        self.d_model = d_model
        self.W_q = tf.keras.layers.Dense(d_k)
        self.W_k = tf.keras.layers.Dense(d_k)
        self.W_v = tf.keras.layers.Dense(d_v)
        self.W_o = tf.keras.layers.Dense(d_model)

    def reshape_tensor(self, x, heads, flag):
        if flag:
            x = tf.reshape(x, shape=(tf.shape(x)[0], tf.shape(x)[1], heads, -1))
            x = tf.transpose(x, perm=(0, 2, 1, 3))
        else:
            x = tf.transpose(x, perm=(0, 2, 1, 3))
            x = tf.reshape(x, shape=(tf.shape(x)[0], tf.shape(x)[1], self.d_k))
        return x

    def call(self, queries, keys, values, mask=None):

        q_reshaped = self.reshape_tensor(self.W_q(queries), self.heads, True)
        
        k_reshaped = self.reshape_tensor(self.W_k(keys), self.heads, True)
        
        v_reshaped = self.reshape_tensor(self.W_v(values), self.heads, True)

        o_reshaped = self.attention(q_reshaped, k_reshaped, v_reshaped, self.d_k, mask)
    
        output = self.reshape_tensor(o_reshaped, self.heads, False)
    
        return self.W_o(output)

### Encoder Layer

In [7]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, h, d_k, d_v, d_model, d_ff, rate, **kwargs):
        super(EncoderLayer, self).__init__(**kwargs)
        self.multihead_attention = MultiHeadAttention(h, d_k, d_v, d_model)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.add_norm1 = AddNormalization()
        self.feed_forward = FeedForward(d_ff, d_model)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.add_norm2 = AddNormalization()

    def call(self, x, padding_mask, training):
        
        multihead_output = self.multihead_attention(x, x, x, padding_mask)

        multihead_output = self.dropout1(multihead_output, training=training)

        addnorm_output = self.add_norm1(x, multihead_output)

        feedforward_output = self.feed_forward(addnorm_output)

        feedforward_output = self.dropout2(feedforward_output, training=training)

        return self.add_norm2(addnorm_output, feedforward_output)

### Encoder

In [10]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, sequence_length, h, d_k, d_v, d_model, d_ff, n, rate, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.pos_encoding = PositionEmbeddingFixedWeights(sequence_length, vocab_size, d_model)
        self.dropout = tf.keras.layers.Dropout(rate)
        self.encoder_layer = [EncoderLayer(h, d_k, d_v, d_model, d_ff, rate) for _ in range(n)]
 
    def call(self, input_sentence, padding_mask, training):

        pos_encoding_output = self.pos_encoding(input_sentence)
  
        x = self.dropout(pos_encoding_output, training=training)
 
        for i, layer in enumerate(self.encoder_layer):
            x = layer(x, padding_mask, training)
 
        return x

### Data Preparation

In [16]:
import json

data_path = '/Users/sauravshrestha/Downloads/data.json'

with open(data_path, 'r') as f:
    data = json.load(f)

In [17]:
training_sentences = []
training_labels = []
labels = []
responses = []

for intent in data['intents']:
  for pattern in intent['patterns']:
    training_sentences.append(pattern)
    training_labels.append(intent['tag'])
  responses.append(intent['responses'])

  if intent['tag'] not in labels:
    labels.append(intent['tag'])

NUM_CLASSES = len(labels)
print(NUM_CLASSES)

7


In [18]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(training_labels)
training_labels = le.transform(training_labels)

In [19]:
vocab_size = 1000
embedding_dim = 32
max_len = 32
oov_token = '<OOV>'

In [20]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(sequences, truncating='post', maxlen=max_len)


### Training

In [15]:
enc_vocab_size = 20 # Vocabulary size for the encoder
input_seq_length = 32  # Maximum length of the input sequence
h = 8  # Number of self-attention heads
d_k = 64  # Dimensionality of the linearly projected queries and keys
d_v = 64  # Dimensionality of the linearly projected values
d_ff = 2048  # Dimensionality of the inner fully connected layer
d_model = 512  # Dimensionality of the model sub-layers' outputs
n = 6  # Number of layers in the encoder stack
 
batch_size = 64  # Batch size from the training process
dropout_rate = 0.1  # Frequency of dropping the input units in the dropout layers

In [21]:
inputs = tf.keras.layers.Input(shape=(max_len, ))
encoder = Encoder(enc_vocab_size, input_seq_length, h, d_k, d_v, d_model, d_ff, n, dropout_rate)
encoder_out = encoder(inputs, None, True)
x = tf.keras.layers.GlobalAveragePooling1D()(encoder_out)
x = tf.keras.layers.Dropout(0.1)(x)
x = tf.keras.layers.Dense(64, activation="relu")(x)
x = tf.keras.layers.Dropout(0.1)(x)
outputs = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)


In [22]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32)]              0         
                                                                 
 encoder_1 (Encoder)         (None, 32, 512)           13414016  
                                                                 
 global_average_pooling1d (G  (None, 512)              0         
 lobalAveragePooling1D)                                          
                                                                 
 dropout_26 (Dropout)        (None, 512)               0         
                                                                 
 dense_72 (Dense)            (None, 64)                32832     
                                                                 
 dropout_27 (Dropout)        (None, 64)                0         
                                                             

In [23]:
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history = model.fit(
    padded_sequences,
    np.array(training_labels),
    batch_size=32, epochs=5
)

Epoch 1/5


2023-02-20 15:12:36.186388: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-20 15:12:39.889834: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-02-20 15:12:42.057050: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:418 : NOT_FOUND: could not find registered platform with id: 0x16a042a20
2023-02-20 15:12:42.057092: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:418 : NOT_FOUND: could not find registered platform with id: 0x16a042a20
2023-02-20 15:12:42.127503: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:418 : NOT_FOUND: could not find registered platform with id: 0x16a042a20
2023-02-20 15:12:42.127526: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:418 : NOT_FOUND: could not find registered platform with id: 0x16a042a20
2

NotFoundError: Graph execution error:

Detected at node 'StatefulPartitionedCall_98' defined at (most recent call last):
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/asyncio/base_events.py", line 1899, in _run_once
      handle._run()
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2940, in run_cell
      result = self._run_cell(
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2995, in _run_cell
      return runner(coro)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3194, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3373, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3433, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/ys/v8829l1d5ggf0cw6db6kv5f80000gn/T/ipykernel_24902/1025160168.py", line 4, in <module>
      history = model.fit(
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/engine/training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/engine/training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/engine/training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/engine/training.py", line 1027, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 527, in minimize
      self.apply_gradients(grads_and_vars)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1140, in apply_gradients
      return super().apply_gradients(grads_and_vars, name=name)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 634, in apply_gradients
      iteration = self._internal_apply_gradients(grads_and_vars)
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1166, in _internal_apply_gradients
      return tf.__internal__.distribute.interim.maybe_merge_call(
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1216, in _distributed_apply_gradients_fn
      distribution.extended.update(
    File "/Users/sauravshrestha/Documents/ml_project/.env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1211, in apply_grad_to_update_var
      return self._update_step_xla(grad, var, id(self._var_key(var)))
Node: 'StatefulPartitionedCall_98'
could not find registered platform with id: 0x16a042a20
	 [[{{node StatefulPartitionedCall_98}}]] [Op:__inference_train_function_19504]