In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed datasets-2

In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the conv_ai_3 dataset
ds = tfds.load('huggingface:conv_ai_3/conv_ai_3', split='train')

# Preprocess the text data
tokenizer = tfds.deprecated.text.Tokenizer()
vocabulary_set = set()

for example in ds:
    text = example['question'].numpy().decode('utf-8')  # Access the question text
    tokens = tokenizer.tokenize(text)
    vocabulary_set.update(tokens)

encoder = tfds.deprecated.text.TokenTextEncoder(list(vocabulary_set))  # Convert vocabulary_set to list

Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/conv_ai_3/conv_ai_3/1.0.0...


Downloading data:   0%|          | 0.00/321k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/85.9k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9176 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2313 [00:00<?, ? examples/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/conv_ai_3/conv_ai_3/1.0.0.incompleteJOXLJ2/conv_ai_3-train.tfrecord*...:  …

Generating validation examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/conv_ai_3/conv_ai_3/1.0.0.incompleteJOXLJ2/conv_ai_3-validation.tfrecord*.…

Dataset conv_ai_3 downloaded and prepared to /root/tensorflow_datasets/conv_ai_3/conv_ai_3/1.0.0. Subsequent calls will reuse this data.


In [4]:
# Define the Generator model
class Generator(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super(Generator, self).__init__()
        self.embedding = Embedding(vocab_size, embedding_dim)
        self.lstm = LSTM(rnn_units, return_sequences=True, return_state=True)
        self.dense = Dense(vocab_size, activation='softmax')

    def call(self, x, hidden):
        x = self.embedding(x)
        x, state_h, state_c = self.lstm(x, initial_state=hidden)
        x = self.dense(x)
        return x[:, -1, :], [state_h, state_c]

# Define the Discriminator model
class Discriminator(tf.keras.Model):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.dense = Dense(64, activation='relu')

    def call(self, x):
        x = tf.reshape(x, [-1, x.shape[1], 1])
        x = self.dense(x)
        return x

In [5]:
# Define the loss function and optimizer
loss_object = BinaryCrossentropy(from_logits=True)
generator_optimizer = Adam(learning_rate=0.001)
discriminator_optimizer = Adam(learning_rate=0.001)

# Define the loss function for the Generator
def generator_loss(fake_output):
    return loss_object(tf.ones_like(fake_output), fake_output)

# Define the loss function for the Discriminator
def discriminator_loss(real_output, fake_output):
    real_loss = loss_object(tf.ones_like(real_output), real_output)
    fake_loss = loss_object(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

In [6]:
# Define the training step
@tf.function
def train_step(real_text):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        initial_state = [tf.zeros((1, rnn_units)), tf.zeros((1, rnn_units))]

        generated_text, _ = generator(real_text, initial_state)
        real_output = discriminator(real_text)
        fake_output = discriminator(generated_text)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    return gen_loss, disc_loss

In [7]:
# Define hyperparameters
embedding_dim = 256
rnn_units = 1024
vocab_size = len(vocabulary_set)

# Initialize the Generator and Discriminator models
generator = Generator(vocab_size, embedding_dim, rnn_units)
discriminator = Discriminator()

In [None]:
EPOCHS = 10
reference_texts = []  
generated_texts = []

for epoch in range(EPOCHS):
    for example in ds:
        text = example['question'].numpy().decode('utf-8')
        tokens = tokenizer.tokenize(text)
        filtered_tokens = [token for token in tokens if token in vocabulary_set]
        if filtered_tokens:
            encoded_text = encoder.encode(' '.join(filtered_tokens))
            real_text = tf.expand_dims(encoded_text, 0)
            real_text = pad_sequences(real_text, padding='post')

            try:
                gen_loss, disc_loss = train_step(real_text)
            except tf.errors.InvalidArgumentError as error:
                print("InvalidArgumentError:", error)
                continue

    # Generating text for evaluation
    if epoch % 2 == 0:
        generated_text = generate_text()
        generated_texts.append(tokenizer.tokenize(generated_text))

    print(f'Epoch {epoch+1}, Generator Loss: {gen_loss}, Discriminator Loss: {disc_loss}')

# Compute BLEU score after training
bleu_score = corpus_bleu([reference_texts], generated_texts)
print(f"Final BLEU Score: {bleu_score}")


InvalidArgumentError: Graph execution error:

Detected at node generator/embedding/embedding_lookup defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/lo

In [None]:
# Plotting the BLEU scores
plt.figure(figsize=(10, 5))
plt.plot(range(1, EPOCHS + 1), bleu_scores, marker='o', linestyle='-', color='b')
plt.title('BLEU Score Over Training Epochs')
plt.xlabel('Epoch')
plt.ylabel('BLEU Score')
plt.grid(True)
plt.show()