In [16]:
import re
import os
import numpy as np
import tensorflow as tf


In [27]:
SEQ_LENGTH = 10  # sequence length for each line of the sonnet
BATCH_SIZE = 128  # batch size for training
BUFFER_SIZE = 10000  # buffer size for shuffling the data
EMBEDDING_DIM = 256  # embedding dimension for the generator and discriminator
GEN_HIDDEN_DIM = 512  # number of hidden units in the generator
DISC_HIDDEN_DIM = 512  # number of hidden units in the discriminator
DROPOUT_RATE = 0.3  # dropout rate for the generator and discriminator
LEARNING_RATE = 1e-4  # learning rate for the optimizer
EPOCHS = 100  # number of epochs to train for

In [19]:
# read the sonnets from the file
with open("./data/Sonnet.txt", "r") as f:
    text = f.read()

# split the text into sonnets
sonnets = re.split("\n\n", text)

# split the sonnets into lines
lines = []
for sonnet in sonnets:
    sonnet_lines = sonnet.split("\n")
    for line in sonnet_lines:
        if line.strip() != "":
            lines.append(line.strip())

In [38]:
def remove_punctuation(text):
    # define the regex pattern for punctuation
    pattern = r"[^\w\s]"

    # remove all occurrences of the pattern from the text
    text = re.sub(pattern, "", text)

    return text

In [43]:
for i in range(len(lines)):
    lines[i] = remove_punctuation(lines[i])

In [50]:
# tokenize the words
tokenizer = tf.keras.preprocessing.text.Tokenizer(filters="", lower=False)
tokenizer.fit_on_texts(lines)
word_index = tokenizer.word_index
vocab_size = len(word_index)

# convert the lines to sequences of token IDs
sequences = tokenizer.texts_to_sequences(lines)

# pad the sequences to a fixed length
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    sequences, maxlen=9, padding="post", truncating="post"
)

In [55]:
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(vocab_size + 1, EMBEDDING_DIM, input_shape=(SEQ_LENGTH,)))
    model.add(tf.keras.layers.LSTM(GEN_HIDDEN_DIM, dropout=DROPOUT_RATE, return_sequences=True))
    model.add(tf.keras.layers.LSTM(GEN_HIDDEN_DIM, dropout=DROPOUT_RATE))
    model.add(tf.keras.layers.Dense(SEQ_LENGTH, activation="softmax"))
    return model

def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(vocab_size + 1, EMBEDDING_DIM, input_shape=(SEQ_LENGTH,)))
    model.add(tf.keras.layers.LSTM(DISC_HIDDEN_DIM, dropout=DROPOUT_RATE, return_sequences=True))
    model.add(tf.keras.layers.LSTM(DISC_HIDDEN_DIM, dropout=DROPOUT_RATE))
    model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
    return model

In [56]:
# define the number of hidden units for the generator and discriminator
GEN_HIDDEN_UNITS = 128
DISC_HIDDEN_UNITS = 64

# define the learning rate for the optimizer
LEARNING_RATE = 0.0002

In [61]:
# define the generator model
def make_generator_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(GEN_HIDDEN_UNITS, input_shape=(SEQ_LENGTH,), activation='relu'),
        tf.keras.layers.Dense(SEQ_LENGTH, activation='softmax')
    ])
    return model


In [62]:
# define the discriminator model
def make_discriminator_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(DISC_HIDDEN_UNITS, input_shape=(SEQ_LENGTH,), activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return model

In [63]:
# compile the generator and discriminator models into a GAN
def make_gan_model(generator, discriminator):
    discriminator.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE), loss='binary_crossentropy')
    discriminator.trainable = False

    gan = tf.keras.Sequential([
        generator,
        discriminator
    ])

    gan.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE), loss='binary_crossentropy')
    return gan
    

In [64]:
 # create an array of real labels and fake labels for training the discriminator
real_labels = np.ones((BATCH_SIZE, 1))
fake_labels = np.zeros((BATCH_SIZE, 1))

# create the generator, discriminator, and GAN models
generator = make_generator_model()
discriminator = make_discriminator_model()
gan = make_gan_model(generator, discriminator)

In [66]:

# train the GAN
for epoch in range(100):
    # select a random batch of real sonnets
    real_samples = padded_sequences[np.random.randint(0, len(padded_sequences), size=BATCH_SIZE)]

    # generate a batch of fake sonnets using the generator
    noise = np.random.normal(0, 1, (32, SEQ_LENGTH))
    fake_samples = generator.predict(noise)

    # train the discriminator on the real and fake sonnets
    discriminator_loss_real = discriminator.train_on_batch(real_samples, real_labels)
    discriminator_loss_fake = discriminator.train_on_batch(fake_samples, fake_labels)
    discriminator_loss = 0.5 * np.add(discriminator_loss_real, discriminator_loss_fake)

    # generate a new batch of noise for the generator
    noise = np.random.normal(0, 1, (BATCH_SIZE, SEQ_LENGTH))

    # train the generator to fool the discriminator
    generator_loss = gan.train_on_batch(noise, real_labels)

    # print the loss values for the discriminator and generator
    print(f"Epoch {epoch+1}/{100}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}")



ValueError: in user code:

    File "c:\Users\ADMIN\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\ADMIN\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\ADMIN\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\ADMIN\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\ADMIN\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\ADMIN\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_7" is incompatible with the layer: expected shape=(None, 10), found shape=(128, 9)
