<a href="https://colab.research.google.com/github/mdzikrim/Hands-on_DL/blob/main/Chapter_16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import random

reber_grammar = {
    'B': ['T', 'P'],
    'T': ['X', 'S'],
    'P': ['V', 'S'],
    'X': ['X'],
    'S': ['E'],
    'V': ['V'],
    'E': []
}

def generate_string(grammar, current='B', max_len=20):
    if current == 'E' or max_len <= 0:
        return 'E' if current == 'E' else ''
    next_symbol = random.choice(grammar[current])
    return current + generate_string(grammar, next_symbol, max_len - 1)

print("Sample:", generate_string(reber_grammar))

Sample: BPSE


In [5]:
def mutate_string(s):
    idx = random.randint(0, len(s)-2)
    c = random.choice([ch for ch in "BTPXSV" if ch != s[idx]])
    return s[:idx] + c + s[idx+1:]

X, y = [], []
for _ in range(5000):
    valid = generate_string(reber_grammar)
    X.append(valid)
    y.append(1)

    invalid = mutate_string(valid)
    X.append(invalid)
    y.append(0)

print(X[:5], y[:5])


['BPSE', 'SPSE', 'BPSE', 'PPSE', 'BTSE'] [1, 0, 1, 0, 1]


In [6]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)
X_pad = pad_sequences(X_seq, maxlen=20, padding='post')


In [7]:
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=8),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()


In [9]:
from sklearn.model_selection import train_test_split
import numpy as np

X_train, X_val, y_train, y_val = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Convert data to float32
X_train = X_train.astype(np.float32)
X_val = X_val.astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
y_val = np.array(y_val).astype(np.float32)


model.fit(X_train, y_train, epochs=5, validation_data=(X_val, y_val), batch_size=32)

Epoch 1/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.5094 - loss: 0.6931 - val_accuracy: 0.5885 - val_loss: 0.6885
Epoch 2/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.5831 - loss: 0.6680 - val_accuracy: 0.7780 - val_loss: 0.4696
Epoch 3/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.7909 - loss: 0.4510 - val_accuracy: 0.9080 - val_loss: 0.2680
Epoch 4/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.9250 - loss: 0.2407 - val_accuracy: 0.9370 - val_loss: 0.1983
Epoch 5/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9395 - loss: 0.2027 - val_accuracy: 0.9370 - val_loss: 0.1948


<keras.src.callbacks.history.History at 0x783da21aea90>

In [10]:
from datetime import datetime, timedelta

def random_date():
    base = datetime(2019, 1, 1)
    delta = timedelta(days=random.randint(0, 364))
    return base + delta

input_dates, target_dates = [], []
for _ in range(10000):
    d = random_date()
    input_dates.append(d.strftime("%Y-%m-%d"))
    target_dates.append(d.strftime("%B %d, %Y"))

print(input_dates[:3], target_dates[:3])


['2019-10-26', '2019-01-28', '2019-03-10'] ['October 26, 2019', 'January 28, 2019', 'March 10, 2019']


In [11]:
# Use character-level tokenization
from tensorflow.keras.layers import TextVectorization

input_vec = TextVectorization(output_mode="int", output_sequence_length=12)
target_vec = TextVectorization(output_mode="int", output_sequence_length=20)

input_vec.adapt(input_dates)
target_vec.adapt(target_dates)

X_enc = input_vec(input_dates)
X_dec = target_vec(target_dates)


In [13]:
encoder_inputs = tf.keras.Input(shape=(None,), dtype="int64")
x = tf.keras.layers.Embedding(100, 64)(encoder_inputs)
encoder_outputs, state_h, state_c = tf.keras.layers.LSTM(64, return_state=True)(x)

decoder_inputs = tf.keras.Input(shape=(None,), dtype="int64")
x = tf.keras.layers.Embedding(100, 64)(decoder_inputs)
x = tf.keras.layers.LSTM(64)(x, initial_state=[state_h, state_c])
outputs = tf.keras.layers.Dense(100, activation="softmax")(x)

model = tf.keras.Model([encoder_inputs, decoder_inputs], outputs)
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
