In [25]:
import string
import os
import numpy as np
import tensorflow as tf

from tensorflow import keras
from datetime import datetime, timedelta
from typing import Callable

In [26]:
# Characters an encoder input may contain: digits, lowercase ASCII letters and the '-' separator.
# A character's position in this string is the id string_to_ids assigns to it.
POSSIBLE_INPUT_CHARS = string.digits + string.ascii_lowercase + '-'
# Characters a decoder target may contain: digits and the '-' separator.
POSSIBLE_OUTPUT_CHARS = string.digits + '-'
# Naive UTC timestamp captured when this cell runs; get_date_pairs counts days back from it.
CURRENT_DAY = datetime.utcnow()
# Number of (input, target) pairs generated by get_date_pairs.
ROW_COUNT = 14000


def string_to_ids(s: str, chars: str) -> list[int]:
    """Encode ``s`` as the positions of its lowercased characters within ``chars``.

    Args:
        s: Text to encode; it is lowercased before lookup.
        chars: Vocabulary string; a character's index in it is its id.

    Returns:
        One id per character of ``s.lower()``, in order.

    Raises:
        ValueError: If a lowercased character of ``s`` does not occur in ``chars``.
    """
    return [chars.index(symbol) for symbol in s.lower()]


def shuffle(vals: tf.RaggedTensor, targets: tf.RaggedTensor) -> (tf.RaggedTensor, tf.RaggedTensor):
    """Reorder the rows of ``vals`` and ``targets`` with one shared random permutation.

    The permutation is drawn over ``vals.shape[0]`` rows and applied to both
    arguments, so row ``i`` of each result still belongs together.

    Returns:
        ``(shuffled_vals, shuffled_targets)``.
    """
    row_count = vals.shape[0]
    permutation = tf.random.shuffle(tf.range(row_count))
    # gather_nd takes one index vector per output row, hence the (row_count, 1) shape.
    row_indices = tf.reshape(permutation, (row_count, 1))

    return tf.gather_nd(vals, row_indices), tf.gather_nd(targets, row_indices)


def pad_year(year: int) -> str:
    """Render ``year`` as text, left-filled with ``'0'`` to at least four characters.

    Text that is already four or more characters long is returned unchanged,
    so e.g. ``13999`` stays ``'13999'``.
    """
    return str(year).rjust(4, '0')


def get_date_pairs() -> tuple[tf.RaggedTensor, tf.RaggedTensor]:
    """Generate ROW_COUNT shuffled (input ids, target ids) pairs of date strings.

    Pair ``i`` takes its month and day from ``CURRENT_DAY - i days`` and its
    year from a random permutation of ``0 .. ROW_COUNT - 1`` (zero-padded by
    ``pad_year``), so every year value is used exactly once.

    * input text:  ``<year>-<full month name>-<day>``, encoded with POSSIBLE_INPUT_CHARS
    * target text: ``<year>-<month number>-<day>``, encoded with POSSIBLE_OUTPUT_CHARS

    Rows differ in length (month names vary, and years >= 10000 have five
    characters), which is why the results are ragged.

    Returns:
        ``(inputs, targets)`` as ragged tensors with one row per pair, rows
        permuted identically by ``shuffle``.

    Raises:
        ValueError: From ``string_to_ids`` if a formatted date contains a
            character outside the vocabulary.
            NOTE(review): ``%B`` is locale-dependent; this presumably expects
            an English/C locale - confirm.
    """
    years_padded = np.array([pad_year(year) for year in range(ROW_COUNT)])
    np.random.shuffle(years_padded)

    xs = []
    ys = []

    for days_back, year in enumerate(years_padded):
        date = CURRENT_DAY - timedelta(days=days_back)
        # Only month and day come from the calendar date; the year is the
        # substituted one, so the date's own year is never formatted.
        month_name = date.strftime('%B')
        day = date.strftime('%d')
        month_day = date.strftime('%m-%d')

        xs.append(tf.constant(string_to_ids(f'{year}-{month_name}-{day}', POSSIBLE_INPUT_CHARS)))
        ys.append(tf.constant(string_to_ids(f'{year}-{month_day}', POSSIBLE_OUTPUT_CHARS)))

    ragged_xs = tf.ragged.stack(xs, axis=0)
    ragged_ys = tf.ragged.stack(ys, axis=0)

    return shuffle(ragged_xs, ragged_ys)


X, y = get_date_pairs()
# to_tensor() pads short rows with 0, so every real id is shifted up by one
# first to keep 0 meaning "padding" only.
X = (X + 1).to_tensor()
# Targets are ragged too (pad_year yields five characters for years >= 10000),
# so they need the same shift: unshifted, the padding id 0 would be
# indistinguishable from the id of the character '0'. The model already leaves
# room for it: its output layer has len(POSSIBLE_OUTPUT_CHARS) + 1 classes and
# sos_id is len(POSSIBLE_OUTPUT_CHARS) + 1.
# NOTE: when decoding predictions back to text, id 0 is padding and id k maps
# to POSSIBLE_OUTPUT_CHARS[k - 1].
y = (y + 1).to_tensor()

# 70% train / 20% validation / 10% test; the rows were already shuffled by get_date_pairs.
seventy_percent_count = int(X.shape[0] * .7)
ninety_percent_count = int(X.shape[0] * .9)
X_train, y_train = X[:seventy_percent_count, :], y[:seventy_percent_count, :]
X_valid, y_valid = X[seventy_percent_count:ninety_percent_count, :], y[seventy_percent_count:ninety_percent_count, :]
X_test, y_test = X[ninety_percent_count:, :], y[ninety_percent_count:, :]
# Width of the padded target matrix, i.e. the longest target sequence.
max_output_length = y.shape[1]

In [27]:
# Id of the start-of-sequence token that shifted_output_sequences prepends to
# decoder inputs. It is one more than len(POSSIBLE_OUTPUT_CHARS), and therefore
# the last valid index of the decoder embedding in get_model, whose input_dim
# is len(POSSIBLE_OUTPUT_CHARS) + 2.
sos_id = len(POSSIBLE_OUTPUT_CHARS) + 1


def shifted_output_sequences(y: tf.Tensor) -> tf.Tensor:
    """Build decoder inputs from targets by delaying them one time step.

    Each row of ``y`` gets ``sos_id`` prepended and its last element dropped,
    so the result has the same shape as ``y`` and position ``t`` holds the
    target of position ``t - 1``.

    Args:
        y: 2-D tensor of target ids, one sequence per row.

    Returns:
        Tensor of the same shape as ``y`` whose first column is ``sos_id``.
    """
    start_column = tf.fill(dims=(len(y), 1), value=sos_id)
    all_but_last_step = y[:, :-1]

    return tf.concat([start_column, all_but_last_step], axis=1)


# Decoder inputs for each split, derived from that split's own targets.
X_train_decoder, X_valid_decoder, X_test_decoder = (
    shifted_output_sequences(split_targets)
    for split_targets in (y_train, y_valid, y_test)
)

In [28]:
def get_callbacks() -> tuple[keras.callbacks.EarlyStopping, keras.callbacks.ModelCheckpoint, keras.callbacks.TensorBoard]:
    """Create the early-stopping, checkpoint and TensorBoard callbacks for a new run.

    Runs are numbered per experiment: the new run index is one more than the
    number of directories under ``./tensor_logs`` whose name starts with
    ``'<experiment name>_'``. The TensorBoard log directory and the checkpoint
    file (``./saved_models/<experiment name>_<index>.h5``) share that index.

    Both ``./tensor_logs`` and ``./saved_models`` are created if missing.

    Returns:
        ``(early_stopping, model_checkpoint, tensorboard)``.
    """
    the_name = 'encoder_decoder_w_teacher_forcing'
    patience = 5
    model_dir = os.path.join(os.curdir, 'saved_models')
    run_logdir_root = os.path.join(os.curdir, 'tensor_logs')

    # On a fresh checkout neither directory exists yet; without this the
    # os.listdir call below raises FileNotFoundError.
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(run_logdir_root, exist_ok=True)

    # Count only this experiment's earlier runs. The trailing '_' keeps
    # experiments whose name merely begins with the_name from being counted.
    run_prefix = f'{the_name}_'
    previous_runs = [
        name
        for name in os.listdir(run_logdir_root)
        if os.path.isdir(os.path.join(run_logdir_root, name)) and name.startswith(run_prefix)
    ]
    dirs_count = len(previous_runs) + 1
    run_logdir = os.path.join(run_logdir_root, f'{the_name}_{dirs_count}')

    # Stop once val_loss has not improved for `patience` consecutive epochs.
    early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience)
    # save_best_only keeps just the best model seen so far (by the callback's default monitor).
    model_checkpoint = keras.callbacks.ModelCheckpoint(os.path.join(model_dir, f'{the_name}_{dirs_count}.h5'), save_best_only=True)
    tensorboard = keras.callbacks.TensorBoard(run_logdir, histogram_freq=1, profile_batch=10)

    return early_stopping, model_checkpoint, tensorboard

In [31]:
def scheduler(drop_after: int) -> Callable[[int, float], float]:
    """Build a schedule function for ``keras.callbacks.LearningRateScheduler``.

    The returned function leaves the learning rate untouched while
    ``epoch < drop_after`` and multiplies it by ``exp(-0.1)`` on every later
    epoch. Because it receives the current rate each time, the decay compounds.

    Args:
        drop_after: Index of the first epoch on which the rate is decayed.

    Returns:
        ``drop(epoch, learning_rate) -> float``.
    """
    # Computed with NumPy and converted once, so the schedule yields a plain
    # Python float rather than a TensorFlow tensor.
    decay_factor = float(np.exp(-0.1))

    def drop(epoch: int, learning_rate: float) -> float:
        if epoch < drop_after:
            return learning_rate

        return float(learning_rate * decay_factor)

    return drop


def get_model() -> keras.Model:
    """Assemble the uncompiled LSTM encoder-decoder model.

    The model takes two integer-id inputs of arbitrary length,
    ``[encoder_input, decoder_input]``. The encoder LSTM's final hidden and
    cell states initialise the decoder LSTM, and a softmax layer over
    ``len(POSSIBLE_OUTPUT_CHARS) + 1`` classes is applied at every decoder step.

    Returns:
        A ``keras.Model`` mapping ``[encoder_input, decoder_input]`` to
        per-step class probabilities.
    """
    embedding_dim = 32
    lstm_units = 128
    encoder_vocab_size = len(POSSIBLE_INPUT_CHARS) + 1
    # One slot more than the encoder-style "+ 1": sos_id is len(POSSIBLE_OUTPUT_CHARS) + 1.
    decoder_vocab_size = len(POSSIBLE_OUTPUT_CHARS) + 2
    output_class_count = len(POSSIBLE_OUTPUT_CHARS) + 1

    # Encoder: only the final LSTM states are kept; the output is discarded.
    encoder_input = keras.Input(shape=(None,))
    encoder_embedded = keras.layers.Embedding(
        input_dim=encoder_vocab_size, output_dim=embedding_dim)(encoder_input)
    _, state_h, state_c = keras.layers.LSTM(lstm_units, return_state=True)(encoder_embedded)

    # Decoder: starts from the encoder states and emits one output per step.
    decoder_input = keras.Input(shape=(None,))
    decoder_embedded = keras.layers.Embedding(
        input_dim=decoder_vocab_size, output_dim=embedding_dim)(decoder_input)
    decoder_sequence = keras.layers.LSTM(lstm_units, return_sequences=True)(
        decoder_embedded, initial_state=[state_h, state_c])
    step_probabilities = keras.layers.Dense(
        output_class_count, activation='softmax')(decoder_sequence)

    return keras.Model(inputs=[encoder_input, decoder_input], outputs=[step_probabilities])

In [33]:
# Build the model and compile it with Adam and sparse categorical cross-entropy.
model = get_model()
adam_opt = keras.optimizers.Adam(learning_rate=.01)
model.compile(optimizer=adam_opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Learning rate is held for the first 10 epochs, then decayed by scheduler's drop().
lr_scheduler = keras.callbacks.LearningRateScheduler(scheduler(10))
early_stopping, model_checkpoint, tensorboard = get_callbacks()

# The decoder is fed the shifted targets alongside the encoder input.
history = model.fit(
    x=[X_train, X_train_decoder],
    y=y_train,
    epochs=40,
    validation_data=([X_valid, X_valid_decoder], y_valid),
    callbacks=[early_stopping, model_checkpoint, tensorboard, lr_scheduler],
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
