<a href="https://colab.research.google.com/github/leman-cap13/my_projects/blob/main/Date_conversion_model_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from datetime import date
import numpy as np


# Month names are spelled out by hand because strftime()'s %B format depends
# on the active locale.
MONTHS = ["January", "February", "March", "April", "May", "June",
          "July", "August", "September", "October", "November", "December"]


def random_dates(n_dates):
    """Generate random dates as (human-readable, ISO 8601) string pairs.

    Dates are drawn with NumPy's global random state from 1000-01-01
    through 9999-12-31, both ends included.

    Args:
        n_dates: Number of dates to generate.

    Returns:
        A tuple ``(x, y)`` of two lists holding ``n_dates`` strings each.
        ``x[i]`` is the date written as ``"<Month> <dd>, <yyyy>"`` (for
        example ``"September 20, 7075"``) and ``y[i]`` is the same date in
        ISO format (``"7075-09-20"``).
    """
    min_date = date(1000, 1, 1).toordinal()
    max_date = date(9999, 12, 31).toordinal()

    # randint's upper bound is exclusive, so ask for max_date + 1 to make the
    # last day of the range reachable.
    ordinals = np.random.randint(min_date, max_date + 1, size=n_dates)
    dates = [date.fromordinal(ordinal) for ordinal in ordinals]

    # Format day and year directly instead of through strftime so the text
    # never depends on locale or platform; the day keeps its leading zero.
    x = [f"{MONTHS[dt.month - 1]} {dt.day:02d}, {dt.year}" for dt in dates]
    y = [dt.isoformat() for dt in dates]
    return x, y




# Seed NumPy's global generator so the same dataset is produced on every run.
np.random.seed(42)

# Build the full dataset of (human-readable, ISO) date string pairs.
n_dates = 100_000
x_example, y_example = random_dates(n_dates)

# Print a two-column preview of the first three pairs.
row_format = "{:25s}{:25s}"
print(row_format.format("Input", "Target"))
print("-" * 50)
for source_text, target_text in zip(x_example[:3], y_example[:3]):
    print(row_format.format(source_text, target_text))


In [None]:
x_example[:10]

In [None]:
y_example[:10]

In [None]:
import tensorflow as tf

In [None]:
import tensorflow as tf

In [None]:
# Tokenization for the input and target texts.
max_len = 30      # every text is padded or truncated to this many tokens
embed_size = 128

# Both vectorizers lowercase the text and split it into single characters.
vectorizer_options = dict(
    output_sequence_length=max_len,
    split='character',
    standardize='lower',
)

# Input side: vocabulary learned from the human-readable date strings.
input_vec_layer = tf.keras.layers.TextVectorization(**vectorizer_options)
input_vec_layer.adapt(x_example)

# Target side: vocabulary learned from the ISO dates wrapped in the
# start/end markers.
# NOTE(review): with split='character' the "startofseq" / "endofseq" markers
# are tokenized one character at a time, not as single tokens - confirm this
# is intended.
target_vec_layer = tf.keras.layers.TextVectorization(**vectorizer_options)
target_texts = [f"startofseq {s} endofseq" for s in y_example]
target_vec_layer.adapt(target_texts)

In [None]:
# Number of entries in the input vocabulary; used to size the encoder embedding.
input_vocabulary = input_vec_layer.get_vocabulary()
vocab_size = len(input_vocabulary)
vocab_size

In [None]:
# Number of entries in the target vocabulary; used to size the decoder
# embedding and the output layer.
target_vocabulary = target_vec_layer.get_vocabulary()
target_vocab_size = len(target_vocabulary)
target_vocab_size

In [None]:
input_vec_layer.get_vocabulary()

In [None]:
target_vec_layer.get_vocabulary()

In [None]:
# Seed TensorFlow's global generator before any layers are created.
tf.random.set_seed(42)

# Symbolic model inputs: each example is one scalar string, for the encoder
# (text to translate) and for the decoder (target text seen so far).
string_input_spec = dict(shape=[], dtype=tf.string)
encoder_inputs = tf.keras.layers.Input(**string_input_spec)
decoder_inputs = tf.keras.layers.Input(**string_input_spec)

In [None]:
encoder_inputs

In [None]:
encoder_inputs.shape

In [None]:
decoder_inputs.shape

In [None]:
# Wire the vectorization layers into the model: raw strings -> token ids.
encoder_input = input_vec_layer(encoder_inputs)
decoder_input = target_vec_layer(decoder_inputs)


In [None]:
encoder_input

In [None]:
encoder_input.shape

In [None]:
decoder_input.shape

In [None]:
# Next step: embedding layers that map token ids to dense vectors.
# The vector width comes from `embed_size` (declared next to `max_len`)
# instead of a repeated literal, so both layers follow that one setting.
# mask_zero=True treats id 0 as padding to be masked out.
encoder_embedding_layer = tf.keras.layers.Embedding(
    input_dim=vocab_size,
    output_dim=embed_size,
    mask_zero=True,
)
decoder_embedding_layer = tf.keras.layers.Embedding(
    input_dim=target_vocab_size,
    output_dim=embed_size,
    mask_zero=True,
)

In [None]:
encoder_embedding_layer

In [None]:
# encoder_embedding_layer.weights[0].shape

In [None]:
# decoder_embedding_layer.weights[0].shape

In [None]:
# Apply the embedding layers to the token-id tensors (nothing is fitted
# here; this only connects the layers into the graph).
encoder_embedding = encoder_embedding_layer(encoder_input)
decoder_embedding = decoder_embedding_layer(decoder_input)

In [None]:
encoder_embedding

In [None]:
encoder_embedding.shape

In [None]:
decoder_embedding.shape

In [None]:
# Encoder mask
# encoder_mask = encoder_embedding_layer.compute_mask(encoder_input)
# decoder_mask = decoder_embedding_layer.compute_mask(decoder_input)

In [None]:
# Encoder LSTM: returns the output at every step plus its final states.
# No mask is passed explicitly here.
encoder_lstm = tf.keras.layers.LSTM(
    512,
    return_sequences=True,
    return_state=True,
)
encoder_results = encoder_lstm(encoder_embedding)
encoder_outputs = encoder_results[0]
# Everything after the per-step outputs is the final state, kept as a list.
encoder_states = list(encoder_results[1:])

# Decoder LSTM: starts from the encoder's final states and returns the
# output at every step.
decoder_lstm = tf.keras.layers.LSTM(512, return_sequences=True)
decoder_outputs = decoder_lstm(
    decoder_embedding,
    initial_state=encoder_states,
)

In [None]:
# Multi-head attention: decoder outputs are the queries, encoder outputs
# serve as both keys and values.
embed_dim = 512
num_heads = 2
mha_layer = tf.keras.layers.MultiHeadAttention(
    num_heads=num_heads,
    key_dim=embed_dim,
)
mha_outputs = mha_layer(
    query=decoder_outputs,
    value=encoder_outputs,
    key=encoder_outputs,
)

# Join each decoder output with its attention result for the output layer.
concat_outputs = tf.keras.layers.Concatenate()([decoder_outputs, mha_outputs])

In [None]:
encoder_outputs.shape

In [None]:
decoder_outputs.shape

In [None]:
print(type(encoder_outputs))
print(type(decoder_outputs))

In [None]:
#attention layer
# attention_layer=tf.keras.layers.Attention()
# attention_outputs=attention_layer([decoder_outputs, encoder_outputs], mask=[decoder_mask, encoder_mask])

In [None]:
# Output layer: a softmax over the target vocabulary at every decoder step.
output_layer = tf.keras.layers.Dense(
    units=target_vocab_size,
    activation="softmax",
)
Y_proba = output_layer(concat_outputs)

In [None]:
# Train/validation split: the first 60,000 pairs train the model, the rest
# are used for validation.
n_train = 60_000
train_targets, valid_targets = y_example[:n_train], y_example[n_train:]

# Encoder inputs: the raw human-readable date strings.
X_train = tf.constant(x_example[:n_train])
X_valid = tf.constant(x_example[n_train:])

# Decoder inputs: the target text prefixed with the start marker.
X_train_dec = tf.constant([f"startofseq {s}" for s in train_targets])
X_valid_dec = tf.constant([f"startofseq {s}" for s in valid_targets])

# Labels: the target text followed by the end marker, as token ids.
# NOTE(review): target_vec_layer is built with split='character', so the
# decoder inputs are offset from the labels by the 11 characters of
# "startofseq " rather than by one token - confirm this is intended.
Y_train = target_vec_layer([f"{s} endofseq" for s in train_targets])
Y_valid = target_vec_layer([f"{s} endofseq" for s in valid_targets])

In [None]:
X_train.shape

In [None]:
X_train_dec.shape

In [None]:
Y_train.shape

In [None]:
# Assemble the model: two string inputs in, per-step token probabilities out.
model = tf.keras.Model(
    inputs=[encoder_inputs, decoder_inputs],
    outputs=[Y_proba],
)

# The labels are integer token ids, hence the sparse cross-entropy loss.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 3 epochs, evaluating on the validation split after each epoch.
model.fit(
    x=(X_train, X_train_dec),
    y=Y_train,
    epochs=3,
    validation_data=((X_valid, X_valid_dec), Y_valid),
)

In [None]:
def translate_date(x_example):
    """Translate one human-readable date string with the trained model.

    Decoding is greedy: at each step the model is run on the text produced
    so far and the most probable next token is appended.

    Args:
        x_example: The date text to translate, e.g. ``"September 20, 7075"``.

    Returns:
        The predicted target text with the end marker and surrounding
        whitespace removed. Decoding stops at the end marker or after
        ``max_len`` steps, whichever comes first.
    """
    end_marker = 'endofseq'
    # Loop invariants: the vocabulary and the encoder input never change.
    vocabulary = target_vec_layer.get_vocabulary()
    X = tf.constant([x_example])

    translation = ''
    for word_idx in range(max_len):
        dec_input = tf.constant([f"startofseq {translation}".strip()])
        y_proba = model.predict([X, dec_input], verbose=0)[0, word_idx]
        predicted_word = vocabulary[np.argmax(y_proba)]
        if predicted_word == end_marker:
            break

        translation += predicted_word
        # target_vec_layer splits text into characters, so the end marker
        # arrives one character at a time and never matches a single token;
        # detect it in the accumulated text instead.
        if translation.endswith(end_marker):
            translation = translation[:-len(end_marker)]
            break
    return translation.strip()


In [None]:
max_len

In [None]:
translate_date('September 20, 7075')

In [None]:
model.summary()