<a href="https://colab.research.google.com/github/mavrick0/DEEP-LEARNING-EXPRIMENTS/blob/main/DL0606.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.optimizers import Adam

sentences = [
    "I love machine learning",
    "Natural language processing is fascinating"
]

pos_tags = [
    "PRP VBP NN NN",
    "NN NNS VBZ VBG"
]

# Tokenize text and tags
tokenizer_text = Tokenizer()
tokenizer_text.fit_on_texts(sentences)
text_sequences = tokenizer_text.texts_to_sequences(sentences)
vocab_size_text = len(tokenizer_text.word_index) + 1

tokenizer_tags = Tokenizer()
tokenizer_tags.fit_on_texts(pos_tags)
tag_sequences = tokenizer_tags.texts_to_sequences(pos_tags)
vocab_size_tags = len(tokenizer_tags.word_index) + 1

# Pad sequences
max_len_text = max(len(seq) for seq in text_sequences)
max_len_tags = max(len(seq) for seq in tag_sequences)

X = pad_sequences(text_sequences, maxlen=max_len_text, padding='post')
y = pad_sequences(tag_sequences, maxlen=max_len_tags, padding='post')

# One-hot encode the output tags
y = np.array([tf.keras.utils.to_categorical(seq, num_classes=vocab_size_tags) for seq in y])

# Define model parameters
embedding_dim = 50
hidden_units = 64

# Encoder
encoder_inputs = Input(shape=(max_len_text,))
encoder_embedding = Embedding(input_dim=vocab_size_text, output_dim=embedding_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(hidden_units, return_sequences=True, return_state=True)
encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_embedding)
encoder_states = [encoder_state_h, encoder_state_c]

# Decoder
decoder_inputs = Input(shape=(max_len_tags, vocab_size_tags))
decoder_lstm = LSTM(hidden_units, return_sequences=True, return_state=True)
decoder_outputs, _ , _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(vocab_size_tags, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Build and compile the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Prepare decoder inputs (shifted targets)
decoder_input_data = np.zeros_like(y)
decoder_input_data[:, 1:, :] = y[:, :-1, :]
decoder_input_data[:, 0, :] = np.zeros((len(sentences), vocab_size_tags))

# Train the model
model.fit([X, decoder_input_data], y, epochs=10, batch_size=2, validation_split=0.1)

def predict_pos(sentence):
    seq = tokenizer_text.texts_to_sequences([sentence])
    seq = pad_sequences(seq, maxlen=max_len_text, padding='post')

    # Initialize decoder input for prediction
    decoder_input = np.zeros((1, max_len_tags, vocab_size_tags))

    # Predict POS tags
    prediction = model.predict([seq, decoder_input])

    # Get the predicted tags
    predicted_tags = np.argmax(prediction, axis=-1)

    # Return the POS tags as a string
    return ' '.join(tokenizer_tags.index_word.get(tag, '') for tag in predicted_tags[0])

# Test with a new sentence
new_sentence = "I enjoy deep learning"
print(predict_pos(new_sentence))


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.5000 - loss: 1.9307 - val_accuracy: 0.0000e+00 - val_loss: 1.9634
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 673ms/step - accuracy: 0.5000 - loss: 1.9188 - val_accuracy: 0.0000e+00 - val_loss: 1.9653
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.5000 - loss: 1.9067 - val_accuracy: 0.0000e+00 - val_loss: 1.9673
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.5000 - loss: 1.8945 - val_accuracy: 0.0000e+00 - val_loss: 1.9694
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.5000 - loss: 1.8820 - val_accuracy: 0.0000e+00 - val_loss: 1.9717
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.5000 - loss: 1.8692 - val_accuracy: 0.0000e+00 - val_loss: 1.9741
Epoch 7/10
[1m1/1[0m 