<a href="https://colab.research.google.com/github/poojasrign/Deep-Learning/blob/main/Ex_6_POS_using_seq_seq_Architecture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# Toy corpus: each sentence is a list of lower-case tokens.
sentences = [
    "the cat sat".split(),
    "the dog barked".split(),
    "a man runs".split(),
    "the woman eats".split(),
]

# Gold POS tags, aligned token-for-token with `sentences`.
# Every sentence in this corpus follows the same DET-NOUN-VERB pattern,
# so one fresh tag list is built per sentence.
pos_tags = [["DET", "NOUN", "VERB"] for _ in sentences]


In [None]:
# Build the word and tag vocabularies.
# The sets are sorted so that every word and tag receives the same integer id
# on each run; iterating a set of strings directly follows hash order, which
# Python randomises per process, making the ids differ between kernel restarts.
words = sorted({w for s in sentences for w in s})
tags = sorted({t for s in pos_tags for t in s})

# Word ids start at 2: id 0 is the padding id and id 1 is the unknown-word id.
word2idx = {w: i + 2 for i, w in enumerate(words)}
word2idx["PAD"] = 0
word2idx["UNK"] = 1

# Tag ids start at 0; idx2tag is the inverse lookup (id -> tag string).
tag2idx = {t: i for i, t in enumerate(tags)}
idx2tag = {i: t for t, i in tag2idx.items()}

vocab_size = len(word2idx)  # counts the PAD and UNK entries as well
tag_size = len(tag2idx)
max_len = max(len(s) for s in sentences)  # longest sentence, in tokens


In [None]:
# Encode words and tags as integer ids; words missing from the vocabulary
# fall back to the UNK id.
X = [[word2idx.get(w, word2idx["UNK"]) for w in s] for s in sentences]
y = [[tag2idx[t] for t in s] for s in pos_tags]

# Right-pad every sequence to max_len (pad_sequences fills with 0 by default).
X = pad_sequences(X, maxlen=max_len, padding="post")
y = pad_sequences(y, maxlen=max_len, padding="post")

# One-hot encode the tag ids sentence by sentence and stack the results
# into a single array.
y = np.array([to_categorical(row, num_classes=tag_size) for row in y])

# Train-test split. A fixed random_state keeps the held-out set identical on
# every run, so the validation metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)



In [None]:
# Sequence tagger: embedding -> LSTM -> per-timestep softmax over the tag set.
input_layer = Input(shape=(max_len,))

# mask_zero=True marks word id 0 (reserved for PAD) as padding so downstream
# layers can ignore padded timesteps.
# `input_length` is not passed: the sequence length is already fixed by the
# Input layer, and the argument is deprecated in Keras 3.
embedding = Embedding(input_dim=vocab_size, output_dim=64, mask_zero=True)(input_layer)

# return_sequences=True yields one hidden state per token, which the
# TimeDistributed Dense layer turns into a tag distribution per token.
lstm = LSTM(64, return_sequences=True)(embedding)
output = TimeDistributed(Dense(tag_size, activation="softmax"))(lstm)

model = Model(input_layer, output)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()



In [None]:
# Train on the training split; validation metrics are reported after each epoch.
model.fit(X_train, y_train, batch_size=2, epochs=15, validation_data=(X_test, y_test))

# Tag a new sentence using the same encoding and padding as the training data.
test_sentence = ["the", "cat", "runs"]
test_seq = [word2idx.get(w, word2idx["UNK"]) for w in test_sentence]
# truncating="post" keeps the first max_len words of an over-long sentence.
# The default ("pre") would drop the leading words and shift every predicted
# tag away from the word it belongs to.
test_seq = pad_sequences([test_seq], maxlen=max_len, padding="post", truncating="post")

pred = model.predict(test_seq)
# Decode only the positions that hold real words, not the padded ones.
n_tokens = min(len(test_sentence), max_len)
pred_tags = [idx2tag[int(i)] for i in np.argmax(pred[0][:n_tokens], axis=-1)]

print("Sentence:", test_sentence)
print("Predicted POS:", pred_tags)

Epoch 1/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 483ms/step - accuracy: 0.4074 - loss: 1.0982 - val_accuracy: 0.3333 - val_loss: 1.0972
Epoch 2/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.5370 - loss: 1.0932 - val_accuracy: 0.6667 - val_loss: 1.0954
Epoch 3/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - accuracy: 0.7407 - loss: 1.0888 - val_accuracy: 0.6667 - val_loss: 1.0935
Epoch 4/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.6111 - loss: 1.0845 - val_accuracy: 0.6667 - val_loss: 1.0915
Epoch 5/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.6111 - loss: 1.0800 - val_accuracy: 0.6667 - val_loss: 1.0893
Epoch 6/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - accuracy: 0.6111 - loss: 1.0752 - val_accuracy: 0.6667 - val_loss: 1.0869
Epoch 7/15
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━