<a href="https://colab.research.google.com/github/talhachattha162/widget-layout-design-1/blob/main/encoder-decoder_100per_acc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Infix-to-Postfix Translation with an LSTM Encoder-Decoder


In [1]:
# --- Imports ---
import numpy as np
import random
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import LSTM, Embedding, TimeDistributed, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
import plotly.express as px
import pandas as pd


In [None]:

# --- Constants ---
# Token inventory of the toy expression language.
OPERATORS = list('+-*/')
IDENTIFIERS = ['a', 'b', 'c', 'd', 'e']
SPECIAL_TOKENS = ['PAD', 'SOS', 'EOS']
SYMBOLS = ['(', ')', *OPERATORS]
VOCAB = [*SPECIAL_TOKENS, *SYMBOLS, *IDENTIFIERS, 'JUNK']

# Two-way lookup between tokens and integer ids. Ids follow VOCAB order,
# which puts PAD at id 0.
id_to_token = dict(enumerate(VOCAB))
token_to_id = {tok: i for i, tok in id_to_token.items()}
VOCAB_SIZE = len(VOCAB)
PAD_ID, SOS_ID, EOS_ID = (token_to_id[name] for name in SPECIAL_TOKENS)

# The largest expression of depth d has 2**d identifiers, 2**d - 1 operators
# and 2 * (2**d - 1) parentheses, i.e. 4 * 2**d - 3 tokens; one more slot is
# reserved for the trailing EOS.
MAX_DEPTH = 3
MAX_LEN = 2 ** (MAX_DEPTH + 2) - 2

# Model width hyper-parameters.
EMBEDDING_DIM = 192
LSTM_UNITS = 192



In [None]:
# --- Data Generation Functions ---
def generate_infix_expression(max_depth):
    """Return a random, fully parenthesised infix expression as a string.

    At depth 0 a single identifier is returned. At any other depth a coin
    flip decides whether to recurse one level down without adding an
    operator, or to join two sub-expressions as ``(left op right)``.

    Args:
        max_depth: Maximum nesting depth of binary operations.

    Returns:
        The expression text; operators are surrounded by single spaces.
    """
    # Leaf: a bare identifier.
    if max_depth == 0:
        return random.choice(IDENTIFIERS)

    shallower = max_depth - 1

    # Half of the time this level adds no operator, so sizes vary.
    if random.random() < 0.5:
        return generate_infix_expression(shallower)

    # Operands are drawn before the operator; this keeps the order of
    # random draws stable.
    lhs = generate_infix_expression(shallower)
    rhs = generate_infix_expression(shallower)
    operator = random.choice(OPERATORS)
    return '(' + lhs + ' ' + operator + ' ' + rhs + ')'

def tokenize(expr):
    """Split an expression string into single-character tokens.

    Characters that are not keys of ``token_to_id`` (for example the spaces
    around operators) are dropped.

    Args:
        expr: Expression text.

    Returns:
        List of one-character tokens in their original order.
    """
    tokens = []
    for ch in expr:
        if ch in token_to_id:
            tokens.append(ch)
    return tokens

def infix_to_postfix(tokens):
    """Convert a tokenised infix expression to postfix notation.

    Uses an operator stack: identifiers go straight to the output, operators
    first pop every stacked operator of greater or equal precedence (which
    makes all four operators left-associative), and a closing parenthesis
    pops back to its matching opening parenthesis.

    Args:
        tokens: Iterable of tokens drawn from ``IDENTIFIERS``, ``OPERATORS``
            and the parentheses ``'('`` / ``')'``. Any other token is ignored.

    Returns:
        List of identifier and operator tokens in postfix order.

    Raises:
        ValueError: If the parentheses in ``tokens`` are unbalanced.
    """
    precedence = {'+': 1, '-': 1, '*': 2, '/': 2}
    output, stack = [], []
    for token in tokens:
        if token in IDENTIFIERS:
            output.append(token)
        elif token in OPERATORS:
            while stack and stack[-1] in OPERATORS and precedence[stack[-1]] >= precedence[token]:
                output.append(stack.pop())
            stack.append(token)
        elif token == '(':
            stack.append(token)
        elif token == ')':
            while stack and stack[-1] != '(':
                output.append(stack.pop())
            # An empty stack here means there is no '(' to close. Fail with a
            # clear message instead of an IndexError from pop().
            if not stack:
                raise ValueError("unbalanced parentheses: unmatched ')'")
            stack.pop()
    while stack:
        top = stack.pop()
        # A '(' still on the stack was never closed; it must not leak into
        # the postfix output as if it were an operator.
        if top == '(':
            raise ValueError("unbalanced parentheses: unmatched '('")
        output.append(top)
    return output

def encode(tokens, max_len=MAX_LEN):
    """Map tokens to ids, append EOS and right-pad with PAD up to ``max_len``.

    Args:
        tokens: Tokens that are all keys of ``token_to_id``.
        max_len: Target length of the returned list.

    Returns:
        List of integer ids. If the tokens plus EOS already exceed
        ``max_len`` no padding is added and the list is longer than
        ``max_len``.
    """
    encoded = [token_to_id[tok] for tok in tokens]
    encoded.append(EOS_ID)
    # A non-positive count yields an empty list, so nothing is appended.
    missing = max_len - len(encoded)
    encoded.extend([PAD_ID] * missing)
    return encoded

def generate_dataset(n, max_depth=MAX_DEPTH):
    """Generate ``n`` random (infix, postfix) training pairs as id arrays.

    Each sample is a random infix expression and its postfix translation,
    both encoded with ``encode`` (EOS-terminated, PAD-filled to ``MAX_LEN``).

    Args:
        n: Number of samples to generate.
        max_depth: Maximum nesting depth of the generated expressions. Must
            be between 0 and ``MAX_DEPTH``, because sequences are padded to
            ``MAX_LEN``, which is sized for ``MAX_DEPTH``.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays holding the encoded infix and
        postfix sequences respectively.

    Raises:
        ValueError: If ``max_depth`` is negative or greater than ``MAX_DEPTH``.
    """
    # Deeper expressions could exceed MAX_LEN and give ragged rows; negative
    # depths would make the generator recurse without ever reaching depth 0.
    if not 0 <= max_depth <= MAX_DEPTH:
        raise ValueError(
            f"max_depth must be between 0 and {MAX_DEPTH}, got {max_depth}"
        )

    X, Y = [], []
    for _ in range(n):
        # Honour the caller's depth; previously the global MAX_DEPTH was
        # always used and the argument was silently ignored.
        expr = generate_infix_expression(max_depth)
        infix = tokenize(expr)
        postfix = infix_to_postfix(infix)
        X.append(encode(infix))
        Y.append(encode(postfix))
    return np.array(X), np.array(Y)

def shift_right(seqs):
    """Shift every row one position to the right and put SOS in column 0.

    The last element of each row is dropped. The input is not modified.

    Args:
        seqs: 2-D array of token ids, one sequence per row.

    Returns:
        New array with the same shape and dtype as ``seqs``.
    """
    result = np.empty_like(seqs)
    # Both assignments together fill every cell of the fresh array.
    result[:, 0] = SOS_ID
    result[:, 1:] = seqs[:, :-1]
    return result



In [None]:
# --- Model Definition: Corrected for Two-Layer LSTM ---
# Two-layer LSTM encoder-decoder. All layers are created once at module level
# because the inference cell calls the same decoder layer objects again
# (decoder_embedding, decoder_lstm1, decoder_lstm2, decoder_dense).

# Encoder embedding uses mask_zero=True; id 0 is PAD (first entry of VOCAB).
encoder_embedding = Embedding(VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True, name='encoder_embedding')
# NOTE(review): the decoder embedding is created without mask_zero, so padded
# target positions presumably count toward the loss and the 'accuracy' metric
# -- confirm this is intended.
decoder_embedding = Embedding(VOCAB_SIZE, EMBEDDING_DIM, name='decoder_embedding')

# Every LSTM returns its final (h, c) state. encoder_lstm2 is the only one
# that does not set return_sequences.
encoder_lstm1 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True, name='encoder_lstm1')
encoder_lstm2 = LSTM(LSTM_UNITS, return_state=True, name='encoder_lstm2')
decoder_lstm1 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True, name='decoder_lstm1')
decoder_lstm2 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True, name='decoder_lstm2')
# Per-timestep softmax over the vocabulary.
decoder_dense = TimeDistributed(Dense(VOCAB_SIZE, activation='softmax'), name='decoder_output')

# Encoder
# Fixed-length input of MAX_LEN token ids. The second LSTM consumes the first
# one's output sequence. encoder_outputs1 and encoder_outputs2 are not passed
# to the decoder; only the four state tensors are.
encoder_inputs = Input(shape=(MAX_LEN,), name='encoder_inputs')
x = encoder_embedding(encoder_inputs)
encoder_outputs1, state_h1, state_c1 = encoder_lstm1(x)
encoder_outputs2, state_h2, state_c2 = encoder_lstm2(encoder_outputs1)

# Decoder: Use both encoder layers' states as initial states for their corresponding decoder layers
# The decoder input length is left open (None): the training cell feeds the
# full right-shifted target, while decode_sequence feeds one token at a time.
decoder_inputs = Input(shape=(None,), name='decoder_inputs')
y = decoder_embedding(decoder_inputs)
# The decoder's own final states are discarded in the training graph.
y, _, _ = decoder_lstm1(y, initial_state=[state_h1, state_c1])
y, _, _ = decoder_lstm2(y, initial_state=[state_h2, state_c2])
decoder_outputs = decoder_dense(y)

# Training model: (encoder ids, decoder ids) -> per-step token distribution.
# The sparse loss takes integer token ids as targets.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()



In [None]:
# --- Training ---
# Build the training pairs; the decoder input is the target sequence shifted
# right by one position behind SOS.
X_train, Y_train = generate_dataset(20000)
decoder_input_train = shift_right(Y_train)

# Bring all three arrays to MAX_LEN with post-padding / post-truncation.
X_train, Y_train, decoder_input_train = (
    pad_sequences(seqs, maxlen=MAX_LEN, padding='post', truncating='post')
    for seqs in (X_train, Y_train, decoder_input_train)
)

# Fit with 20% of the samples held out for validation.
history = model.fit(
    x=[X_train, decoder_input_train],
    y=Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=64,
)


In [2]:

# --- Plot Training History ---
# One row per epoch; the default 0-based index becomes a 1-based epoch column.
history_df = pd.DataFrame(history.history)
history_df['epoch'] = history_df.index + 1

# Loss and accuracy curves for both the training and validation splits.
fig = px.line(
    history_df,
    x='epoch',
    y=['loss', 'val_loss', 'accuracy', 'val_accuracy'],
    title='Training Progress',
    labels={'value': 'Metric Value', 'variable': 'Metric'},
)
fig.show()


Round---> 1
Input     : [[ 3 12  6  3 13  8 11  4  4  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0]]
Predicted : d e c / -
Expected  : d e c / -
---
Input     : [[ 3  3  3 12  6 10  4  7  3 10  5 13  4  4  8  3  3 10  7  9  4  7  3 13
   5  9  4  4  4  2]]
Predicted : d b - b e + * b e a * a + * -
Expected  : d b - b e + * b a * e a + * /
---
Input     : [[ 3  3 13  7  9  4  5 13  4  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0]]
Predicted : e a * e +
Expected  : e a * e +
---
Input     : [[ 3  3 11  5  3 11  5  9  4  4  6  3 10  6  3 12  8 11  4  4  4  2  0  0
   0  0  0  0  0  0]]
Predicted : c c a + + b d c / - -
Expected  : c c a + + b d c / - -
---
Input     : [[ 3 12  7 13  4  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0]]
Predicted : d e *
Expected  : d e *
---
Input     : [[13  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0]]
Predicted : e
Expected  : e
---
Input     : 

In [None]:

# --- Inference Models ---
# Separate encoder and decoder models for step-by-step decoding. They are built
# from the tensors and layer objects created in the model-definition cell.

# Encoder inference model: outputs all four states
encoder_model = Model(encoder_inputs, [state_h1, state_c1, state_h2, state_c2])

# Decoder inference model: accepts four states as input
# One (h, c) input pair per decoder LSTM layer, each of width LSTM_UNITS.
decoder_state_input_h1 = Input(shape=(LSTM_UNITS,))
decoder_state_input_c1 = Input(shape=(LSTM_UNITS,))
decoder_state_input_h2 = Input(shape=(LSTM_UNITS,))
decoder_state_input_c2 = Input(shape=(LSTM_UNITS,))
# Token-id input of open length; decode_sequence feeds a single token per call.
decoder_inputs_inf = Input(shape=(None,))
# Rebinds the module-level name `y` that the model-definition cell also uses.
y = decoder_embedding(decoder_inputs_inf)
# Unlike the training graph, the updated states are kept so the caller can
# feed them back in on the next decoding step.
y, out_h1, out_c1 = decoder_lstm1(y, initial_state=[decoder_state_input_h1, decoder_state_input_c1])
y, out_h2, out_c2 = decoder_lstm2(y, initial_state=[decoder_state_input_h2, decoder_state_input_c2])
decoder_outputs_inf = decoder_dense(y)
# Inputs: token ids + four states. Outputs: token distribution + four new states.
decoder_model = Model(
    [decoder_inputs_inf, decoder_state_input_h1, decoder_state_input_c1, decoder_state_input_h2, decoder_state_input_c2],
    [decoder_outputs_inf, out_h1, out_c1, out_h2, out_c2]
)



In [None]:
# --- Decoding and Evaluation ---
def decode_sequence(input_seq, tokenizer, reverse_tokenizer, start_token_idx, end_token_idx):
    """Greedily decode one encoded input sequence into a token string.

    The module-level ``encoder_model`` produces the initial decoder states.
    The module-level ``decoder_model`` is then called one token at a time,
    always taking the arg-max token, until the end token is produced or
    ``MAX_LEN`` steps have run.

    Args:
        input_seq: Encoder input batch passed to ``encoder_model.predict``.
        tokenizer: Token-to-id mapping (not used by this function).
        reverse_tokenizer: Id-to-token mapping; unknown ids map to ``''``.
        start_token_idx: Id fed to the decoder as its first input.
        end_token_idx: Id that terminates decoding (not included in output).

    Returns:
        The decoded tokens joined by single spaces.
    """
    enc_h1, enc_c1, enc_h2, enc_c2 = encoder_model.predict(input_seq, verbose=0)
    layer_states = [enc_h1, enc_c1, enc_h2, enc_c2]
    next_input = np.array([[start_token_idx]])
    pieces = []

    steps_left = MAX_LEN
    while steps_left > 0:
        steps_left -= 1
        probs, new_h1, new_c1, new_h2, new_c2 = decoder_model.predict(
            [next_input, *layer_states], verbose=0)
        # Greedy choice from the distribution at the last (only) time step.
        best_id = np.argmax(probs[0, -1])
        piece = reverse_tokenizer.get(best_id, '')
        if best_id == end_token_idx:
            break
        pieces.append(piece)
        # Feed the chosen token and the updated states into the next step.
        next_input = np.array([[best_id]])
        layer_states = [new_h1, new_c1, new_h2, new_c2]

    return ' '.join(pieces)


In [None]:

def evaluate_model(encoder_model, decoder_model,
                   input_texts, target_texts,
                   tokenizer, reverse_tokenizer,
                   start_token_idx, end_token_idx,
                   max_print=50):
    """Compute exact-match accuracy of greedy decoding over a set of samples.

    Each input is decoded with ``decode_sequence`` and compared, as a
    space-joined token string, against its target with special tokens removed.

    NOTE: ``encoder_model`` and ``decoder_model`` are accepted for interface
    compatibility, but decoding goes through ``decode_sequence``, which uses
    the module-level models of the same names.

    Args:
        encoder_model: Encoder inference model (see note above).
        decoder_model: Decoder inference model (see note above).
        input_texts: Encoded input sequences; sliced as ``input_texts[i:i+1]``.
        target_texts: Encoded target sequences, indexed in parallel.
        tokenizer: Token-to-id mapping; must contain ``'PAD'``.
        reverse_tokenizer: Id-to-token mapping.
        start_token_idx: Id of the start-of-sequence token.
        end_token_idx: Id of the end-of-sequence token.
        max_print: Number of leading samples whose input, prediction and
            expectation are printed. Defaults to 50.

    Returns:
        Fraction of samples whose cleaned prediction equals the cleaned
        target, or ``0.0`` when there are no samples.
    """
    total = len(input_texts)
    # Without samples there is nothing to score; avoid dividing by zero.
    if total == 0:
        return 0.0

    # Ids stripped from the target before comparison; built once, not per token.
    ignored_ids = (0, start_token_idx, end_token_idx,
                   tokenizer['PAD'], tokenizer.get('JUNK', -1))

    correct = 0
    for i in range(total):
        # Slice, don't index, so the leading batch dimension is kept.
        input_seq = input_texts[i:i+1]
        decoded = decode_sequence(input_seq, tokenizer, reverse_tokenizer, start_token_idx, end_token_idx)
        decoded_clean = decoded.replace('EOS', '').replace('PAD', '').replace('JUNK', '').strip()
        target_clean = ' '.join(
            reverse_tokenizer.get(idx, '')
            for idx in target_texts[i]
            if idx not in ignored_ids
        ).strip()
        if i < max_print:
            print(f"Input     : {input_seq}")
            print(f"Predicted : {decoded_clean}")
            print(f"Expected  : {target_clean}")
            print("---")
        if decoded_clean == target_clean:
            correct += 1
    return correct / total


In [None]:

# --- Test Accuracy ---
# Score the inference models on several freshly generated batches and report
# the mean and spread of the per-round exact-match accuracy.
accuracies = []
num_rounds = 10
for i in range(num_rounds):
    X_val, Y_val = generate_dataset(20)
    decoder_input_val = shift_right(Y_val)

    # Bring every array to MAX_LEN with post-padding / post-truncation.
    X_val, decoder_input_val, Y_val = (
        pad_sequences(batch, maxlen=MAX_LEN, padding='post', truncating='post')
        for batch in (X_val, decoder_input_val, Y_val)
    )
    print('Round--->', i+1)

    accuracy = evaluate_model(
        encoder_model, decoder_model,
        X_val, Y_val,
        token_to_id, id_to_token,
        SOS_ID, EOS_ID)
    accuracies.append(accuracy)

mean_acc = np.mean(accuracies)
std_acc = np.std(accuracies)

# Summary banner, emitted with a single print call.
print(
    '\n',
    "****************************",
    "++++++++++++++++++++++++++++",
    "****************************",
    f"Mean Accuracy over {num_rounds} rounds: {mean_acc:.4f}",
    f"Standard Deviation: {std_acc:.4f}",
    "****************************",
    "----------------------------",
    "****************************",
    sep='\n',
)