<a href="https://colab.research.google.com/github/talhachattha162/widget-layout-design-1/blob/main/with_two_lstm_layers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project


In [1]:
import numpy as np
import random
import string
from tensorflow.keras import  models,Input,Model
from tensorflow.keras.layers import LSTM,Embedding,TimeDistributed, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import plotly.express as px
import pandas as pd

In [2]:
# -------------------- Constants --------------------
OPERATORS = ['+', '-', '*', '/']
IDENTIFIERS = list('abcde')
SPECIAL_TOKENS = ['PAD', 'SOS', 'EOS']
SYMBOLS = ['(', ')', '+', '-', '*', '/']
VOCAB = SPECIAL_TOKENS + SYMBOLS + IDENTIFIERS + ['JUNK']

token_to_id = {tok: i for i, tok in enumerate(VOCAB)}
id_to_token = {i: tok for tok, i in token_to_id.items()}
VOCAB_SIZE = len(VOCAB)
PAD_ID = token_to_id['PAD']
EOS_ID = token_to_id['EOS']
SOS_ID = token_to_id['SOS']


MAX_DEPTH = 3
MAX_LEN = 4*2**MAX_DEPTH -2

EMBEDDING_DIM = 192
LSTM_UNITS = 192

In [3]:
# -------------------- Expression Generation --------------------
def generate_infix_expression(max_depth):
    if max_depth == 0:
        return random.choice(IDENTIFIERS)
    elif random.random() < 0.5:
        return generate_infix_expression(max_depth - 1)
    else:
        left = generate_infix_expression(max_depth - 1)
        right = generate_infix_expression(max_depth - 1)
        op = random.choice(OPERATORS)
        return f'({left} {op} {right})'

def tokenize(expr):
    return [c for c in expr if c in token_to_id]

def infix_to_postfix(tokens):
    precedence = {'+': 1, '-': 1, '*': 2, '/': 2}
    output, stack = [], []
    for token in tokens:
        if token in IDENTIFIERS:
            output.append(token)
        elif token in OPERATORS:
            while stack and stack[-1] in OPERATORS and precedence[stack[-1]] >= precedence[token]:
                output.append(stack.pop())
            stack.append(token)
        elif token == '(':
            stack.append(token)
        elif token == ')':
            while stack and stack[-1] != '(':
                output.append(stack.pop())
            stack.pop()
    while stack:
        output.append(stack.pop())
    return output

def encode(tokens, max_len=MAX_LEN):
    ids = [token_to_id[t] for t in tokens] + [EOS_ID]
    return ids + [PAD_ID] * (max_len - len(ids))

def generate_dataset(n,max_depth=MAX_DEPTH):
    X, Y = [], []
    for _ in range(n):
        expr = generate_infix_expression(MAX_DEPTH)
        #expr = expr_gen.generate(max_depth=max_dthep)
        infix = tokenize(expr)
        postfix = infix_to_postfix(infix)
        X.append(encode(infix))
        Y.append(encode(postfix))
    return np.array(X), np.array(Y)

#you might use the shift function for teacher-forcing
def shift_right(seqs):
    shifted = np.zeros_like(seqs)
    shifted[:, 1:] = seqs[:, :-1]
    shifted[:, 0] = SOS_ID
    return shifted

**Started from here**

In [4]:
# Layers
encoder_embedding = Embedding(VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True)

# Two-layer encoder
encoder_lstm1 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True)
encoder_lstm2 = LSTM(LSTM_UNITS, return_state=True)

decoder_embedding = Embedding(VOCAB_SIZE, EMBEDDING_DIM)

# Two-layer decoder
decoder_lstm1 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True)
decoder_lstm2 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True)

decoder_dense = TimeDistributed(Dense(VOCAB_SIZE, activation='softmax'))

# Encoder
encoder_inputs = Input(shape=(MAX_LEN,))
x = encoder_embedding(encoder_inputs)
x, _, _ = encoder_lstm1(x)
_, state_h, state_c = encoder_lstm2(x)
encoder_states = [state_h, state_c]

# Decoder (in training or inference)
decoder_inputs = Input(shape=(None,))
x = decoder_embedding(decoder_inputs)
x, _, _ = decoder_lstm1(x, initial_state=encoder_states)
x, state_h, state_c = decoder_lstm2(x)
decoder_outputs = decoder_dense(x)


#Model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [5]:
# Step 2: Prepare and Train the Model

# 1. Generate and preprocess data
X_train, Y_train = generate_dataset(20000)
decoder_input_train = shift_right(Y_train)

# Pad all sequences to MAX_LEN
X_train = pad_sequences(X_train, maxlen=MAX_LEN, padding='post', truncating='post')
Y_train = pad_sequences(Y_train, maxlen=MAX_LEN, padding='post', truncating='post')

decoder_input_train = pad_sequences(decoder_input_train, maxlen=MAX_LEN, padding='post', truncating='post')

# 2. Train the model
history= model.fit(
    [X_train, decoder_input_train],  # Inputs: encoder + decoder
    Y_train,                        # Target output
    batch_size=64,
    epochs=10,
    validation_split=0.2
)

Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 483ms/step - accuracy: 0.7905 - loss: 0.6656 - val_accuracy: 0.8448 - val_loss: 0.3418
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 488ms/step - accuracy: 0.8508 - loss: 0.3228 - val_accuracy: 0.8817 - val_loss: 0.2758
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 478ms/step - accuracy: 0.9029 - loss: 0.2315 - val_accuracy: 0.9721 - val_loss: 0.0814
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 475ms/step - accuracy: 0.9792 - loss: 0.0616 - val_accuracy: 0.9932 - val_loss: 0.0231
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 489ms/step - accuracy: 0.9948 - loss: 0.0183 - val_accuracy: 0.9971 - val_loss: 0.0105
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 486ms/step - accuracy: 0.9972 - loss: 0.0100 - val_accuracy: 0.9970 - val_loss: 0.0101
Epoc

In [6]:

history_df = pd.DataFrame(history.history)
history_df['epoch'] = range(1, len(history_df)+1)

fig = px.line(history_df,
              x='epoch',
              y=['loss', 'val_loss', 'accuracy', 'val_accuracy'],
              labels={'value': 'Metric Value', 'variable': 'Metric'},
              title='Training Progress')
fig.show()

In [7]:
# --- 1. Rebuild encoder_model for inference: output states from both LSTM layers ---

# Encoder: get both layers' states
encoder_outputs1, state_h1, state_c1 = encoder_lstm1(encoder_embedding(encoder_inputs))
encoder_outputs2, state_h2, state_c2 = encoder_lstm2(encoder_outputs1)

# Inference encoder model: outputs all four states
encoder_model = Model(
    encoder_inputs,
    [state_h1, state_c1, state_h2, state_c2]
)

# --- 2. Decoder model for inference: accepts four states as input ---

# Decoder state inputs
decoder_state_input_h1 = Input(shape=(LSTM_UNITS,))
decoder_state_input_c1 = Input(shape=(LSTM_UNITS,))
decoder_state_input_h2 = Input(shape=(LSTM_UNITS,))
decoder_state_input_c2 = Input(shape=(LSTM_UNITS,))
decoder_inputs_inf = Input(shape=(None,))

x = decoder_embedding(decoder_inputs_inf)
x, out_h1, out_c1 = decoder_lstm1(x, initial_state=[decoder_state_input_h1, decoder_state_input_c1])
x, out_h2, out_c2 = decoder_lstm2(x, initial_state=[decoder_state_input_h2, decoder_state_input_c2])
decoder_outputs = decoder_dense(x)

decoder_model = Model(
    [decoder_inputs_inf, decoder_state_input_h1, decoder_state_input_c1, decoder_state_input_h2, decoder_state_input_c2],
    [decoder_outputs, out_h1, out_c1, out_h2, out_c2]
)

# --- 3. Update decode_sequence to use four encoder states ---

def decode_sequence(input_seq, tokenizer, reverse_tokenizer, start_token_idx, end_token_idx):
    state_h1, state_c1, state_h2, state_c2 = encoder_model.predict(input_seq, verbose=0)
    target_seq = np.array([[start_token_idx]])
    decoded = []

    for _ in range(MAX_LEN):
        outputs, h1, c1, h2, c2 = decoder_model.predict(
            [target_seq, state_h1, state_c1, state_h2, state_c2], verbose=0)
        token_idx = np.argmax(outputs[0, -1])
        token = reverse_tokenizer.get(token_idx, '')
        if token_idx == end_token_idx:
            break
        decoded.append(token)
        target_seq = np.array([[token_idx]])
        state_h1, state_c1, state_h2, state_c2 = h1, c1, h2, c2
    return ' '.join(decoded)

# --- 4. Evaluate as before (no change needed) ---

In [10]:

# --- Cell 3: evaluate_model (no change needed, but can strip extra spaces for fairness) ---

def evaluate_model(encoder_model, decoder_model,
                   input_texts, target_texts,
                   tokenizer, reverse_tokenizer,
                   start_token_idx, end_token_idx):
    correct = 0
    total = len(input_texts)

    for i in range(total):
        input_seq = input_texts[i:i+1]
        decoded = decode_sequence(input_seq, tokenizer, reverse_tokenizer, start_token_idx, end_token_idx)

        decoded_clean = decoded.replace('EOS', '').replace('PAD', '').replace('JUNK', '').strip()
        target_clean = ' '.join([
            reverse_tokenizer.get(idx, '') for idx in target_texts[i]
            if idx not in [0, start_token_idx, end_token_idx, tokenizer['PAD'], tokenizer.get('JUNK', -1)]
        ]).strip()
        print(f"Predicted : {decoded_clean}")
        print(f"Expected  : {target_clean}")
        print("---")

        if decoded_clean == target_clean:
            correct += 1

    accuracy = correct / total
    return accuracy



In [11]:

# --- Cell 4: Run evaluation (no change needed except for possible print fix) ---

accuracies = []
num_rounds = 10
for i in range(num_rounds):
    X_val, Y_val = generate_dataset(20)
    decoder_input_val = shift_right(Y_val)
    X_val = pad_sequences(X_val, maxlen=MAX_LEN, padding='post', truncating='post')
    decoder_input_val = pad_sequences(decoder_input_val, maxlen=MAX_LEN, padding='post', truncating='post')
    Y_val = pad_sequences(Y_val, maxlen=MAX_LEN, padding='post', truncating='post')

    print('Round--->', i+1)
    accuracy = evaluate_model(
        encoder_model,
        decoder_model,
        X_val,
        Y_val,
        token_to_id,
        id_to_token,
        SOS_ID,
        EOS_ID)
    accuracies.append(accuracy)

mean_acc = np.mean(accuracies)
std_acc = np.std(accuracies)
print('\n')
print("****************************")
print("++++++++++++++++++++++++++++")
print("****************************")
print(f"Mean Accuracy over {num_rounds} rounds: {mean_acc:.4f}")
print(f"Standard Deviation: {std_acc:.4f}")
print("****************************")
print("----------------------------")
print("****************************")

Round---> 1
Predicted : c
Expected  : b e -
---
Predicted : c a +
Expected  : e d - a *
---
Predicted : c a +
Expected  : e e b * *
---
Predicted : a
Expected  : d b /
---
Predicted : c
Expected  : d e +
---
Predicted : c
Expected  : d e /
---
Predicted : c e +
Expected  : d d / e - a b - +
---
Predicted : a
Expected  : d
---
Predicted : c
Expected  : a
---
Predicted : c c +
Expected  : e b * e / e d * *
---
Predicted : e
Expected  : c
---
Predicted : c e +
Expected  : b b e / - c -
---
Predicted : c
Expected  : b b -
---
Predicted : c a +
Expected  : a d /
---
Predicted : c
Expected  : d a d + *
---
Predicted : c e +
Expected  : c d + e c * * b d / /
---
Predicted : c e +
Expected  : e a * c / c e - +
---
Predicted : e
Expected  : c
---
Predicted : c a +
Expected  : e b / a d / +
---
Predicted : c e +
Expected  : c e *
---
Round---> 2
Predicted : c e +
Expected  : e c + e a / + e e * c e - + *
---
Predicted : c e +
Expected  : b d + b d - *
---
Predicted : c
Expected  : a e + d -
---
