<a href="https://colab.research.google.com/github/talhachattha162/widget-layout-design-1/blob/main/Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1><b>Infix to Postfix Translation using Encoder-Decoder Architecture</b></h1>

In [1]:
# 1. --> Imports <--
import numpy as np
import random
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import LSTM, Embedding, TimeDistributed, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
import plotly.express as px
import pandas as pd

In [2]:
# 2. --> Data Generation & Tokenization Setup <--
# Grammar alphabet: the binary operators and the single-letter operands.
OPERATORS = ['+', '-', '*', '/']
IDENTIFIERS = list('abcde')
# Control tokens are listed first, so PAD receives id 0, SOS id 1 and EOS id 2.
SPECIAL_TOKENS = ['PAD', 'SOS', 'EOS']
SYMBOLS = ['(', ')'] + OPERATORS
VOCAB = [*SPECIAL_TOKENS, *SYMBOLS, *IDENTIFIERS, 'JUNK']

# Two-way lookup tables between token strings and their integer ids.
id_to_token = dict(enumerate(VOCAB))
token_to_id = {tok: idx for idx, tok in id_to_token.items()}
VOCAB_SIZE = len(VOCAB)
PAD_ID, SOS_ID, EOS_ID = (token_to_id[name] for name in SPECIAL_TOKENS)

MAX_DEPTH = 3
# A full expression of depth d has 2**d operands, 2**d - 1 operators and
# 2 * (2**d - 1) parentheses; adding one EOS gives 4 * 2**d - 2 positions.
MAX_LEN = 4 * (2 ** MAX_DEPTH) - 2

def generate_infix_expression(max_depth):
    """Randomly build a fully parenthesised infix expression string.

    At depth 0 a single random identifier is returned. Otherwise, with
    probability 0.5 the call just recurses one level down (producing a
    shallower expression); in the remaining cases two sub-expressions are
    generated and joined by a random operator as ``(left op right)``.

    Args:
        max_depth: remaining nesting budget; 0 yields a bare identifier.

    Returns:
        The expression as a string, with spaces around each operator.
    """
    if max_depth == 0:
        return random.choice(IDENTIFIERS)

    shallower = max_depth - 1
    if random.random() < 0.5:
        return generate_infix_expression(shallower)

    # Draw order (left, right, operator) is kept so seeded runs stay the same.
    lhs = generate_infix_expression(shallower)
    rhs = generate_infix_expression(shallower)
    operator = random.choice(OPERATORS)
    return '({} {} {})'.format(lhs, operator, rhs)

def tokenize(expr):
    """Split an expression string into single-character tokens.

    Characters without an entry in ``token_to_id`` (such as the spaces in
    generated expressions) are dropped.
    """
    return list(filter(lambda ch: ch in token_to_id, expr))

def infix_to_postfix(tokens):
    """Convert a tokenised infix expression to postfix order.

    Uses an operator stack: identifiers go straight to the output, an
    operator first flushes stacked operators of equal or higher precedence
    (which makes all four operators left-associative), and a closing
    parenthesis flushes back to its matching opening parenthesis.

    Args:
        tokens: iterable of single-character tokens; anything that is not an
            identifier, operator or parenthesis is ignored.

    Returns:
        List of identifier and operator tokens in postfix order.
    """
    rank = {'+': 1, '-': 1, '*': 2, '/': 2}
    postfix = []
    pending = []

    for tok in tokens:
        if tok in IDENTIFIERS:
            postfix.append(tok)
            continue

        if tok in OPERATORS:
            # Pop while the top of the stack binds at least as tightly.
            while pending and pending[-1] in OPERATORS and rank[pending[-1]] >= rank[tok]:
                postfix.append(pending.pop())
            pending.append(tok)
            continue

        if tok == '(':
            pending.append(tok)
            continue

        if tok == ')':
            while pending and pending[-1] != '(':
                postfix.append(pending.pop())
            # Discard the matching '(' itself.
            pending.pop()

    # Whatever is still stacked is emitted top-first.
    postfix.extend(reversed(pending))
    return postfix

def encode(tokens, max_len=MAX_LEN):
    """Map tokens to ids, append EOS and right-pad with PAD up to ``max_len``.

    Args:
        tokens: iterable of token strings; each must be a key of
            ``token_to_id`` (a missing token raises ``KeyError``).
        max_len: target length of the returned list.

    Returns:
        List of integer ids. When the ids plus EOS already reach or exceed
        ``max_len`` no padding is added and nothing is truncated.
    """
    encoded = list(map(token_to_id.__getitem__, tokens))
    encoded.append(EOS_ID)
    missing = max_len - len(encoded)
    # A non-positive count multiplies out to an empty list.
    encoded.extend([PAD_ID] * missing)
    return encoded

def generate_dataset(n, max_depth=MAX_DEPTH):
    """Generate ``n`` random infix/postfix pairs as padded id arrays.

    Args:
        n: number of expression pairs to generate.
        max_depth: maximum nesting depth handed to
            ``generate_infix_expression``. Defaults to ``MAX_DEPTH``.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays built from the encoded infix and
        postfix sequences, one row per expression.
    """
    X, Y = [], []
    for _ in range(n):
        # Use the caller's depth; the argument was previously ignored in
        # favour of the module-level MAX_DEPTH.
        expr = generate_infix_expression(max_depth)
        infix = tokenize(expr)
        postfix = infix_to_postfix(infix)
        # NOTE(review): encode() pads to its default max_len (MAX_LEN); a
        # max_depth above MAX_DEPTH can yield longer rows — confirm callers
        # never pass one.
        X.append(encode(infix))
        Y.append(encode(postfix))
    return np.array(X), np.array(Y)

def shift_right(seqs):
    """Build decoder inputs by shifting each row one step to the right.

    Column 0 of the result holds ``SOS_ID``; column ``j`` (for ``j >= 1``)
    holds column ``j - 1`` of ``seqs``, so the last column of ``seqs`` is
    dropped. The input array is not modified.

    Args:
        seqs: array of id sequences, indexed as ``[row, position]``.

    Returns:
        A new array with the same shape and dtype as ``seqs``.
    """
    decoder_in = np.empty_like(seqs)
    # Every column is written below, so the uninitialised buffer never leaks.
    decoder_in[:, 1:] = seqs[:, :-1]
    decoder_in[:, 0] = SOS_ID
    return decoder_in



<h1><b>Experiments</b></h1>

<h2>Experiment 1: Single-Layer LSTM (128 units)</h2>

- Setup: One LSTM layer in encoder and decoder, 128 units each, embedding dim 128
- Training: 10 epochs, batch size 64
- Result: Training acc 99%, Val acc 99%, Eval acc 86%
- Interpretation: Model fits training set, but doesn't generalize perfectly.


<h2>Experiment 2: Adding Dropout/Regularization</h2>

- Goal: Reduce overfitting seen in previous setup.
- Method: Add dropout and/or L2 regularization to LSTM layers.
- Result: Evaluation accuracy decreased further.



<h2>Experiment 3: Increasing LSTM Units</h2>

- Change: Increased LSTM units in encoder/decoder.
    - To 192 units: evaluation accuracy increased to 90%.
    - To 256 units: evaluation accuracy increased to 94%.


<h2>Experiment 4: Stacked LSTM Layers (2 layers)</h2>

- Change: Added a second LSTM layer (encoder and decoder), each with 192 units.
- Result: Training, validation, and evaluation accuracy all reach 100%.




In [3]:
# 3 --> Model with 2 LSTM layers <--
# Encoder-decoder network trained with teacher forcing: the encoder reads the
# infix ids, and each decoder LSTM layer starts from the final state of the
# encoder LSTM layer at the same depth.

EMBEDDING_DIM = 192
LSTM_UNITS = 192

# Embedding Layers
# Only the encoder embedding masks id 0 (mask_zero=True).
# NOTE(review): the decoder embedding is created without mask_zero, so PAD
# positions in the targets presumably count toward loss/accuracy — confirm
# this is intended.
encoder_embedding = Embedding(VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True, )
decoder_embedding = Embedding(VOCAB_SIZE, EMBEDDING_DIM, )

# LSTM layers with Dense layer
# All layers are created once here and reused when the inference models are
# built, so training and inference share the same weights.
encoder_lstm1 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True, )
encoder_lstm2 = LSTM(LSTM_UNITS, return_state=True, )
decoder_lstm1 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True,)
decoder_lstm2 = LSTM(LSTM_UNITS, return_sequences=True, return_state=True,)
# Per-timestep softmax over the vocabulary.
decoder_dense = TimeDistributed(Dense(VOCAB_SIZE, activation='softmax'), )

# Encoder
# Layer 1 returns the full sequence (input to layer 2) plus its final h/c
# state; layer 2 returns only its last output plus its final h/c state.
encoder_inputs = Input(shape=(MAX_LEN,), )
x = encoder_embedding(encoder_inputs)
encoder_outputs1, state_h1, state_c1 = encoder_lstm1(x)
encoder_outputs2, state_h2, state_c2 = encoder_lstm2(encoder_outputs1)

# Decoder
# Variable-length input (shape=(None,)); the returned decoder states are
# discarded here and only used by the inference model.
decoder_inputs = Input(shape=(None,),)
y = decoder_embedding(decoder_inputs)
y, _, _ = decoder_lstm1(y, initial_state=[state_h1, state_c1])
y, _, _ = decoder_lstm2(y, initial_state=[state_h2, state_c2])
decoder_outputs = decoder_dense(y)

# Model
# Inputs: [encoder ids, decoder ids]; output: per-step token probabilities.
# The sparse loss takes integer token ids as targets (no one-hot encoding).
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [4]:
# Data preparation
# Sample the training pairs, then derive the teacher-forcing decoder inputs
# (the targets shifted right by one position behind SOS).
X_train, Y_train = generate_dataset(20000)
decoder_input_train = shift_right(Y_train)

# Padding & truncating: force every array to exactly MAX_LEN columns.
X_train, Y_train, decoder_input_train = (
    pad_sequences(arr, maxlen=MAX_LEN, padding='post', truncating='post')
    for arr in (X_train, Y_train, decoder_input_train)
)

# Training: 20% of the samples are held out for validation.
history = model.fit(
    x=[X_train, decoder_input_train],
    y=Y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=64,
)

Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 695ms/step - accuracy: 0.8098 - loss: 0.6170 - val_accuracy: 0.9297 - val_loss: 0.1828
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 632ms/step - accuracy: 0.9459 - loss: 0.1431 - val_accuracy: 0.9833 - val_loss: 0.0527
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 574ms/step - accuracy: 0.9871 - loss: 0.0416 - val_accuracy: 0.9914 - val_loss: 0.0261
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 558ms/step - accuracy: 0.9957 - loss: 0.0162 - val_accuracy: 0.9972 - val_loss: 0.0105
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m158s[0m 631ms/step - accuracy: 0.9982 - loss: 0.0081 - val_accuracy: 0.9984 - val_loss: 0.0067
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 561ms/step - accuracy: 0.9988 - loss: 0.0054 - val_accuracy: 0.9982 - val_loss: 0.0062
Epoc

In [5]:
# Training and Validation --> Accuracies & Loss Plotting

# One row per epoch; the 1-based epoch column becomes the x-axis.
history_df = pd.DataFrame(history.history).assign(
    epoch=lambda frame: range(1, len(frame) + 1)
)

# Draw all four curves (train/validation loss and accuracy) on one chart.
fig = px.line(
    data_frame=history_df,
    x='epoch',
    y=['loss', 'val_loss', 'accuracy', 'val_accuracy'],
    title='Training Progress',
    labels={'value': 'Metric Value', 'variable': 'Metric'},
)
fig.show()

In [11]:
# --> Inference Models <---
# Both models reuse the layers and tensors created for the training graph,
# so they share the trained weights.

# Encoder inference model
# Maps an encoded input to the final states of both encoder LSTM layers,
# in the order [h1, c1, h2, c2].
encoder_model = Model(encoder_inputs, [state_h1, state_c1, state_h2, state_c2])

# Decoder inference model
# Placeholders for the states fed back in at every decoding step.
decoder_state_input_h1 = Input(shape=(LSTM_UNITS,))
decoder_state_input_c1 = Input(shape=(LSTM_UNITS,))
decoder_state_input_h2 = Input(shape=(LSTM_UNITS,))
decoder_state_input_c2 = Input(shape=(LSTM_UNITS,))

# Same layer chain as the training decoder, but the updated states of each
# LSTM layer are kept so the caller can pass them to the next step.
decoder_inputs_inf = Input(shape=(None,))
y = decoder_embedding(decoder_inputs_inf)
y, out_h1, out_c1 = decoder_lstm1(y, initial_state=[decoder_state_input_h1, decoder_state_input_c1])
y, out_h2, out_c2 = decoder_lstm2(y, initial_state=[decoder_state_input_h2, decoder_state_input_c2])
decoder_outputs_inf = decoder_dense(y)

# Inputs:  [token ids, h1, c1, h2, c2]
# Outputs: [token probabilities, new h1, new c1, new h2, new c2]
decoder_model = Model(
    [decoder_inputs_inf, decoder_state_input_h1, decoder_state_input_c1, decoder_state_input_h2, decoder_state_input_c2],
    [decoder_outputs_inf, out_h1, out_c1, out_h2, out_c2]
)



In [12]:
# --> Decoding and Evaluation <--
def decode_sequence(input_seq, tokenizer, reverse_tokenizer, start_token_idx, end_token_idx):
    """Greedily decode one encoded input using the global inference models.

    Args:
        input_seq: encoder input, passed unchanged to ``encoder_model.predict``.
        tokenizer: accepted for interface compatibility; not used in the body.
        reverse_tokenizer: mapping from token id to token string.
        start_token_idx: id fed to the decoder at the first step.
        end_token_idx: id that stops decoding as soon as it is predicted.

    Returns:
        The predicted tokens joined by single spaces (end token excluded).
    """
    h1, c1, h2, c2 = encoder_model.predict(input_seq, verbose=0)
    carry = [h1, c1, h2, c2]
    next_input = np.array([[start_token_idx]])
    emitted = []

    # Hard cap of MAX_LEN steps in case the end token is never produced.
    for _step in range(MAX_LEN):
        probs, h1, c1, h2, c2 = decoder_model.predict([next_input] + carry, verbose=0)
        best_id = np.argmax(probs[0, -1])
        best_token = reverse_tokenizer.get(best_id, '')
        if best_id == end_token_idx:
            break
        emitted.append(best_token)
        # Feed the chosen token and the updated states into the next step.
        next_input = np.array([[best_id]])
        carry = [h1, c1, h2, c2]

    return ' '.join(emitted)


In [13]:
# --> Evaluating Model <--
def evaluate_model(encoder_model, decoder_model,
                   input_texts, target_texts,
                   tokenizer, reverse_tokenizer,
                   start_token_idx, end_token_idx):
    """Exact-match accuracy of greedy decoding over a batch of examples.

    Each input row is decoded with ``decode_sequence``; the prediction and
    the reference are stripped of special tokens and compared as strings.
    Every example is printed (input ids, prediction, expectation).

    Args:
        encoder_model: not referenced in the body (``decode_sequence`` is
            called without it).
        decoder_model: not referenced in the body, same as above.
        input_texts: encoded inputs, sliced one row at a time.
        target_texts: encoded reference outputs, indexed like ``input_texts``.
        tokenizer: mapping token -> id; must contain ``'PAD'``.
        reverse_tokenizer: mapping id -> token.
        start_token_idx: start-of-sequence id.
        end_token_idx: end-of-sequence id.

    Returns:
        Fraction of examples whose cleaned prediction equals the cleaned
        reference.
    """
    def _is_payload(idx):
        # Ids that must not appear in the printable reference string.
        return idx not in [0, start_token_idx, end_token_idx, tokenizer['PAD'], tokenizer.get('JUNK', -1)]

    total = len(input_texts)
    hits = 0
    for row in range(total):
        # Slice (not index) so the row keeps its leading batch dimension.
        input_seq = input_texts[row:row + 1]

        decoded_clean = decode_sequence(input_seq, tokenizer, reverse_tokenizer, start_token_idx, end_token_idx)
        for marker in ('EOS', 'PAD', 'JUNK'):
            decoded_clean = decoded_clean.replace(marker, '')
        decoded_clean = decoded_clean.strip()

        payload_ids = filter(_is_payload, target_texts[row])
        target_clean = ' '.join(reverse_tokenizer.get(idx, '') for idx in payload_ids).strip()

        print(
            f"Input     : {input_seq}",
            f"Predicted : {decoded_clean}",
            f"Expected  : {target_clean}",
            "--------------------------------",
            sep='\n',
        )

        hits += int(decoded_clean == target_clean)

    return hits / total


In [14]:

# --- Test Accuracy ---
# Repeat the evaluation on several freshly generated batches and report the
# mean and spread of the exact-match accuracy.
num_rounds = 10
accuracies = []
for i in range(num_rounds):
    X_val, Y_val = generate_dataset(20)
    decoder_input_val = shift_right(Y_val)
    X_val, decoder_input_val, Y_val = (
        pad_sequences(arr, maxlen=MAX_LEN, padding='post', truncating='post')
        for arr in (X_val, decoder_input_val, Y_val)
    )
    print('Round--->', i+1)

    accuracy = evaluate_model(
        encoder_model, decoder_model,
        X_val, Y_val,
        token_to_id, id_to_token,
        SOS_ID, EOS_ID,
    )
    accuracies.append(accuracy)

mean_acc = np.mean(accuracies)
std_acc = np.std(accuracies)

# Summary banner: one print call, one line per argument.
print(
    '\n\n\n',
    "****************************",
    "++++++++++++++++++++++++++++",
    "****************************",
    f"Mean Accuracy over {num_rounds} rounds: {mean_acc:.4f}",
    f"Standard Deviation: {std_acc:.4f}",
    "****************************",
    "----------------------------",
    "****************************",
    sep='\n',
)

Round---> 1
Input     : [[ 3  3  3 12  6  9  4  6  3 11  5 12  4  4  7  3 12  6 12  4  4  2  0  0
   0  0  0  0  0  0]]
Predicted : d a - c d + - d d - *
Expected  : d a - c d + - d d - *
--------------------------------
Input     : [[ 3  3 10  7 12  4  8  3 13  8 10  4  4  2  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0]]
Predicted : b d * e b / /
Expected  : b d * e b / /
--------------------------------
Input     : [[ 3 10  6  3 12  7  3 10  8 10  4  4  4  2  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0]]
Predicted : b d b b / * -
Expected  : b d b b / * -
--------------------------------
Input     : [[ 3  3 12  8  9  4  7  3 13  8  9  4  4  2  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0]]
Predicted : d a / e a / *
Expected  : d a / e a / *
--------------------------------
Input     : [[ 3  3  3  9  7 10  4  7 11  4  7  3 12  5  3 13  6 13  4  4  4  2  0  0
   0  0  0  0  0  0]]
Predicted : a b * c * d e e - + *
Expected  : a b * c * d e e - + *
------------------------

In [15]:
from google.colab import files

# Write the weights under the required '.weights.h5' extension, then hand
# that same file to the browser download.
weights_path = 'encoder_decoder.weights.h5'
model.save_weights(weights_path)
files.download(weights_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
!pip install -q gdown
import gdown

file_id = '1EOW8GxzyO1SPs3fT4BtCMvB0Vo2nsNTz'  # Replace with your real file id
output = 'encoder_decoder_weights.h5'
gdown.download(f'https://drive.google.com/uc?id={file_id}', output, quiet=False)

model.load_weights(output)  # Make sure you define your model architecture first!
print("Weights loaded successfully!")

Downloading...
From: https://drive.google.com/uc?id=1EOW8GxzyO1SPs3fT4BtCMvB0Vo2nsNTz
To: /content/encoder_decoder_weights.h5
100%|██████████| 14.4M/14.4M [00:00<00:00, 37.5MB/s]


ValueError: Layer count mismatch when loading weights from file. Model expected 7 layers, found 0 saved layers.