In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, GRU, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random

In [9]:
def generate_balanced_sequence(length):
    seq = []
    for _ in range(length // 2):
        seq.append('(')
    for _ in range(length // 2):
        seq.append(')')
    return ''.join(seq)

def generate_unbalanced_sequence(length):
    seq = [random.choice(['(', ')']) for _ in range(length)]
    if seq.count('(') == seq.count(')'):
        seq[-1] = '('
    return ''.join(seq)

In [10]:
def generate_dataset(n_samples=30000, maxlen=30):
    X, y = [], []
    for _ in range(n_samples // 2):
        s = generate_balanced_sequence(random.choice(range(6, maxlen, 2)))
        X.append(s)
        y.append(1)
    for _ in range(n_samples // 2):
        s = generate_unbalanced_sequence(random.randint(6, maxlen))
        X.append(s)
        y.append(0)
    return X, y

X_text, y = generate_dataset()

In [11]:
char_to_int = {'(': 1, ')': 2}
X_seq = [[char_to_int[c] for c in seq] for seq in X_text]
max_seq_len = max(len(x) for x in X_seq)
X_pad = pad_sequences(X_seq, maxlen=max_seq_len, padding='post')
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42, stratify=y)


In [12]:
def build_model(cell_type='LSTM'):
    model = Sequential()
    model.add(Embedding(input_dim=3, output_dim=16, input_length=max_seq_len))
    if cell_type == 'RNN':
        model.add(SimpleRNN(32))
    elif cell_type == 'GRU':
        model.add(GRU(32))
    else:
        model.add(LSTM(32))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [13]:
results = {}
models = {}

for cell in ['RNN', 'LSTM', 'GRU']:
    print(f"\nTraining {cell} model...")
    model = build_model(cell)
    history = model.fit(
        X_train, np.array(y_train),
        validation_split=0.2,
        epochs=5,
        batch_size=64,
        verbose=1
    )
    score = model.evaluate(X_test, np.array(y_test), verbose=0)
    print(f"{cell} Test Accuracy: {score[1]*100:.2f}%")
    results[cell] = history
    models[cell] = model


Training RNN model...




Epoch 1/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.9152 - loss: 0.2766 - val_accuracy: 0.9767 - val_loss: 0.0894
Epoch 2/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.9758 - loss: 0.0968 - val_accuracy: 0.9752 - val_loss: 0.1149
Epoch 3/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9693 - loss: 0.1093 - val_accuracy: 0.9910 - val_loss: 0.0405
Epoch 4/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8348 - loss: 0.3765 - val_accuracy: 0.9800 - val_loss: 0.0760
Epoch 5/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9846 - loss: 0.0588 - val_accuracy: 0.9925 - val_loss: 0.0241
RNN Test Accuracy: 99.32%

Training LSTM model...
Epoch 1/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.8288 - loss: 0.3384 - val_accuracy: 0.9956 

In [16]:
def predict_sequence(model, seq_str):
    seq_encoded = [char_to_int[c] for c in seq_str]
    seq_padded = pad_sequences([seq_encoded], maxlen=max_seq_len, padding='post')
    prob = model.predict(seq_padded, verbose=0)[0][0]
    return prob, "Balanced" if prob >= 0.5 else "Unbalanced"

test_sequences = [
    "(()(()))",   # balanced
    "(()())(()",  # unbalanced
    "((()))",     # balanced
    "()))(",      # unbalanced
]

print("\n=== Sample Predictions ===")
for seq in test_sequences:
    print(f"\nSequence: {seq}")
    for name, model in models.items():
        prob, label = predict_sequence(model, seq)
        print(f"{name}: {label} ")


=== Sample Predictions ===

Sequence: (()(()))
RNN: Balanced 
LSTM: Unbalanced 
GRU: Unbalanced 

Sequence: (()())(()
RNN: Unbalanced 
LSTM: Unbalanced 
GRU: Unbalanced 

Sequence: ((()))
RNN: Balanced 
LSTM: Balanced 
GRU: Balanced 

Sequence: ()))(
RNN: Unbalanced 
LSTM: Unbalanced 
GRU: Unbalanced 
