<a href="https://colab.research.google.com/github/samsiroos/SLM-Architecture/blob/main/%DA%86%D8%AA%E2%80%8C%D8%A8%D8%A7%D8%AA_%D8%B3%D8%A7%D8%AF%D9%87_%D8%A8%D8%A7_LSTM_%D8%AF%D8%B1_Google_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Attention, Concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import numpy as np
import os # For file system operations

print("کتابخانه‌ها با موفقیت وارد شدند.")

# --- 1. Prepare Training Data from File ---
# Each non-empty line of the data file is expected to look like
# "<question>:::<answer>"; only the first ":::" splits the line, so an
# answer may itself contain ":::".
qa_data_file = "qa_data.txt"
model_filename = "chatbot_lstm_attention_model.keras" # New model filename

input_texts = []   # questions, fed to the encoder
target_texts = []  # answers wrapped in _start_ / _end_ markers, fed to the decoder

try:
    with open(qa_data_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # blank lines are ignored silently

            question, separator, answer = line.partition(":::")
            if not separator:
                print(f"Warning: Skipping line without ':::' separator in {qa_data_file}: {line}")
                continue

            question = question.strip()
            answer = answer.strip()
            if not question or not answer:
                # A pair with an empty side carries no training signal.
                print(f"Warning: Skipping malformed line in {qa_data_file}: {line}")
                continue

            input_texts.append(question)
            target_texts.append("_start_ " + answer + " _end_")
except FileNotFoundError:
    print(f"Error: The file '{qa_data_file}' was not found. Please ensure it's in the correct path.")
    # SystemExit is raised directly: the exit() helper comes from the site
    # module, is meant for interactive shells, and would report status 0.
    raise SystemExit(1)
except Exception as e:
    print(f"Error reading {qa_data_file}: {e}")
    raise SystemExit(1)

if not input_texts:
    print(f"Error: No valid Q&A pairs found in '{qa_data_file}'. Please ensure it's correctly formatted and not empty.")
    raise SystemExit(1)
print(f"تعداد جفت‌های پرسش و پاسخ بارگذاری شده از فایل: {len(input_texts)}\n")

# --- Tokenization ---
# The filter list leaves "_" out, so the _start_ and _end_ markers survive
# tokenization as ordinary vocabulary entries.
custom_filters = '!"#$%&()*+,-./:;<=>?@[\\]^`{|}~\t\n'
tokenizer = Tokenizer(lower=True, filters=custom_filters)
# One shared vocabulary for questions and answers.
tokenizer.fit_on_texts(input_texts + target_texts)

word_index = tokenizer.word_index
print(f"فهرست کلمات و ایندکس‌های آن‌ها (نمونه):\n{list(word_index.items())[:10]}...\n")

# +1 because index 0 is never assigned to a word (it is used for padding).
total_words = len(word_index) + 1
print(f"تعداد کل کلمات منحصر به فرد در واژه‌نامه: {total_words}\n")

# Integer-encode both sides of every pair.
encoder_input_sequences = tokenizer.texts_to_sequences(input_texts)
decoder_input_sequences = tokenizer.texts_to_sequences(target_texts)

# Longest sequence on each side determines the padded width.
max_encoder_seq_len = max(len(seq) for seq in encoder_input_sequences)
max_decoder_seq_len = max(len(seq) for seq in decoder_input_sequences)

print(f"حداکثر طول دنباله ورودی (پرسش): {max_encoder_seq_len}")
print(f"حداکثر طول دنباله خروجی (پاسخ): {max_decoder_seq_len}\n")

# Right-pad with zeros up to the longest sequence.
encoder_input_data = tf.keras.preprocessing.sequence.pad_sequences(
    encoder_input_sequences,
    maxlen=max_encoder_seq_len,
    padding='post',
)

decoder_input_data = tf.keras.preprocessing.sequence.pad_sequences(
    decoder_input_sequences,
    maxlen=max_decoder_seq_len,
    padding='post',
)

# One-hot targets: position t of the target holds the token found at
# position t + 1 of the decoder input (teacher forcing, shifted by one step).
# Positions past the end of a sequence stay all-zero.
target_shape = (len(target_texts), max_decoder_seq_len, total_words)
decoder_target_data = np.zeros(target_shape, dtype='float32')

for sample_idx, token_ids in enumerate(decoder_input_sequences):
    # Dropping the first token (_start_) performs the one-step shift.
    for step, token_id in enumerate(token_ids[1:]):
        decoder_target_data[sample_idx, step, token_id] = 1.0

print(f"ابعاد ورودی Encoder (X_encoder): {encoder_input_data.shape}")
print(f"ابعاد ورودی Decoder (X_decoder): {decoder_input_data.shape}")
print(f"ابعاد خروجی Decoder (Y_decoder_target): {decoder_target_data.shape}\n")


# --- Check for existing model and handle training/loading ---
# `model` stays None until a model has been loaded here or built later.
# `train_model` stays True unless the user declines retraining AND the saved
# model loads successfully.
model = None
train_model = True # Default to train if no model exists or user wants to retrain

if os.path.exists(model_filename):
    print(f"\nمدل موجود یافت شد: '{model_filename}'.")
    # Strip the answer so stray whitespace (e.g. "خیر ") does not silently
    # trigger a full retraining run.
    user_choice = input("آیا می‌خواهید مدل را دوباره آموزش دهید؟ (بله/خیر): ").strip().lower()
    if user_choice in ('خیر', 'نه', 'no', 'n'):
        try:
            model = load_model(model_filename)
        except Exception as e:
            # Fallback to training if loading fails (train_model is still True).
            print(f"خطا در بارگذاری مدل: {e}. با آموزش یک مدل جدید ادامه خواهیم داد.")
        else:
            train_model = False
            print(f"مدل با موفقیت از '{model_filename}' بارگذاری شد.")
    else:
        print("آموزش مجدد توسط کاربر درخواست شد.")
else:
    print(f"\nمدل موجودی یافت نشد در '{model_filename}'. یک مدل جدید آموزش داده خواهد شد.")

if train_model:
    # --- 2. Build the Encoder-Decoder LSTM Model with Attention ---
    embedding_dim = 256
    lstm_units = 256 # Increased LSTM units for more capacity

    # --- Encoder ---
    encoder_inputs = Input(shape=(max_encoder_seq_len,))
    encoder_embedding = Embedding(total_words, embedding_dim, name='encoder_embedding')(encoder_inputs)

    # Encoder LSTM 1: Returns sequences and states
    encoder_lstm_1_output_seq, encoder_state_h1, encoder_state_c1 = LSTM(
        lstm_units, return_sequences=True, return_state=True, name='encoder_lstm_1'
    )(encoder_embedding)

    # Encoder LSTM 2: consumes the sequence output of encoder_lstm_1 and is
    # initialised with that layer's final states. Its sequence output feeds the
    # attention layer; its final states are the context handed to the decoder.
    encoder_outputs_seq, state_h, state_c = LSTM(
        lstm_units, return_sequences=True, return_state=True, name='encoder_lstm_2'
    )(encoder_lstm_1_output_seq, initial_state=[encoder_state_h1, encoder_state_c1])

    encoder_states = [state_h, state_c] # The final context states (hidden and cell) from the last encoder LSTM

    # --- Decoder ---
    decoder_inputs = Input(shape=(max_decoder_seq_len,))
    decoder_embedding = Embedding(total_words, embedding_dim, name='decoder_embedding')(decoder_inputs)

    # Decoder LSTM 1: Takes encoder's final states as initial_state.
    decoder_lstm_1_layer = LSTM(lstm_units, return_sequences=True, return_state=True, name='decoder_lstm_1')
    decoder_outputs_1_seq, _, _ = decoder_lstm_1_layer(decoder_embedding, initial_state=encoder_states)

    # Decoder LSTM 2: Takes sequence output from decoder_lstm_1 and also starts
    # from the encoder's final states.
    # It must NOT be seeded with decoder_lstm_1's final states: those are only
    # available after the whole teacher-forced target sequence has been read,
    # so they would leak future tokens into every timestep and could not be
    # reproduced when decoding one token at a time.
    decoder_lstm_2_layer = LSTM(lstm_units, return_sequences=True, return_state=True, name='decoder_lstm_2')
    decoder_outputs_2_seq, _, _ = decoder_lstm_2_layer(decoder_outputs_1_seq, initial_state=encoder_states)

    # Attention Mechanism
    # Query: Decoder's output sequence (decoder_outputs_2_seq)
    # Value: Encoder's output sequence (encoder_outputs_seq)
    attention_output = Attention(name='attention_layer')([decoder_outputs_2_seq, encoder_outputs_seq])

    # Concatenate attention output with decoder's LSTM output
    decoder_concat_input = Concatenate(axis=-1, name='decoder_attention_concat')([decoder_outputs_2_seq, attention_output])

    # Dense layer for output prediction (softmax over the vocabulary)
    decoder_dense = Dense(total_words, activation='softmax', name='decoder_output_dense')
    decoder_outputs = decoder_dense(decoder_concat_input)

    # Define the training model
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    model.summary()

    # --- 3. Train the Model ---
    epochs = 200 # Increased epochs
    batch_size = 8 # Adjusted batch size for this dataset size

    print("\nشروع آموزش مدل چت‌بات LSTM با Attention...")
    history = model.fit(
        [encoder_input_data, decoder_input_data],
        decoder_target_data,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1
    )

    print("\nآموزش مدل به پایان رسید.")
    print(f"دقت نهایی مدل در آموزش: {history.history['accuracy'][-1]:.4f}")
    print(f"خطای نهایی مدل در آموزش: {history.history['loss'][-1]:.4f}")

    # Save the trained model
    model.save(model_filename)
    print(f"مدل با موفقیت در '{model_filename}' ذخیره شد.")
else:
    # If the model was loaded, display its summary
    if model is not None:
        print("\nدر حال استفاده از مدل از پیش موجود.")
        model.summary()
    else:
        print("خطا: مدل نتوانست بارگذاری یا ساخته شود. لطفاً تنظیمات را بررسی کنید.")
        # Raise SystemExit directly; exit() is an interactive-shell helper
        # and would report a success status.
        raise SystemExit(1)

# --- 4. Build Inference Models (for actual chatting) ---
# The inference models re-use the trained layers (looked up by name) so they
# share weights with `model`, whether it was just trained or loaded from disk.
encoder_lstm_1_inf_layer = model.get_layer('encoder_lstm_1')
encoder_lstm_2_inf_layer = model.get_layer('encoder_lstm_2')
decoder_lstm_1_inf_layer = model.get_layer('decoder_lstm_1')
decoder_lstm_2_inf_layer = model.get_layer('decoder_lstm_2')
attention_inf_layer = model.get_layer('attention_layer')
decoder_concat_inf_layer = model.get_layer('decoder_attention_concat')
decoder_dense_inf_layer = model.get_layer('decoder_output_dense')

# Layer widths are read from the layers themselves: `lstm_units` is only
# assigned when a model is trained in this run, so relying on it would raise
# NameError whenever a saved model is loaded instead.
encoder_output_units = encoder_lstm_2_inf_layer.units
decoder_lstm_1_units = decoder_lstm_1_inf_layer.units
decoder_lstm_2_units = decoder_lstm_2_inf_layer.units

# Encoder (Inference) Model: Takes input sequence and outputs the encoder's
# sequence output plus the final (h, c) states of both encoder LSTMs.
encoder_inputs_inf = Input(shape=(max_encoder_seq_len,))
encoder_embedding_inf = model.get_layer('encoder_embedding')(encoder_inputs_inf)

encoder_lstm_1_output_seq_inf, encoder_state_h1_inf, encoder_state_c1_inf = encoder_lstm_1_inf_layer(encoder_embedding_inf)
encoder_outputs_seq_inf, encoder_state_h2_inf, encoder_state_c2_inf = encoder_lstm_2_inf_layer(
    encoder_lstm_1_output_seq_inf,
    initial_state=[encoder_state_h1_inf, encoder_state_c1_inf],
)

encoder_model = Model(
    encoder_inputs_inf,
    [encoder_outputs_seq_inf, encoder_state_h1_inf, encoder_state_c1_inf, encoder_state_h2_inf, encoder_state_c2_inf],
)

# Decoder (Inference) Model: Takes decoder input, encoder's output sequence, and previous decoder states,
# outputs predictions and new states.
decoder_state_input_h1 = Input(shape=(decoder_lstm_1_units,), name='decoder_state_input_h1')
decoder_state_input_c1 = Input(shape=(decoder_lstm_1_units,), name='decoder_state_input_c1')
decoder_state_input_h2 = Input(shape=(decoder_lstm_2_units,), name='decoder_state_input_h2')
decoder_state_input_c2 = Input(shape=(decoder_lstm_2_units,), name='decoder_state_input_c2')
decoder_states_inputs = [decoder_state_input_h1, decoder_state_input_c1, decoder_state_input_h2, decoder_state_input_c2]

# Input for encoder's output sequence (the attention "value")
encoder_outputs_seq_decoder_input = Input(
    shape=(max_encoder_seq_len, encoder_output_units),
    name='encoder_outputs_seq_decoder_input',
)

_decoder_inputs_inf = Input(shape=(1,)) # Decoder input for inference is always a single token at a time
_decoder_embedding_inf = model.get_layer('decoder_embedding')(_decoder_inputs_inf)

# Pass initial states to the first LSTM
decoder_outputs_lstm1_inf, state_h1_new, state_c1_new = decoder_lstm_1_inf_layer(
    _decoder_embedding_inf,
    initial_state=[decoder_states_inputs[0], decoder_states_inputs[1]],
)

# Pass the output sequence from the first LSTM to the second, and its own initial states
decoder_outputs_lstm2_inf, state_h2_new, state_c2_new = decoder_lstm_2_inf_layer(
    decoder_outputs_lstm1_inf,
    initial_state=[decoder_states_inputs[2], decoder_states_inputs[3]],
)

decoder_states_outputs = [state_h1_new, state_c1_new, state_h2_new, state_c2_new] # Collect new states from both LSTMs

# Attention over the encoder sequence, concatenated with the LSTM output and
# projected to vocabulary probabilities, in the same order as the training graph.
_attention_inf = attention_inf_layer([decoder_outputs_lstm2_inf, encoder_outputs_seq_decoder_input])
_decoder_outputs_inf = decoder_dense_inf_layer(
    decoder_concat_inf_layer([decoder_outputs_lstm2_inf, _attention_inf])
)

decoder_model = Model(
    [_decoder_inputs_inf, encoder_outputs_seq_decoder_input] + decoder_states_inputs,
    [_decoder_outputs_inf] + decoder_states_outputs
)

# --- Function to decode sequence (generate response) ---
# Inverse vocabulary lookup: token index -> word.
reverse_word_index = {index: word for word, index in word_index.items()}

def decode_sequence(input_sentence, temperature=0.7):
    """Generate a reply to *input_sentence* by sampling one token at a time.

    Args:
        input_sentence: The user's question as raw text.
        temperature: Sampling temperature. Values below 1 sharpen the model's
            distribution, values above 1 flatten it, and a value <= 0 selects
            the most probable token at every step (greedy decoding).

    Returns:
        The generated words joined by single spaces, without the
        ``_start_`` / ``_end_`` markers. May be an empty string.
    """
    # Convert input sentence to a padded sequence of integers
    input_seq = tokenizer.texts_to_sequences([input_sentence])[0]
    input_seq = tf.keras.preprocessing.sequence.pad_sequences(
        [input_seq], maxlen=max_encoder_seq_len, padding='post')

    # Encoder output sequence (for attention) and the final states of both encoder LSTMs
    encoder_output_sequence, _h1_enc, _c1_enc, h2_enc, c2_enc = encoder_model.predict(input_seq, verbose=0)

    # The training graph seeds decoder_lstm_1 with the final states of the LAST
    # encoder LSTM, so inference has to start from those same states (not from
    # encoder_lstm_1's). decoder_lstm_2 is started from the same context states.
    decoder_states = [h2_enc, c2_enc, h2_enc, c2_enc]

    # Target sequence of length 1 holding the start token
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = word_index['_start_']

    max_response_tokens = max_decoder_seq_len - 1  # -1 for _start_ token
    decoded_words = []
    while len(decoded_words) < max_response_tokens:
        # Predict next-token probabilities and new states
        output_tokens, h1, c1, h2, c2 = decoder_model.predict(
            [target_seq, encoder_output_sequence] + decoder_states, verbose=0)

        # The dense layer already ends in a softmax, so these are probabilities.
        # float64 keeps np.random.choice's "p must sum to 1" check from tripping
        # on float32 rounding.
        probabilities = np.asarray(output_tokens[0, -1, :], dtype=np.float64)

        # Temperature has to be applied in log space; dividing probabilities and
        # re-applying softmax would flatten them to a near-uniform distribution.
        scores = np.log(np.maximum(probabilities, 1e-12))
        scores[0] = -np.inf  # index 0 is the padding id, never a real word

        if temperature <= 0:
            sampled_token_index = int(np.argmax(scores))
        else:
            scores = scores / temperature
            scores -= scores.max()  # numerical stability before exponentiating
            weights = np.exp(scores)
            weights /= weights.sum()
            sampled_token_index = int(np.random.choice(len(weights), p=weights))

        sampled_word = reverse_word_index.get(sampled_token_index, '<unk>')
        if sampled_word == '_end_':
            break
        decoded_words.append(sampled_word)

        # Feed the sampled token and the updated states into the next step
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        decoder_states = [h1, c1, h2, c2]

    return ' '.join(decoded_words)

# --- Interactive Chat Loop ---
# Reads one line at a time, answers it with decode_sequence(), and stops when
# the user types the exit word or the input stream ends.
print("\n--- چت‌بات LSTM با Attention آماده است! ---")
print("برای خروج، 'خروج' را تایپ کنید.")

while True:
    try:
        # Strip so that "خروج " (with stray whitespace) still exits.
        user_input = input("\nشما: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupt ends the chat instead of raising a traceback.
        print("\nچت‌بات: خداحافظ، روز خوبی داشته باشید.")
        break

    if not user_input:
        continue  # nothing to answer; prompt again

    if user_input.lower() == 'خروج':
        print("چت‌بات: خداحافظ، روز خوبی داشته باشید.")
        break

    # Generate response
    response = decode_sequence(user_input.lower(), temperature=0.7) # Use temperature for response generation
    print(f"چت‌بات: {response}")


کتابخانه‌ها با موفقیت وارد شدند.
تعداد جفت‌های پرسش و پاسخ بارگذاری شده از فایل: 3030

فهرست کلمات و ایندکس‌های آن‌ها (نمونه):
[('_start_', 1), ('_end_', 2), ('من', 3), ('آیا', 4), ('می\u200cتوانی', 5), ('باشی؟', 6), ('نیستم', 7), ('هستم', 8), ('با', 9), ('خلاق', 10)]...

تعداد کل کلمات منحصر به فرد در واژه‌نامه: 429

حداکثر طول دنباله ورودی (پرسش): 7
حداکثر طول دنباله خروجی (پاسخ): 15

ابعاد ورودی Encoder (X_encoder): (3030, 7)
ابعاد ورودی Decoder (X_decoder): (3030, 15)
ابعاد خروجی Decoder (Y_decoder_target): (3030, 15, 429)


مدل موجود یافت شد: 'chatbot_lstm_attention_model.keras'.
آیا می‌خواهید مدل را دوباره آموزش دهید؟ (بله/خیر): بله
آموزش مجدد توسط کاربر درخواست شد.



شروع آموزش مدل چت‌بات LSTM با Attention...
Epoch 1/200
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.1487 - loss: 0.7731
Epoch 2/200
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.1966 - loss: 0.4744
Epoch 3/200
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.2013 - loss: 0.4078
Epoch 4/200
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.2037 - loss: 0.3889
Epoch 5/200
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.2125 - loss: 0.3636
Epoch 6/200
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.2233 - loss: 0.3025
Epoch 7/200
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.2339 - loss: 0.2615
Epoch 8/200
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.2433