In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [3]:
conversations = [
    ("Hi", "Hello! How can I help you?"),
    ("What is your name?", "I am a chatbot."),
    ("How are you?", "I'm just code, but thanks for asking!"),
    ("Goodbye", "Goodbye! Have a nice day!"),
]

In [4]:
questions, answers = zip(*conversations)

In [5]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions + answers)

In [6]:
question_sequences = tokenizer.texts_to_sequences(questions)
answer_sequences = tokenizer.texts_to_sequences(answers)

In [7]:
vocab_size = len(tokenizer.word_index) + 1

In [8]:
max_seq_length = max(len(seq) for seq in question_sequences + answer_sequences)

In [9]:
question_sequences = pad_sequences(question_sequences, maxlen=max_seq_length, padding='post')
answer_sequences = pad_sequences(answer_sequences, maxlen=max_seq_length, padding='post')

In [11]:
labels = np.array(answer_sequences)

In [12]:
print(f"Shape of question_sequences: {question_sequences.shape}") 
print(f"Shape of labels: {labels.shape}") 

Shape of question_sequences: (4, 7)
Shape of labels: (4, 7)


In [14]:
embedding_dim = 64
lstm_units = 128

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_seq_length),
    LSTM(lstm_units, return_sequences=True),  # Return sequences for each step
    Dense(vocab_size, activation='softmax')  # Predict the next word in the sequence
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [15]:
model.summary()

In [16]:
labels = np.expand_dims(labels, -1)

model.fit(question_sequences, labels, epochs=10, batch_size=16)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0357 - loss: 3.2959
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.1429 - loss: 3.2871
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.2143 - loss: 3.2780
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - accuracy: 0.2143 - loss: 3.2683
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.2143 - loss: 3.2577
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.2143 - loss: 3.2458
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.2143 - loss: 3.2322
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.2143 - loss: 3.2163
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x2524f0f76b0>

In [18]:
model.save('chatbot_model.h5')



In [19]:
def respond(user_input):
    user_sequence = tokenizer.texts_to_sequences([user_input])
    user_sequence = pad_sequences(user_sequence, maxlen=max_seq_length, padding='post')
    prediction = model.predict(user_sequence)
    response_indices = np.argmax(prediction[0], axis=-1)  # Get predicted word indices
    response_words = [tokenizer.index_word.get(idx, "") for idx in response_indices if idx > 0]
    return " ".join(response_words)

In [20]:
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("Chatbot: Goodbye!")
        break
    response = respond(user_input)
    print(f"Chatbot: {response}")

You:  hii


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295ms/step
Chatbot: 


You:  c+


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
Chatbot: 


You:  how r u?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Chatbot: 


You:  exit


Chatbot: Goodbye!
