In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import re
import string

In [None]:
# Read the training corpus into a DataFrame (later cells use its 'text' column)
data = pd.read_csv(filepath_or_buffer='data.csv')

In [None]:
# Preprocess the data
def preprocess_text(text):
    """Normalise one piece of text for tokenisation.

    Lower-cases the input, deletes ASCII punctuation and runs of digits,
    collapses every run of whitespace to a single space and trims both ends.

    Args:
        text: The raw value. ``None``/NaN/``pd.NA`` (what pandas yields for an
            empty CSV cell) becomes ``''``; any other non-string value is
            converted with ``str`` first.

    Returns:
        The cleaned string (possibly empty).
    """
    # Missing cells arrive as float NaN, which has no .lower()
    if not isinstance(text, str):
        text = '' if pd.isna(text) else str(text)
    # Convert to lowercase
    text = text.lower()
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Remove numbers
    text = re.sub(r'\d+', '', text)
    # Remove extra whitespace; strip() too, because collapsing alone leaves a
    # stray space at either end
    return re.sub(r'\s+', ' ', text).strip()

# Clean every entry of the 'text' column, one value at a time
data['text'] = data['text'].map(preprocess_text)

In [None]:
# Create the vocabulary
# A sorted list (not a set) so every word receives the same position on every
# run; the iteration order of a set of strings changes between interpreter
# sessions because string hashing is randomised.
# Slot 0 holds a padding token: pad_sequences fills with 0, so that id must not
# belong to a real word. preprocess_text removes '<' and '>', so the token
# cannot collide with a word from the cleaned corpus.
PAD_TOKEN = '<pad>'
vocab = [PAD_TOKEN] + sorted(
    {word for text in data['text'] for word in text.split()}
)

In [None]:
# Word -> integer id lookup; ids follow the iteration order of `vocab`
word2idx = {token: position for position, token in enumerate(vocab)}

In [None]:
# Integer id -> word lookup table: entry i is the i-th word yielded by `vocab`.
# It lines up with word2idx only while `vocab` iterates in the same order it
# did when word2idx was built, i.e. as long as it is not modified in between.
idx2word = np.array([*vocab])

In [None]:
# Encode each cleaned text as the list of integer ids of its words
sequences = [
    [word2idx[token] for token in sentence.split()]
    for sentence in data['text']
]

In [None]:
# Build the network layer by layer:
# word ids -> 64-d embeddings -> LSTM with 64 units -> softmax over the vocabulary
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(len(vocab), 64))
model.add(tf.keras.layers.LSTM(64))
model.add(tf.keras.layers.Dense(len(vocab), activation='softmax'))


In [None]:
# Configure training: Adam optimizer, integer class labels scored against the
# softmax output
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [None]:
# Train the model
# The network ends in a single softmax over the vocabulary, so each training
# example is (ids of the words seen so far -> id of the word that follows).
# `sequences` is a ragged list of lists and cannot be fed to Keras as-is, so
# every text is expanded into its prefixes and padded to a fixed width.
MAX_LEN = 50  # keep equal to the maxlen used by generate_response

prefixes = []
next_words = []
for seq in sequences:
    for end in range(1, len(seq)):
        # Only the last MAX_LEN ids can survive padding, so drop the rest early
        prefixes.append(seq[max(0, end - MAX_LEN):end])
        next_words.append(seq[end])

if not prefixes:
    raise ValueError('No training examples: every text has fewer than two words')

# pad_sequences left-pads with 0 by default, the same preparation
# generate_response applies to a question.
# NOTE(review): confirm that no real word is mapped to id 0 in word2idx,
# otherwise padding is indistinguishable from that word.
X_train = tf.keras.preprocessing.sequence.pad_sequences(prefixes, maxlen=MAX_LEN)
y_train = np.array(next_words)

model.fit(X_train, y_train, epochs=10)


In [None]:
# Generate a response
def generate_response(model, text, word2idx, idx2word, maxlen=50):
    """Predict the single word the model scores highest as following `text`.

    Args:
        model: Object whose ``predict`` accepts the padded ``(1, maxlen)``
            integer array and returns one row of scores per input row.
        text: Raw user input. It is cleaned with ``preprocess_text`` so it is
            tokenised the same way as the training column.
        word2idx: Mapping from word to integer id.
        idx2word: Sequence indexable by integer id that gives the word back.
        maxlen: Width the id sequence is padded/truncated to. Defaults to 50,
            the value that used to be hard-coded here.

    Returns:
        The entry of ``idx2word`` at the highest-scoring position.
    """
    tokens = preprocess_text(text).split()
    # Words missing from the vocabulary are skipped instead of raising KeyError
    seq = [word2idx[word] for word in tokens if word in word2idx]
    seq = tf.keras.preprocessing.sequence.pad_sequences([seq], maxlen=maxlen)
    # predict returns one row per input row; there is exactly one row here
    pred = model.predict(seq)[0]
    next_word_idx = np.argmax(pred)
    return idx2word[next_word_idx]

In [None]:
# Test the model
# Interactive chat loop. Type 'exit' (any letter case, surrounding spaces are
# ignored), send EOF, or interrupt the kernel to stop.
while True:
    try:
        question = input('You: ').strip()
    except (EOFError, KeyboardInterrupt):
        # No more input is coming; leave the loop without a traceback
        break
    if question.lower() == 'exit':
        break
    if not question:
        # Nothing was typed; ask again
        continue
    try:
        response = generate_response(model, question, word2idx, idx2word)
    except KeyError as err:
        # A word missing from word2idx must not end the whole session
        print('Bot: unknown word', err)
        continue
    print('Bot:', response)