In [5]:
import tensorflow as tf
import numpy as np
import csv

In [None]:
# Load the data from the CSV file
with open('data.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    # csv.reader yields an empty list for a blank line; drop those so every
    # kept row has at least one field and can be indexed safely.
    data = [row for row in reader if row]

In [None]:
# Sanity check of the load: number of rows first, then the number of
# fields in the first row.
row_count = len(data)
print(row_count)
first_row_width = len(data[0])
print(first_row_width)

In [180]:
# Split each row into its question (first column) and answer (second column)
questions = []
answers = []
for record in data:
    questions.append(record[0])
    answers.append(record[1])

In [181]:
# Build a vocabulary from the questions and answers.
# Index 0 is reserved for an explicit padding token: the encoded sequences are
# zero-initialised and the response decoder drops id 0, so without this
# reservation the first sorted word would be indistinguishable from padding.
PAD_TOKEN = '<pad>'
words = set()
for q, a in zip(questions, answers):
    words.update(q.lower().split())
    words.update(a.lower().split())
# A literal '<pad>' in the data must not appear twice in the vocabulary.
words.discard(PAD_TOKEN)
vocab = [PAD_TOKEN] + sorted(words)

In [182]:

# Encode every question and answer as a fixed-length row of vocabulary indices.
vocab_size = len(vocab)
seq_len = 15  # fixed sequence length: longer texts are truncated, shorter ones stay zero-padded

# Build the word -> index table once instead of running a linear vocab.index()
# scan for every word. setdefault keeps the first position of a word, which is
# exactly what list.index() returns.
word_to_id = {}
for idx, word in enumerate(vocab):
    word_to_id.setdefault(word, idx)

question_seqs = np.zeros((len(questions), seq_len), dtype=np.int32)
answer_seqs = np.zeros((len(answers), seq_len), dtype=np.int32)
for i, (q, a) in enumerate(zip(questions, answers)):
    # Tokenise the same way the vocabulary was built (lower-case, then split),
    # and keep at most seq_len tokens.
    q_seq = [word_to_id[w] for w in q.lower().split()[:seq_len]]
    a_seq = [word_to_id[w] for w in a.lower().split()[:seq_len]]
    question_seqs[i, :len(q_seq)] = q_seq
    answer_seqs[i, :len(a_seq)] = a_seq

In [183]:
# Assemble the network layer by layer:
#   token embedding -> LSTM returning its output at every timestep
#   -> dense projection to one score per vocabulary entry.
embedding_dim = 128
rnn_units = 256
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
model.add(tf.keras.layers.LSTM(rnn_units, return_sequences=True))
model.add(tf.keras.layers.Dense(vocab_size))

In [184]:
# Configure training: Adam optimizer, sparse categorical cross-entropy
# computed on raw logits (the final Dense layer has no activation).
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
)


In [2]:
# Fit the model: encoded questions are the inputs, encoded answers the
# per-position targets.
model.fit(x=question_seqs, y=answer_seqs, epochs=500)

In [186]:
# Define a function to generate responses
def generate_response(model, question, vocab, max_len=None):
    """Generate the bot's reply to ``question`` with one forward pass.

    The question is lower-cased, split on whitespace and mapped to vocabulary
    indices. Words that are not in ``vocab`` are skipped (there is no
    unknown-word token), and the sequence is truncated to ``max_len`` before
    being written into a zero-padded input row. The highest-scoring index at
    each output position is looked up in ``vocab``; index 0 is treated as
    padding and left out of the reply.

    Args:
        model: Object whose ``predict`` method accepts an int32 array of shape
            ``(1, max_len)`` and returns scores indexable as
            ``[batch, position, vocab_index]``.
        question: Raw user text.
        vocab: Sequence of words; a word's position is its token id.
        max_len: Number of token slots fed to the model. When omitted, the
            notebook-level ``seq_len`` is used.

    Returns:
        The predicted words joined by single spaces (may be empty).
    """
    if max_len is None:
        max_len = seq_len

    # First-occurrence mapping, equivalent to vocab.index() but built once.
    word_to_id = {}
    for idx, word in enumerate(vocab):
        word_to_id.setdefault(word, idx)

    tokens = question.lower().split()
    # Skip out-of-vocabulary words instead of raising, then truncate so the
    # assignment below can never exceed the row width.
    q_seq = [word_to_id[t] for t in tokens if t in word_to_id][:max_len]

    question_seq = np.zeros((1, max_len), dtype=np.int32)
    question_seq[0, :len(q_seq)] = q_seq

    logits = model.predict(question_seq)
    response_ids = np.argmax(logits, axis=-1)[0]
    return ' '.join(vocab[i] for i in response_ids if i != 0)

In [3]:
# Interactive check: answer each prompt until the user types 'exit'
question = input('You: ')
while question != 'exit':
    response = generate_response(model, question, vocab)
    print('Bot:', response)
    question = input('You: ')
    