In [None]:
# pip install torch

In [None]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using Device: {device}")

    device_no = torch.cuda.current_device()
    print(f"Current device number is: {device_no}")

    device_name = torch.cuda.get_device_name(device_no)
    print(f"GPU name is: {device_name}")
else:
    print("CUDA is not available")

In [None]:
import torch #an open source ML library used for creating deep neural networks
import torch.nn as nn # A module in PyTorch that provides classes and functions to build neural networks
import torch.optim as optim # A module in PyTorch that provides various optimization algorithms for training neural networks
import random # A module that implements pseudo-random number generators for various distributions
import re # A module for working with regular expressions to match and manipulate strings
import numpy as np

# Sample dataset
data = [
    ("hello", "hi"),
    ("how are you", "I'm good, how about you?"),
    ("what is your name", "I'm a chatbot"),
    ("bye", "goodbye"),
]

# Preprocessing
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text) # removes all characters from the input text that are not word characters or whitespace
    return text

# Vocabulary
all_words = []
for (pattern, response) in data:
    pattern = preprocess(pattern)
    response = preprocess(response)
    words = pattern.split() + response.split() #splits it into a list of words, using whitespace as the delimiter: "hello Ameer" to ["hello","Aeer"]
    all_words.extend(words) #appends the elements of the list words to the end of the list all_words
all_words = sorted(set(all_words))


# Word to index mapping
word_to_idx = {word: idx for idx, word in enumerate(all_words)} #enumerate iterates on pairs of (index, value) tuples.

# Encode patterns and responses
def encode(text):
    text = preprocess(text)
    return [word_to_idx[word] for word in text.split() if word in word_to_idx]

encoded_data = [(encode(pattern), encode(response)) for (pattern, response) in data]

# Pad sequences: ad_sequence function is used to ensure that all sequences in a dataset have the same length
def pad_sequence(seq, max_len, padding_value=0): #seq is input sequence that needs to be padded, max_len is desired length for all sequences, padding_val is alue used to fill the sequence to reach the maximum length
    return seq + [padding_value] * (max_len - len(seq)) #If the input sequence is shorter than the max_len, it appends padding_value to the end of the sequence until it reaches the desired length.

# Determine the maximum length of patterns and responses
max_pattern_len = max(len(pattern) for pattern, response in encoded_data)
max_response_len = max(len(response) for pattern, response in encoded_data)
max_len = max(max_pattern_len, max_response_len)

# Pad patterns and responses
padded_patterns = [pad_sequence(pattern, max_len) for pattern, response in encoded_data]
padded_responses = [pad_sequence(response, max_len) for pattern, response in encoded_data]

print(f"Max Pattern Length: {max_pattern_len}")
print(f"Max Response Length: {max_response_len}")
# print(f"Max Length: {max_len}")
print(f"Padded Pattern: {padded_patterns}")
print(f"Padded Response: {padded_responses}")

# Additional debugging:
print("Example padded pattern:", padded_patterns[0])
print("Example padded response:", padded_responses[0])

In [None]:
# Convert to tensors: preparing it for training for efficient computations, leverages GPU acceleration, and integrates seamlessly with the PyTorch ecosystem.
patterns = torch.tensor(padded_patterns, dtype=torch.long)
responses = torch.tensor(padded_responses, dtype=torch.long)

# patterns = torch.tensor([pattern for pattern in padded_patterns], dtype=torch.long)
# responses = torch.tensor([response for response in padded_responses], dtype=torch.long)


In [None]:
patterns # gives 2D tensor intergers with 4 rows and 4 columns, making it a 4x4 matrix.
# responses

In [None]:
class ChatbotModel(nn.Module): #defines a basic chatbot model architecture using PyTorch
    def __init__(self, vocab_size, embed_size, hidden_size, output_size,max_len): #Initializes the model with hyperparameters
        super(ChatbotModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size) #An embedding layer to convert word indices to dense vectors. word indices are numerical representations of words in a vocabulary. vocabulary:["AMeer", "Rai"], and word indices: "Ameer":0, "rai":1
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size * max_len, output_size * max_len)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = x.contiguous().view(x.size(0), -1)  # Flatten the output for the linear layer
        x = self.fc(x)
        return x.view(x.size(0), max_len, -1)  # Reshape to (batch_size, max_len, output_size)

# Initialize model
vocab_size = len(all_words)
embed_size = 10 #Dimensionality of word embeddings.
hidden_size = 20 #number of neurons in the hidden laye
# output_size = max_response_len #Size of the output layer (likely the maximum length of a response
output_size = vocab_size  # Output size should match the vocabulary size


max_len = max(max_pattern_len, max_response_len)
model = ChatbotModel(vocab_size, embed_size, hidden_size, output_size,max_len)  #simple feed-forward neural network for generating chatbot responses
print(model)

print(f"Vocabulary Size: {vocab_size}")
print(f"Output Size: {output_size}")
print(f"Word to Index Mapping: {word_to_idx}")
print(f"Patterns Tensor: {patterns}")
print(f"Max Length: {max_len}")

for pattern in patterns:
    for idx in pattern:
        if idx >= vocab_size:
            print(f"Invalid Index: {idx}")


In [None]:
# Define loss function and optimizer :
criterion = nn.CrossEntropyLoss() #commonly used loss function for classification problems. It measures the difference between the predicted probability distribution and the actual distribution
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #popular optimization algorithm that combines the advantages of Adagrad and RMSprop. lr parameter (learning rate) determines the step size the optimizer will take when updating parameters.

# Additional debugging:
print("Example padded pattern:", padded_patterns[0])
print("Example padded response:", padded_responses[0])

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    optimizer.zero_grad()  # Clear the gradients(rate of change of the loss function with respect to the model's parameters.) from the previous step, resets the accumulated gradients for each epoch.
    outputs = model(patterns)  # Forward pass: Compute predicted outputs by passing inputs to the model

    # Reshape outputs and responses for loss computation to match the expected input format
    outputs = outputs.view(-1, output_size)  # Shape: (batch_size, sequence_length, output_size) to (batch_size * sequence_length, output_size)
    responses = responses.view(-1)  # Shape: (batch_size, sequence_length) to (batch_size * sequence_length)  # Convert to long for CrossEntropyLoss

    print(f"Epoch {epoch+1}: Outputs shape: {outputs.shape}, Responses shape: {responses.shape}")  # Debug print

    # Additional debugging:
    print("Outuput shape:", outputs.shape[0])
    print("Response shape:", responses.shape[0])

    #  Check for size mismatch (optional, but helpful for debugging)
    if outputs.shape[0] != responses.shape[0]:
        print(f"WARNING: Mismatch in output and response sizes. Outputs: {outputs.shape}, Responses: {responses.shape}")

    # Proceed with loss calculation only if sizes match
    else:
      loss = criterion(outputs, responses)  # Compute the loss and difference between predicted and actual outputs.
      loss.backward()  # Backward pass: Compute gradient of the loss with respect to model parameters
      optimizer.step()  # Update model parameters based on computed gradients
      # Print the loss every 100 epochs if epochs =1000
      # if (epoch + 1) % 100 == 0:
      print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


In [None]:
#Debugging

# After forward pass
outputs = model(patterns)  # Assuming `outputs` is the model's output
print("Epoch {}: Outputs shape: {}".format(epoch, outputs.shape))

# If needed, reshaping outputs for loss calculation
outputs_reshaped = outputs.view(-1, outputs.size(-1))
print("Epoch {}: Reshaped Outputs shape: {}".format(epoch, outputs_reshaped.shape))

# Print the target shape
print("Epoch {}: Responses shape: {}".format(epoch, responses.shape))

# Ensure outputs and targets have matching dimensions
print("Epoch {}: Output size: {}".format(epoch, outputs.size()))
print("Epoch {}: Target size: {}".format(epoch, responses.size()))

# Example padded pattern and response
print("Example padded pattern: {}".format(patterns[0].tolist()))
print("Example padded response: {}".format(responses[0].tolist()))

# Assuming vocab_size is defined somewhere in your code
print("Vocabulary Size: {}".format(vocab_size))

# Example with CrossEntropyLoss
criterion = nn.CrossEntropyLoss()

# Debug reshaping logic
print("Before reshaping Outputs shape: {}".format(outputs.shape))
print("After reshaping Outputs shape: {}".format(outputs_reshaped.shape))



In [None]:
#prediction
def predict_response(input_text): #processing the input text, feeding it to a model, and generating a response.
    input_text = preprocess(input_text) #preprocessing: perform some initial cleaning or transformation on the input text like lowercasing, removing punctuation, tokenization, or stemming.
    input_pattern = encode(input_text) #encoding: preprocessed text is converted into a numerical representation or pattern
    input_pattern = pad_sequence(input_pattern, max_len) #padding: ensures that all input patterns have the same length (max_len) by adding padding
    input_pattern = torch.tensor(input_pattern, dtype=torch.long).unsqueeze(0) #tensor conversion: padded input pattern is converted into a PyTorch tensor with data type torch.long


    output = model(input_pattern) #model predictin: repared input tensor is fed into the neural network model to generate an output
    _, predicted = torch.max(output, dim=2) # finds the index of the most probable response for each time step in the output
    predicted = predicted.squeeze(0).numpy() #dimension reduction: The predicted indices are squeezed to remove unnecessary dimensions and converted to a NumPy array for easier manipulation.

    response_words = [all_words[idx] for idx in predicted if idx in word_to_idx.values()] #decoding: predicted indices are mapped back to words using the all_words vocabulary.
    response_text = ' '.join(response_words) #Response Formation: redicted words are joined together to form the final response text.
    return response_text

# Chat with the bot
print("Start chatting with the bot (type 'quit' to stop)!")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = predict_response(user_input)
    print(f"Bot: {response}")


# Solved Working Chatbot

## 1. Training Chatbot (10 epochs)

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import re
import numpy as np

# Load dataset
def load_dataset(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = [line.strip().split('\t') for line in file.readlines()]
    return data

# Preprocessing
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

# Vocabulary and encoding
def build_vocab(data):
    all_words = []
    for (pattern, response) in data:
        pattern = preprocess(pattern)
        response = preprocess(response)
        words = pattern.split() + response.split()
        all_words.extend(words)
    all_words = sorted(set(all_words))
    word_to_idx = {word: idx for idx, word in enumerate(all_words)}
    return word_to_idx, all_words

def encode(text, word_to_idx):
    text = preprocess(text)
    return [word_to_idx[word] for word in text.split() if word in word_to_idx]

def build_vocab_and_encode(data):
    word_to_idx, all_words = build_vocab(data)
    encoded_data = [(encode(pattern, word_to_idx), encode(response, word_to_idx)) for (pattern, response) in data]
    return encoded_data, word_to_idx, all_words

# Pad sequences
def pad_sequence(seq, max_len, padding_value=0):
    return seq + [padding_value] * (max_len - len(seq))

# Load and process dataset
file_path = 'dataset/chatbot-dialogs.txt'  # Path to your dataset file
data = load_dataset(file_path)
encoded_data, word_to_idx, all_words = build_vocab_and_encode(data)

# Determine maximum length
max_pattern_len = max(len(pattern) for pattern, response in encoded_data)
max_response_len = max(len(response) for pattern, response in encoded_data)
max_len = max(max_pattern_len, max_response_len)

# Pad patterns and responses
padded_patterns = [pad_sequence(pattern, max_len) for pattern, response in encoded_data]
padded_responses = [pad_sequence(response, max_len) for pattern, response in encoded_data]

# Convert to tensors
patterns = torch.tensor(padded_patterns, dtype=torch.long)
responses = torch.tensor(padded_responses, dtype=torch.long)

class ChatbotModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, output_size, max_len):
        super(ChatbotModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hiddequin_size, batch_first=True)
        self.fc = nn.Linear(hidden_size * max_len, output_size * max_len)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = x.contiguous().view(x.size(0), -1)
        x = self.fc(x)
        return x.view(x.size(0), max_len, -1)

# Initialize model
vocab_size = len(all_words)
embed_size = 10
hidden_size = 20
output_size = vocab_size
model = ChatbotModel(vocab_size, embed_size, hidden_size, output_size, max_len)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(patterns)

    outputs = outputs.view(-1, output_size)
    responses = responses.view(-1)

    if outputs.shape[0] != responses.shape[0]:
        print(f"WARNING: Mismatch in output and response sizes. Outputs: {outputs.shape}, Responses: {responses.shape}")
    else:
        loss = criterion(outputs, responses)
        loss.backward()
        optimizer.step()
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/10], Loss: 7.8607
Epoch [2/10], Loss: 7.7759
Epoch [3/10], Loss: 7.6911
Epoch [4/10], Loss: 7.6032
Epoch [5/10], Loss: 7.5107
Epoch [6/10], Loss: 7.4130
Epoch [7/10], Loss: 7.3097
Epoch [8/10], Loss: 7.2004
Epoch [9/10], Loss: 7.0846
Epoch [10/10], Loss: 6.9621


## Prediction 

In [8]:
def predict_response(input_text):
    input_text = preprocess(input_text)
    input_pattern = encode(input_text, word_to_idx)
    input_pattern = pad_sequence(input_pattern, max_len)
    input_pattern = torch.tensor(input_pattern, dtype=torch.long).unsqueeze(0)

    with torch.no_grad():
        output = model(input_pattern)

    output = output.view(-1, output_size)
    _, predicted = torch.max(output, dim=1)

    predicted = predicted.numpy()

    response_words = [all_words[idx] for idx in predicted if idx < len(all_words)]
    response_text = ' '.join(response_words)
    return response_text

# Chat with the bot
print("Start chatting with the bot (type 'quit' to stop)!")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = predict_response(user_input)
    print(f"Bot: {response}")

Start chatting with the bot (type 'quit' to stop)!


You:  hi, how are you doing?


Bot: if how matter how and to is this 011287 to too the the 011287 011287 011287 011287 011287 011287


You:  hi


Bot: if how lot how him get is this 011287 they too in and 011287 011287 011287 011287 011287 011287


You:  bye


Bot: if how lot how him get is this while his too in and 011287 011287 011287 011287 011287 011287


You:  what is your name


Bot: if how people how him to is this 011287 to too the and 011287 011287 011287 011287 011287 011287


You:  quit


## 2. Training Chatbot(100 epochs and GPU based)

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import re
import numpy as np

# Load dataset
def load_dataset(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = [line.strip().split('\t') for line in file.readlines()]
    return data

# Preprocessing
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

# Vocabulary and encoding
def build_vocab(data):
    all_words = []
    for (pattern, response) in data:
        pattern = preprocess(pattern)
        response = preprocess(response)
        words = pattern.split() + response.split()
        all_words.extend(words)
    all_words = sorted(set(all_words))
    word_to_idx = {word: idx for idx, word in enumerate(all_words)}
    return word_to_idx, all_words

def encode(text, word_to_idx):
    text = preprocess(text)
    return [word_to_idx[word] for word in text.split() if word in word_to_idx]

def build_vocab_and_encode(data):
    word_to_idx, all_words = build_vocab(data)
    encoded_data = [(encode(pattern, word_to_idx), encode(response, word_to_idx)) for (pattern, response) in data]
    return encoded_data, word_to_idx, all_words

# Pad sequences
def pad_sequence(seq, max_len, padding_value=0):
    return seq + [padding_value] * (max_len - len(seq))

# Load and process dataset
file_path = 'dataset/chatbot-dialogs.txt'  # Path to your dataset file
data = load_dataset(file_path)
encoded_data, word_to_idx, all_words = build_vocab_and_encode(data)

# Determine maximum length
max_pattern_len = max(len(pattern) for pattern, response in encoded_data)
max_response_len = max(len(response) for pattern, response in encoded_data)
max_len = max(max_pattern_len, max_response_len)

# Pad patterns and responses
padded_patterns = [pad_sequence(pattern, max_len) for pattern, response in encoded_data]
padded_responses = [pad_sequence(response, max_len) for pattern, response in encoded_data]

# Convert to tensors
patterns = torch.tensor(padded_patterns, dtype=torch.long)
responses = torch.tensor(padded_responses, dtype=torch.long)

# Check for GPU availability and set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class ChatbotModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, output_size, max_len):
        super(ChatbotModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size * max_len, output_size * max_len)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = x.contiguous().view(x.size(0), -1)
        x = self.fc(x)
        return x.view(x.size(0), max_len, -1)

# Initialize model
vocab_size = len(all_words)
embed_size = 10
hidden_size = 20
output_size = vocab_size
model = ChatbotModel(vocab_size, embed_size, hidden_size, output_size, max_len).to(device)

# Move tensors to device
patterns = patterns.to(device)
responses = responses.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100  # Increase the number of epochs
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(patterns)

    outputs = outputs.view(-1, output_size)
    responses = responses.view(-1)

    if outputs.shape[0] != responses.shape[0]:
        print(f"WARNING: Mismatch in output and response sizes. Outputs: {outputs.shape}, Responses: {responses.shape}")
    else:
        loss = criterion(outputs, responses)
        loss.backward()
        optimizer.step()
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')



Epoch [1/100], Loss: 7.8544
Epoch [2/100], Loss: 7.7927
Epoch [3/100], Loss: 7.7309
Epoch [4/100], Loss: 7.6668
Epoch [5/100], Loss: 7.5995
Epoch [6/100], Loss: 7.5284
Epoch [7/100], Loss: 7.4530
Epoch [8/100], Loss: 7.3731
Epoch [9/100], Loss: 7.2882
Epoch [10/100], Loss: 7.1981
Epoch [11/100], Loss: 7.1024
Epoch [12/100], Loss: 7.0008
Epoch [13/100], Loss: 6.8930
Epoch [14/100], Loss: 6.7785
Epoch [15/100], Loss: 6.6570
Epoch [16/100], Loss: 6.5281
Epoch [17/100], Loss: 6.3915
Epoch [18/100], Loss: 6.2468
Epoch [19/100], Loss: 6.0936
Epoch [20/100], Loss: 5.9317
Epoch [21/100], Loss: 5.7607
Epoch [22/100], Loss: 5.5804
Epoch [23/100], Loss: 5.3907
Epoch [24/100], Loss: 5.1914
Epoch [25/100], Loss: 4.9824
Epoch [26/100], Loss: 4.7639
Epoch [27/100], Loss: 4.5362
Epoch [28/100], Loss: 4.3001
Epoch [29/100], Loss: 4.0568
Epoch [30/100], Loss: 3.8088
Epoch [31/100], Loss: 3.5600
Epoch [32/100], Loss: 3.3163
Epoch [33/100], Loss: 3.0854
Epoch [34/100], Loss: 2.8760
Epoch [35/100], Loss: 2

# Prediction (100 epochs)

In [11]:
def predict_response(input_text):
    input_text = preprocess(input_text)
    input_pattern = encode(input_text, word_to_idx)
    input_pattern = pad_sequence(input_pattern, max_len)
    input_pattern = torch.tensor(input_pattern, dtype=torch.long).unsqueeze(0).to(device)  # Move to device

    with torch.no_grad():
        output = model(input_pattern)

    output = output.view(-1, output_size)
    _, predicted = torch.max(output, dim=1)

    predicted = predicted.cpu().numpy()  # Move to CPU before converting to numpy

    response_words = [all_words[idx] for idx in predicted if idx < len(all_words)]
    response_text = ' '.join(response_words)
    return response_text

# Chat with the bot
print("Start chatting with the bot (type 'quit' to stop)!")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = predict_response(user_input)
    print(f"Bot: {response}")


Start chatting with the bot (type 'quit' to stop)!


You:  hi, how are you doing?


Bot: i you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  hi


Bot: i you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  what is your name


Bot: i you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  quit


# 3. Training (400 epochs)

In [16]:
import torch

print("Available GPUs:")
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")


Available GPUs:
GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU


In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import re
import numpy as np

# Load dataset
def load_dataset(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = [line.strip().split('\t') for line in file.readlines()]
    return data

# Preprocessing
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

# Vocabulary and encoding
def build_vocab(data):
    all_words = []
    for (pattern, response) in data:
        pattern = preprocess(pattern)
        response = preprocess(response)
        words = pattern.split() + response.split()
        all_words.extend(words)
    all_words = sorted(set(all_words))
    word_to_idx = {word: idx for idx, word in enumerate(all_words)}
    return word_to_idx, all_words

def encode(text, word_to_idx):
    text = preprocess(text)
    return [word_to_idx[word] for word in text.split() if word in word_to_idx]

def build_vocab_and_encode(data):
    word_to_idx, all_words = build_vocab(data)
    encoded_data = [(encode(pattern, word_to_idx), encode(response, word_to_idx)) for (pattern, response) in data]
    return encoded_data, word_to_idx, all_words

# Pad sequences
def pad_sequence(seq, max_len, padding_value=0):
    return seq + [padding_value] * (max_len - len(seq))

# Load and process dataset
file_path = 'dataset/chatbot-dialogs.txt'  # Path to your dataset file
data = load_dataset(file_path)
encoded_data, word_to_idx, all_words = build_vocab_and_encode(data)

# Determine maximum length
max_pattern_len = max(len(pattern) for pattern, response in encoded_data)
max_response_len = max(len(response) for pattern, response in encoded_data)
max_len = max(max_pattern_len, max_response_len)

# Pad patterns and responses
padded_patterns = [pad_sequence(pattern, max_len) for pattern, response in encoded_data]
padded_responses = [pad_sequence(response, max_len) for pattern, response in encoded_data]

# Convert to tensors
patterns = torch.tensor(padded_patterns, dtype=torch.long)
responses = torch.tensor(padded_responses, dtype=torch.long)

# Check for GPU availability and set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class ChatbotModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers, output_size, max_len):
        super(ChatbotModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True, dropout=0.3)
        self.fc = nn.Linear(hidden_size * max_len, output_size * max_len)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = x.contiguous().view(x.size(0), -1)
        x = self.fc(x)
        return x.view(x.size(0), max_len, -1)

# Initialize model
vocab_size = len(all_words)
embed_size = 50
hidden_size = 128
num_layers = 2
output_size = vocab_size
model = ChatbotModel(vocab_size, embed_size, hidden_size, num_layers, output_size, max_len).to(device)

# Move tensors to device
patterns = patterns.to(device)
responses = responses.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10  # Increase the number of epochs
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(patterns)

    outputs = outputs.view(-1, output_size)
    responses = responses.view(-1)

    if outputs.shape[0] != responses.shape[0]:
        print(f"WARNING: Mismatch in output and response sizes. Outputs: {outputs.shape}, Responses: {responses.shape}")
    else:
        loss = criterion(outputs, responses)
        loss.backward()
        optimizer.step()
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')



Epoch [1/10], Loss: 7.8412
Epoch [2/10], Loss: 7.6640
Epoch [3/10], Loss: 7.4188
Epoch [4/10], Loss: 7.0501
Epoch [5/10], Loss: 6.5117
Epoch [6/10], Loss: 5.7540
Epoch [7/10], Loss: 4.7650
Epoch [8/10], Loss: 3.6457
Epoch [9/10], Loss: 2.7997
Epoch [10/10], Loss: 2.4560


# 3. Prediction (400 epochs)

In [29]:
def predict_response(input_text):
    input_text = preprocess(input_text)
    input_pattern = encode(input_text, word_to_idx)
    input_pattern = pad_sequence(input_pattern, max_len)
    input_pattern = torch.tensor(input_pattern, dtype=torch.long).unsqueeze(0).to(device)  # Move to device

    with torch.no_grad():
        output = model(input_pattern)

    output = output.view(-1, output_size)
    _, predicted = torch.max(output, dim=1)

    predicted = predicted.cpu().numpy()  # Move to CPU before converting to numpy

    response_words = [all_words[idx] for idx in predicted if idx < len(all_words)]
    response_text = ' '.join(response_words)
    return response_text

# Chat with the bot
print("Start chatting with the bot (type 'quit' to stop)!")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = predict_response(user_input)
    print(f"Bot: {response}")


Start chatting with the bot (type 'quit' to stop)!


You:  quit


In [22]:
# Data Loader used---------------

import torch
import torch.nn as nn
import torch.optim as optim
import re
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

# Load dataset
def load_dataset(file_path):
    data = []
    with open(file_path, 'r') as f:
        for line in f:
            if '\t' in line:
                pattern, response = line.strip().split('\t')
                data.append((pattern, response))
    return data

# Preprocessing
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

# Vocabulary and encoding
def build_vocab(data):
    all_words = []
    for (pattern, response) in data:
        pattern = preprocess(pattern)
        response = preprocess(response)
        words = pattern.split() + response.split()
        all_words.extend(words)
    all_words = sorted(set(all_words))
    word_to_idx = {word: idx for idx, word in enumerate(all_words)}
    return word_to_idx, all_words

def encode(text, word_to_idx):
    text = preprocess(text)
    return [word_to_idx[word] for word in text.split() if word in word_to_idx]

def build_vocab_and_encode(data):
    word_to_idx, all_words = build_vocab(data)
    encoded_data = [(encode(pattern, word_to_idx), encode(response, word_to_idx)) for (pattern, response) in data]
    return encoded_data, word_to_idx, all_words

# Pad sequences
def pad_sequence(seq, max_len, padding_value=0):
    return seq + [padding_value] * (max_len - len(seq))

# Load and process dataset
file_path = 'dataset/chatbot-dialogs.txt'
data = load_dataset(file_path)
encoded_data, word_to_idx, all_words = build_vocab_and_encode(data)

# Determine maximum length
max_pattern_len = max(len(pattern) for pattern, response in encoded_data)
max_response_len = max(len(response) for pattern, response in encoded_data)
max_len = max(max_pattern_len, max_response_len)

# Pad patterns and responses
padded_patterns = [pad_sequence(pattern, max_len) for pattern, response in encoded_data]
padded_responses = [pad_sequence(response, max_len) for pattern, response in encoded_data]

# Convert to tensors
patterns = torch.tensor(padded_patterns, dtype=torch.long)
responses = torch.tensor(padded_responses, dtype=torch.long)

# Check for GPU availability and set device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Move tensors to device
patterns = patterns.to(device)
responses = responses.to(device)

# Create DataLoader
batch_size = 64
dataset = TensorDataset(patterns, responses)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

class ChatbotModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers, output_size, max_len):
        super(ChatbotModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True, dropout=0.3)
        self.fc = nn.Linear(hidden_size * max_len, output_size * max_len)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = x.contiguous().view(x.size(0), -1)
        x = self.fc(x)
        return x.view(x.size(0), max_len, -1)

# Initialize model
vocab_size = len(all_words)
embed_size = 50
hidden_size = 128
num_layers = 2
output_size = vocab_size
model = ChatbotModel(vocab_size, embed_size, hidden_size, num_layers, output_size, max_len).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 1
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for batch_patterns, batch_responses in dataloader:
        batch_patterns = batch_patterns.to(device)
        batch_responses = batch_responses.to(device)

        optimizer.zero_grad()
        outputs = model(batch_patterns)
        outputs = outputs.view(-1, output_size)
        batch_responses = batch_responses.view(-1)

        loss = criterion(outputs, batch_responses)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(dataloader):.4f}')



Epoch [1/1], Loss: 1.0598


In [23]:
def predict_response(input_text):
    input_text = preprocess(input_text)
    input_pattern = encode(input_text, word_to_idx)
    input_pattern = pad_sequence(input_pattern, max_len)
    input_pattern = torch.tensor(input_pattern, dtype=torch.long).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_pattern)

    output = output.view(-1, output_size)
    _, predicted = torch.max(output, dim=1)

    predicted = predicted.cpu().numpy()

    response_words = [all_words[idx] for idx in predicted if idx < len(all_words)]
    response_text = ' '.join(response_words)
    return response_text

# Chat with the bot
print("Start chatting with the bot (type 'quit' to stop)!")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = predict_response(user_input)
    print(f"Bot: {response}")

Start chatting with the bot (type 'quit' to stop)!


You:  what is your name


Bot: 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  hi, how are you doing?


Bot: 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  hi


Bot: 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  quit


# CSV

In [38]:
def txt_to_csv(txt_file, csv_file):
  with open(txt_file, 'r', encoding='utf-8') as txt_file, open(csv_file, 'w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['question', 'response']
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()
    for line in txt_file:
      columns = line.strip().split('\t')
      csv_writer.writerow({'question': columns[0], 'response': columns[1]})

# Example usage:
txt_file_path = 'dataset/chatbot-dialogs.txt'
csv_file_path = 'dataset/chatbot-dialogs1.csv'
txt_to_csv(txt_file_path, csv_file_path)

In [46]:
import torch
import torch.nn as nn
import torch.optim as optim
import re
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

# Load dataset from CSV
def load_dataset(file_path):
    df = pd.read_csv(file_path)
    data = []
    for _, row in df.iterrows():
        pattern = str(row['question'])
        response = str(row['response'])
        data.append((pattern, response))
    return data

# Preprocessing
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

# Vocabulary and encoding
def build_vocab(data):
    all_words = []
    for (pattern, response) in data:
        pattern = preprocess(pattern)
        response = preprocess(response)
        words = pattern.split() + response.split()
        all_words.extend(words)
    all_words = sorted(set(all_words))
    word_to_idx = {word: idx for idx, word in enumerate(all_words)}
    return word_to_idx, all_words

def encode(text, word_to_idx):
    text = preprocess(text)
    return [word_to_idx[word] for word in text.split() if word in word_to_idx]

def build_vocab_and_encode(data):
    word_to_idx, all_words = build_vocab(data)
    encoded_data = [(encode(pattern, word_to_idx), encode(response, word_to_idx)) for (pattern, response) in data]
    return encoded_data, word_to_idx, all_words

# Pad sequences
def pad_sequence(seq, max_len, padding_value=0):
    return seq + [padding_value] * (max_len - len(seq))

# Load and process dataset
file_path = 'dataset/chatbot-dialogs1.csv'  # Update to your dataset file path
data = load_dataset(file_path)

# Debugging: Print loaded data
print(f"Loaded data: {data[:5]}")  # Print first 5 entries for inspection

encoded_data, word_to_idx, all_words = build_vocab_and_encode(data)

# Debugging: Print encoded data
print(f"Encoded data: {encoded_data[:5]}")  # Print first 5 entries for inspection


# Determine maximum length
max_pattern_len = max(len(pattern) for pattern, response in encoded_data)
max_response_len = max(len(response) for pattern, response in encoded_data)
max_len = max(max_pattern_len, max_response_len)

# Pad patterns and responses
padded_patterns = [pad_sequence(pattern, max_len) for pattern, response in encoded_data]
padded_responses = [pad_sequence(response, max_len) for pattern, response in encoded_data]

# Convert to tensors
patterns = torch.tensor(padded_patterns, dtype=torch.long)
responses = torch.tensor(padded_responses, dtype=torch.long)

# Check for GPU availability and set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU 1 (NVIDIA RTX)

# Move tensors to device
patterns = patterns.to(device)
responses = responses.to(device)

class ChatbotModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers, output_size, max_len):
        super(ChatbotModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True, dropout=0.3)
        self.fc = nn.Linear(hidden_size * max_len, output_size * max_len)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = x.contiguous().view(x.size(0), -1)
        x = self.fc(x)
        return x.view(x.size(0), max_len, -1)

# Initialize model
vocab_size = len(all_words)
embed_size = 50
hidden_size = 128
num_layers = 2quit
output_size = vocab_size
model = ChatbotModel(vocab_size, embed_size, hidden_size, num_layers, output_size, max_len).to(device)  # Move model to GPU

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(patterns)

    outputs = outputs.view(-1, output_size)
    responses = responses.view(-1)

    if outputs.shape[0] != responses.shape[0]:
        print(f"WARNING: Mismatch in output and response sizes. Outputs: {outputs.shape}, Responses: {responses.shape}")
    else:
        loss = criterion(outputs, responses)
        loss.backward()
        optimizer.step()
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')



Loaded data: [('hi, how are you doing?', "i'm fine. how about yourself?"), ("i'm fine. how about yourself?", "i'm pretty good. thanks for asking."), ("i'm pretty good. thanks for asking.", 'no problem. so how have you been?'), ('no problem. so how have you been?', "i've been great. what about you?"), ("i've been great. what about you?", "i've been good. i'm in school right now.")]
Encoded data: [([1014, 1063, 149, 2508, 644], [1089, 809, 1063, 65, 2515]), ([1089, 809, 1063, 65, 2515], [1089, 1671, 928, 2198, 852, 168]), ([1089, 1671, 928, 2198, 852, 168], [1443, 1678, 1999, 1063, 985, 2508, 235]), ([1443, 1678, 1999, 1063, 985, 2508, 235], [1139, 235, 945, 2424, 65, 2508]), ([1139, 235, 945, 2424, 65, 2508], [1139, 235, 928, 1089, 1095, 1854, 1788, 1462])]
Epoch [1/10], Loss: 7.8319
Epoch [2/10], Loss: 7.6702
Epoch [3/10], Loss: 7.4357
Epoch [4/10], Loss: 7.0745
Epoch [5/10], Loss: 6.5361
Epoch [6/10], Loss: 5.7611
Epoch [7/10], Loss: 4.7232
Epoch [8/10], Loss: 3.5437
Epoch [9/10], Los

In [47]:
def predict_response(input_text):
    input_text = preprocess(input_text)
    input_pattern = encode(input_text, word_to_idx)
    input_pattern = pad_sequence(input_pattern, max_len)
    input_pattern = torch.tensor(input_pattern, dtype=torch.long).unsqueeze(0).to(device)  # Move to GPU

    with torch.no_grad():
        output = model(input_pattern)

    output = output.view(-1, output_size)
    _, predicted = torch.max(output, dim=1)

    predicted = predicted.cpu().numpy()  # Move to CPU before converting to numpy

    response_words = [all_words[idx] for idx in predicted if idx < len(all_words)]
    response_text = ' '.join(response_words)
    return response_text

# Chat with the bot
print("Start chatting with the bot (type 'quit' to stop)!")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = predict_response(user_input)
    print(f"Bot: {response}")


Start chatting with the bot (type 'quit' to stop)!


You:  hi, how are you doing?


Bot: i i you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  what is your name


Bot: i i you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  who are you


Bot: i i you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  what you doing


Bot: i i you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  where are you


Bot: i i you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  hi


Bot: i you you 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287 011287


You:  quit
