In [27]:
# Import necessary libraries
import numpy as np  # For numerical operations
import random  # For generating random numbers
import json  # For working with JSON files
import nltk  # For natural language processing tasks
import torch  # For creating and training deep learning models
import torch.nn as nn  # For defining neural network architectures
from torch.utils.data import Dataset, DataLoader  # For loading and processing datasets efficiently
import nltk
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()
from typing import List  # Import the List type hint for type annotations


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [28]:
# Read the intent definitions (tags, patterns, responses) from disk.
# JSON text is UTF-8, so the encoding is stated explicitly rather than relying
# on the platform default, which is not UTF-8 on every operating system.
with open('intents.json', 'r', encoding='utf-8') as f:
    intents = json.load(f)

In [29]:

def tokenize(sentence):
    """Split ``sentence`` into a list of tokens with NLTK's ``word_tokenize``."""
    return nltk.word_tokenize(sentence)

def stem(word):
    """Return the Porter stem of ``word`` after lower-casing it."""
    return stemmer.stem(word.lower())

In [30]:

def bag_of_words(tokenized_sentence: List[str], words: List[str]) -> np.ndarray:
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Tokens of the sentence; each one is stemmed with
            ``stem`` before being compared against ``words``.
        words: The known vocabulary. Position ``i`` of the result corresponds
            to ``words[i]``.

    Returns:
        A ``float32`` array of length ``len(words)`` holding 1.0 where the
        vocabulary word occurs among the stemmed tokens and 0.0 elsewhere.
    """
    # A set makes each membership test O(1); the original list lookup rescanned
    # the whole sentence once per vocabulary word.
    sentence_stems = {stem(token) for token in tokenized_sentence}

    # Booleans are converted to 1.0 / 0.0 by the float32 dtype.
    return np.array([known in sentence_stems for known in words], dtype=np.float32)


This code initializes three empty lists to store the words, tags, and (words, tag) pairs that will be used to train a natural language processing model. It then loops through each intent in the intents dictionary and extracts the tag associated with that intent. The tag is added to the list of tags. The code then loops through each pattern associated with the intent and tokenizes the pattern into individual words using the tokenize function. The words are added to the list of all words, and the (words, tag) pair is added to the list of pairs. This process is repeated for each intent and pattern in the intents dictionary. The resulting all_words, tags, and xy lists can be used to train a model to understand natural language input and generate appropriate responses.





In [31]:
# Accumulators: every token seen, every intent tag, and (tokens, tag) training pairs
all_words, tags, xy = [], [], []

# Walk the intents once, tokenizing each example pattern under its tag
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        words = tokenize(pattern)
        # Grow the flat token list and remember which tag these tokens belong to
        all_words += words
        xy.append((words, tag))


In [32]:
# Punctuation tokens that carry no meaning for classification
ignore_words = ['?', '.', '!']

# Drop ignored tokens, stem the rest (stem() lower-cases its input itself),
# then de-duplicate and sort to obtain a stable vocabulary order
all_words = sorted({stem(w) for w in all_words if w not in ignore_words})

# De-duplicate and sort the tags so each one gets a stable class index
tags = sorted(set(tags))

# Summarise the dataset
print(len(xy), "patterns")
print(len(tags), "tags:", tags)
print(len(all_words), "unique stemmed words:", all_words)


56 patterns
11 tags: ['contact', 'delivery', 'funny', 'goodbye', 'greeting', 'help', 'items', 'name', 'payments', 'thanks', 'weather']
104 unique stemmed words: ["'m", "'s", 'a', 'accept', 'address', 'advic', 'an', 'ani', 'anyon', 'are', 'assist', 'be', 'by', 'bye', 'call', 'can', 'card', 'cash', 'chat', 'contact', 'credit', 'custom', 'day', 'deliveri', 'do', 'doe', 'email', 'for', 'funni', 'get', 'give', 'go', 'good', 'goodby', 'guid', 'have', 'hello', 'help', 'hey', 'hi', 'how', 'human', 'i', 'in', 'is', 'it', 'item', 'joke', 'kind', 'know', 'later', 'laugh', 'like', 'long', 'lot', 'make', 'mastercard', 'may', 'me', 'my', 'name', 'need', 'number', 'of', 'onli', 'order', 'outsid', 'pay', 'paypal', 'phone', 'rain', 'see', 'sell', 'servic', 'ship', 'should', 'someth', 'stori', 'stuck', 'suggest', 'sunni', 'support', 'take', 'talk', 'tell', 'temperatur', 'thank', 'that', 'the', 'there', 'to', 'today', 'touch', 'track', 'umbrella', 'weather', 'what', 'when', 'which', 'who', 'will', 'with'

This code creates empty lists to store the training data that will be used to train a natural language processing model. It then loops through each (pattern_sentence, tag) pair in the list of pairs, which contains a tokenized sentence and its corresponding tag. For each pair, the code converts the pattern sentence to a bag of words array using the bag_of_words function, and adds the resulting array to the list of X training data. It also converts the tag to a class label using the index method of the tags list, and adds the resulting label to the list of y training data. After processing all the pairs, the code converts the lists of X and y training data to numpy arrays using the np.array function. The resulting X_train and y_train arrays can be used to train a natural language processing model using a machine learning library such as PyTorch or TensorFlow.

In [33]:
# Features: one bag-of-words vector per tokenized pattern
X_train = np.array(
    [bag_of_words(pattern_sentence, all_words) for pattern_sentence, _ in xy]
)

# Targets: each tag's position in the sorted `tags` list is its class label
y_train = np.array([tags.index(tag) for _, tag in xy])


This code defines a PyTorch neural network module called NeuralNet, which inherits from the nn.Module class. The __init__ method of the NeuralNet class defines three linear layers and a shared ReLU activation, sized by the specified input size, hidden size, and number of classes. The forward method of the NeuralNet class defines the forward pass through the network: it takes an input tensor x, applies the first two linear layers, each followed by a ReLU activation, then applies the third linear layer, and returns the output tensor. The output tensor does not have an activation or softmax function applied, as this will be handled by the loss function during training. This neural network architecture is a simple feedforward neural network with two hidden layers and a linear output layer, and can be used to classify text data into different categories based on the bag-of-words representation of the input.

In [34]:
class NeuralNet(nn.Module):
    """Feed-forward classifier: two ReLU-activated hidden layers and a linear output.

    Args:
        input_size: Number of input features.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are registered in this order so state-dict keys stay l1, l2, l3
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        # One stateless ReLU module reused after both hidden layers
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw logits for ``x``; no softmax is applied here."""
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        # The loss function used for training expects unnormalised scores
        return self.l3(hidden)

This code defines a PyTorch Dataset class called ChatDataset, which is used to represent a dataset of input-output pairs for training a machine learning model. The __init__ method of the ChatDataset class initializes the dataset with the X_train and y_train arrays that were created earlier in the preprocessing step. The __getitem__ method of the ChatDataset class supports indexing such that dataset[i] can be used to get the i-th sample in the dataset, and returns the x and y data corresponding to the given index. The __len__ method of the ChatDataset class returns the number of samples in the dataset when len(dataset) is called. This ChatDataset class can be used to create a PyTorch DataLoader object, which can be used to batch and shuffle the training data during training of the neural network model.

In [35]:
class ChatDataset(Dataset):
    """Map-style dataset pairing feature vectors with class labels.

    Args:
        x_data: Indexable collection of feature vectors. When omitted, the
            notebook-level ``X_train`` is used, so ``ChatDataset()`` keeps
            working exactly as before.
        y_data: Indexable collection of labels aligned with ``x_data``. When
            omitted, the notebook-level ``y_train`` is used.

    Raises:
        ValueError: If ``x_data`` and ``y_data`` differ in length.
    """

    def __init__(self, x_data=None, y_data=None):
        # Fall back to the arrays built earlier in the notebook only when the
        # caller does not supply data, so the class is reusable on other arrays.
        self.x_data = X_train if x_data is None else x_data
        self.y_data = y_train if y_data is None else y_data
        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                f"x_data and y_data must have the same length, "
                f"got {len(self.x_data)} and {len(self.y_data)}"
            )
        self.n_samples = len(self.x_data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

In [36]:
# Seed PyTorch's global RNG so that weight initialisation and the DataLoader's
# shuffling order are repeatable across "Restart & Run All" runs.
seed = 42
torch.manual_seed(seed)

# Set some hyper-parameters for the model
num_epochs = 1000  # The number of training epochs
batch_size = 8  # The batch size used for training
learning_rate = 0.001  # The learning rate used for training
input_size = len(X_train[0])  # The number of features in the input data (vocabulary size)
hidden_size = 8  # The number of neurons in each hidden layer
output_size = len(tags)  # The number of classes in the output layer

# Print the input and output dimensions of the model
print(input_size, output_size)


104 11


This code creates a ChatDataset object to represent the training data, and a DataLoader object to batch and shuffle the training data during training. The DataLoader object is initialized with the ChatDataset object, and the batch_size, shuffle, and num_workers arguments control the size of the batches, whether to shuffle the data during training, and the number of subprocesses used for data loading. The device variable is set to either 'cuda' or 'cpu' based on whether a CUDA-capable GPU is available for training. The model variable is created as a NeuralNet object with the specified input size, hidden size, and output size, and is moved to the appropriate device using the to method. The criterion variable is set to the cross-entropy loss function, which is commonly used for classification tasks, and the optimizer variable is set to the Adam optimization algorithm with the specified learning rate. These objects will be used to train the model in the next step.

In [37]:
# Wrap the training arrays in a Dataset and serve them in shuffled mini-batches
dataset = ChatDataset()
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

# Prefer the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the classifier and place its parameters on the chosen device
model = NeuralNet(input_size, hidden_size, output_size)
model = model.to(device)

# Cross-entropy loss on the raw logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

This code trains the neural network model on the training data using the num_epochs, train_loader, model, criterion, and optimizer objects defined earlier. The training loop iterates over the specified number of epochs, and for each epoch, iterates over the batches of input-output pairs in the train_loader. The input and label data are moved to the appropriate device, and a forward pass is performed through the network to compute the output tensor and loss. A backward pass is then performed through the network to compute the gradients of the loss with respect to the network weights, and the optimizer is used to update the weights based on the computed gradients. The loss of the last batch is printed every 100 epochs to monitor the progress of the training. After training, the final loss is printed, and the model's state dict, input size, hidden size, output size, all words, and tags are collected in a dictionary called data. This dictionary can be saved to disk and later loaded to rebuild the trained model for making predictions on new input data.

In [38]:
# Optimise the model for `num_epochs` passes over the shuffled training batches
for epoch in range(num_epochs):
    for words, labels in train_loader:
        # Put the batch on the model's device; the loss needs int64 class labels
        words = words.to(device)
        labels = labels.to(device=device, dtype=torch.long)

        # Clear stale gradients, then run forward, backward and the update step
        optimizer.zero_grad()
        outputs = model(words)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Report progress every 100th epoch (this is the loss of the last batch only)
    if not (epoch + 1) % 100:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Loss of the very last batch processed
print(f'final loss: {loss.item():.4f}')

# Bundle the weights with everything needed to rebuild the model and its vocabulary
data = dict(
    model_state=model.state_dict(),
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    all_words=all_words,
    tags=tags,
)


Epoch [100/1000], Loss: 0.7341
Epoch [200/1000], Loss: 0.0476
Epoch [300/1000], Loss: 0.0078
Epoch [400/1000], Loss: 0.0068
Epoch [500/1000], Loss: 0.0016
Epoch [600/1000], Loss: 0.0010
Epoch [700/1000], Loss: 0.0012
Epoch [800/1000], Loss: 0.0004
Epoch [900/1000], Loss: 0.0003
Epoch [1000/1000], Loss: 0.0002
final loss: 0.0002


In [39]:
# Write the checkpoint dictionary (weights, layer sizes, vocabulary, tags) to disk
FILE = "data.pth"
torch.save(obj=data, f=FILE)

# Confirm where the checkpoint was written
print(f'training complete. file saved to {FILE}')


training complete. file saved to data.pth


In [40]:
# Determine whether to use the GPU or CPU for inference based on availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the intents file; the encoding is explicit because JSON text is UTF-8
# and the platform default encoding is not UTF-8 everywhere.
with open('intents.json', 'r', encoding='utf-8') as json_data:
    intents = json.load(json_data)

# Load the checkpoint written by the training step. map_location remaps the
# stored tensors onto `device`, so a checkpoint saved on a GPU machine still
# loads on a CPU-only one. torch.load unpickles the file, so only load
# checkpoints from a trusted source.
FILE = "data.pth"
data = torch.load(FILE, map_location=device)

# Extract the input size, hidden size, output size, all words, tags, and model state from the loaded data
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

# Rebuild the network with the saved layer sizes, move it to the device, and restore the trained weights
model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)

# Switch to evaluation mode before inference. NeuralNet has no dropout or
# batch-norm layers, so this changes nothing numerically here, but it is the
# conventional step and keeps the cell correct if such layers are added later.
model.eval()


NeuralNet(
  (l1): Linear(in_features=104, out_features=8, bias=True)
  (l2): Linear(in_features=8, out_features=8, bias=True)
  (l3): Linear(in_features=8, out_features=11, bias=True)
  (relu): ReLU()
)

In [42]:
# Set the name of the chatbot
bot_name = "sam"

# Minimum softmax probability the top class must reach before the bot answers
confidence_threshold = 0.75

# Print a message indicating that the chat session has started
print("Let's chat! (type 'quit' to exit)")

# Read user input and answer until the user asks to quit
while True:
    sentence = input("You: ")

    # Accept the exit command regardless of surrounding whitespace or letter case
    if sentence.strip().lower() == "quit":
        break

    # Tokenize the input sentence and convert it to a bag-of-words vector
    tokens = tokenize(sentence)
    X = bag_of_words(tokens, all_words)

    # With no known word the input vector is all zeros and the prediction would
    # depend on the layer biases alone, so answer "not understood" directly.
    if not X.any():
        print(f"{bot_name}: I do not understand...")
        continue

    # Add a batch dimension of 1 and move the vector to the model's device
    X = torch.from_numpy(X.reshape(1, X.shape[0])).to(device)

    # Inference only: skip autograd bookkeeping
    with torch.no_grad():
        output = model(X)
        probs = torch.softmax(output, dim=1)

    # Softmax preserves ordering, so the most probable class is also the arg-max logit
    prob, predicted = torch.max(probs, dim=1)
    tag = tags[predicted.item()]

    # Answer only when the model is confident enough; otherwise admit uncertainty
    if prob.item() > confidence_threshold:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
                # Stop after the first matching intent so only one reply is printed
                break
    else:
        print(f"{bot_name}: I do not understand...")


Let's chat! (type 'quit' to exit)
You: hello
sam: Hey :-)
You: whats the weather today ?
sam: I'm not able to provide weather information. How can I assist you in another way?
You: tell me a joke ?
sam: Why couldn't the bicycle stand up by itself? Because it was two-tired!
You: how long is my delivery ?
sam: Hi there, how can I help?
You: track my order
sam: I do not understand...
You: what items do you have ?
sam: We have coffee and tea
You: good
sam: Hello, thanks for visiting
You: bye
sam: Bye! Come back again soon.
You: quit
