In [1]:
import nltk
import random
import json
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [9]:
# Download necessary NLTK resources
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [10]:
# Load intents from JSON file
with open('intents.json', 'r') as f:
    intents = json.load(f)

In [11]:
#  Initialize stemmer
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenize(sentence):
    """Split sentence into array of words/tokens."""
    return nltk.word_tokenize(sentence)

def stem(word):
    """Find the root form of the word."""
    return stemmer.stem(word.lower())

def bag_of_words(tokenized_sentence, words):
    """Return bag of words array."""
    sentence_words = [stem(word) for word in tokenized_sentence]
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in sentence_words:
            bag[idx] = 1
    return bag

In [12]:
def get_response(user_input):
    """Get a response based on user input."""
    tokens = tokenize(user_input.lower())

    # Check for keyword matches first
    for intent in intents['intents']:
        if any(keyword in tokens for keyword in intent.get('keywords', [])):
            return random.choice(intent['responses'])

    # Fallback to pattern matching if no keywords matched
    for intent in intents['intents']:
        for pattern in intent['patterns']:
            if nltk.edit_distance(user_input.lower(), pattern.lower()) < 3:  # Example threshold
                return random.choice(intent['responses'])

    return "I'm sorry, I don't understand."

In [13]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        return out

In [14]:
# Prepare training data
all_words = []
tags = []
xy = []

for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))

# Stem and lower each word
ignore_words = ['?', '.', '!']
all_words = [stem(w) for w in all_words if w not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))

In [15]:
# Create training data
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    label = tags.index(tag)
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)

In [16]:

# Hyper-parameters
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)


In [17]:
class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


In [18]:
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)

        outputs = model(words)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

print(f'Final loss: {loss.item():.4f}')


Epoch [100/1000], Loss: 0.6500
Epoch [200/1000], Loss: 0.1154
Epoch [300/1000], Loss: 0.0210
Epoch [400/1000], Loss: 0.0070
Epoch [500/1000], Loss: 0.0039
Epoch [600/1000], Loss: 0.0010
Epoch [700/1000], Loss: 0.0013
Epoch [800/1000], Loss: 0.0006
Epoch [900/1000], Loss: 0.0004
Epoch [1000/1000], Loss: 0.0002
Final loss: 0.0002


In [22]:
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "chatbot_model.pth"
torch.save(data, FILE)

print(f'Training complete. File saved to {FILE}')


Training complete. File saved to chatbot_model.pth


In [30]:
def evaluate_model(test_data):
    correct_predictions = 0

    for user_input, expected_tag in test_data:
        # Generate response based on user input
        response_tag_index = model(torch.from_numpy(bag_of_words(tokenize(user_input), all_words)).float().to(device)).argmax().item()

        # Get predicted tag from index
        predicted_tag = tags[response_tag_index]

        # Compare predicted tag with expected tag
        if predicted_tag == expected_tag:
            correct_predictions += 1

    accuracy = correct_predictions / len(test_data) * 100  # Convert to percentage
    print(f'Accuracy: {accuracy:.2f}%')

# Example test data (user input and expected response tag)
test_data = [
    ("How to apply for admission?", "admissions"),
    ("What are the admission policies?", "admissions"),
    ("What courses are offered over here?", "courses"),
    ("How much does the MBA program cost?", "fees"),
    ("Can I apply for admission online?", "admissions"),
    ("What are the prerequisites for applying?", "admissions"),
    ("When is the last date to apply?", "admissions"),
    ("Can I apply after the deadline?", "admissions"),
    ("Do you offer computer science courses?", "courses"),
    ("Which engineering courses do you offer?", "courses"),
    ("Is there an engineering program in Artificial Intelligence?", "courses"),
    ("What is the tuition fee for MBA?", "fees"),
    ("Are there any discounts available on the course fees?", "fees"),
    ("Do you have scholarships for international students?", "fees"),
    ("What scholarships do you provide?", "scholarships"),
    ("How can I apply for financial aid?", "scholarships"),
    ("Can I get a scholarship if I am not a top performer?", "scholarships"),
    ("Is there any scholarship for economically disadvantaged students?", "scholarships"),
    ("What is the deadline for applications?", "admissions"),
    ("How do I apply for your courses?", "courses"),
    ("Do you have engineering programs?", "courses"),
    ("Are there any cultural events this month?", "events"),
    ("Can I attend campus events as an external guest?", "events"),
    ("Do you host any guest lectures?", "events"),
    ("Is there any placement for business management students?", "placement"),
    ("What are the placement opportunities for mechanical engineers?", "placement"),
    ("What is the minimum CGPA required for campus placement?", "placement"),
    ("Can I contact the college for internship opportunities?", "contact"),
    ("How do I reach out to the placement office?", "contact"),
    ("What facilities are available for students in the hostel?", "hostel"),
    ("Is there a laundry service in the hostel?", "hostel"),
    ("Do you have a common kitchen in the hostel?", "hostel"),
    ("What is the cost of the hostel for the entire year?", "hostel"),
    ("How do I get information about campus transportation?", "transportation"),
    ("Do you provide shuttle services for students?", "transportation"),
    ("How much is the transportation charge for daily travel?", "transportation"),
    ("Is the transportation service available on weekends?", "transportation"),
    ("Can I use the library facilities remotely?", "library"),
    ("Do you offer online books in the library?", "library"),
    ("Can I borrow books for more than 15 days?", "library"),
    ("Are there library workshops for students?", "library"),
    ("What are the campus facilities available for non-students?", "facilities"),
    ("Do you have sports facilities for alumni?", "facilities"),
    ("Are there conference rooms in the library?", "facilities"),
    ("What student clubs are available for business students?", "clubs"),
    ("Are there any coding competitions hosted by the college?", "clubs"),
    ("What types of cultural activities are there?", "clubs"),
    ("Can I join technical clubs even if I'm not from a technical background?", "clubs"),
    ("Do you have internships available for first-year students?", "internships"),
    ("Is there an internship program for computer science students?", "internships"),
    ("Are internships paid?", "internships"),
    ("Is participation in internships optional?", "internships"),
    ("How do I find out about upcoming seminars?", "events"),
    ("Are there any upcoming events for entrepreneurship?", "events"),
    ("Do you have workshops on data science?", "events"),
    ("What are the academic dates for next year?", "academic_calendar"),
    ("What are the upcoming academic breaks?", "academic_calendar"),
    ("When does the new semester begin?", "academic_calendar"),
    ("What holidays are observed this semester?", "academic_calendar")
]


evaluate_model(test_data)

Accuracy: 69.49%


In [33]:

# Validation Block to Measure Accuracy Based on Responses

def evaluate_model(test_data):
    """
    Evaluate the model's performance based on predicted responses.

    Args:
        test_data: A list of tuples containing user input and expected response.
    """

    correct_predictions = 0

    for user_input, expected_response in test_data:
        # Tokenize and preprocess user input
        tokenized_sentence = tokenize(user_input)
        bag_of_words_vector = bag_of_words(tokenized_sentence, all_words)

        # Convert to tensor and move to device
        bag_of_words_tensor = torch.from_numpy(bag_of_words_vector).float().to(device)

        # Get model output
        output = model(bag_of_words_tensor.unsqueeze(0))  # Add batch dimension
        _, predicted_index = torch.max(output, dim=1)

        # Get predicted tag from index
        predicted_tag = tags[predicted_index.item()]

        # Find a response from the intents.json file for the predicted tag
        predicted_response = None

        for intent in intents['intents']:
            if intent['tag'] == predicted_tag:
                predicted_response = random.choice(intent['responses'])
                break

        # Compare predicted response with expected response
        if predicted_response == expected_response:
            correct_predictions += 1

    # Calculate accuracy based on responses
    accuracy_percentage = (correct_predictions / len(test_data)) * 100  # Convert to percentage
    print(f'Accuracy: {accuracy_percentage:.2f}%')

# Example test data (user input and expected response)
test_data = [
    ("How to apply for admission?", "You can apply for admission through our online portal. Visit the Admissions section on our website for more details."),
    ("What are the admission policies?", "Admission requirements vary by program. Please check the program-specific details in the Admissions section."),
    ("What courses are offered over here?", "We offer a wide range of programs, including engineering, business, arts, and sciences. Visit the Programs section for a detailed list."),
    ("Can I apply for admission online?", "You can apply for admission through our online portal. Visit the Admissions section on our website for more details."),
    ("What are the prerequisites for applying?", "Admission requirements vary by program. Please check the program-specific details in the Admissions section."),
    ("When is the last date to apply?", "The application deadline depends on the program. Please refer to the academic calendar on the website."),
    ("Do you offer computer science courses?", "Yes, we offer engineering programs including Computer Science. Visit the Programs section for more details."),
    ("How much does the MBA program cost?", "The MBA program fee is available on the website under the Fees section. Scholarships may also be available."),
    ("Are there any discounts available on the course fees?", "The tuition fees vary by program. Please visit the Fees and Scholarships section for detailed information."),
    ("Do you offer scholarships for international students?", "Yes, we offer scholarships based on merit and need. Please visit the Scholarships section for more details."),
    ("What is the deadline for applications?", "The application deadline depends on the program. Please refer to the academic calendar on the website."),
    ("How do I apply for your courses?", "You can apply for admission through our online portal. Visit the Admissions section on our website for more details."),
    ("What are the application requirements for the engineering program?", "Admission requirements vary by program. Please check the program-specific details in the Admissions section."),
    ("Is there an engineering program in Artificial Intelligence?", "Yes, we have engineering programs. You can choose from various specializations such as Computer Science, Mechanical, and Civil."),
    ("Can I get a scholarship if I am not a top performer?", "We offer scholarships based on academic performance and financial need. Please visit the Scholarships section for more details."),
    ("How can I apply for financial aid?", "Yes, financial aid is available for eligible students. Please check the Scholarships section for the application process."),
    ("Can I borrow books from the library?", "Students can borrow books for a specific duration. Check the Library section for the borrowing policy."),
    ("Are there any cultural events this month?", "We host various events throughout the year, including workshops, fests, and competitions. Check the Events section for more information."),
    ("What is the fee for hostel accommodation?", "The hostel charges vary depending on the type of room. Please visit the Hostel section on our website for details."),
    ("Do you have a bus service for students?", "Yes, we provide transportation facilities with buses covering major routes. Check the Transportation section for more details."),
    ("Is the library open on weekends?", "The library is open from 8 AM to 8 PM on weekdays and 9 AM to 5 PM on weekends."),
    ("Can I attend campus events as an external guest?", "External guests may be allowed to attend certain campus events. Please check the Events section for more details."),
    ("Are internships mandatory for students?", "Yes, internships are an integral part of our curriculum for most programs."),
    ("What are the placement statistics?", "You can find the latest placement statistics in the Placements section of our website."),
    ("Do you offer online courses?", "We offer a wide range of programs. Visit the Programs section for detailed information on available online courses."),
    ("Are there scholarships for economically disadvantaged students?", "Yes, financial aid is available for eligible students. Please check the Scholarships section for details."),
    ("What types of student clubs do you have?", "We have a variety of student clubs, including technical, cultural, and sports clubs. Visit the Clubs section for more details."),
    ("Can I join clubs if I'm not from a technical background?", "Yes, you can join technical clubs such as the Coding Club, Robotics Club, and AI Club."),
    ("How much does the transportation service cost?", "The transport charges vary by distance. Visit the Transportation section on our website for details."),
    ("Do you have sports facilities on campus?", "Yes, we have a sports complex with facilities for basketball, cricket, football, and more."),
    ("How do I reach the placement office?", "Visit the Contact Us page on our website for detailed contact information."),
    ("What events are happening on campus this semester?", "You can find the latest events and seminars in the Events section of our website."),
    ("What are the library timings?", "The library is open from 8 AM to 8 PM on weekdays and 9 AM to 5 PM on weekends."),
    ("Are there workshops in the library?", "Yes, our library hosts various workshops. Check the Library section for more details."),
    ("Is there a shuttle service for students?", "Yes, we provide transportation facilities with buses covering major routes."),
    ("Can I apply for admission after the deadline?", "Admission deadlines are strict. Please refer to the academic calendar for specific deadlines."),
    ("Do you provide hostel accommodation for international students?", "Yes, we provide hostel facilities for both boys and girls with separate accommodations."),
    ("Are there any technical clubs available?", "Yes, we have technical clubs like the Coding Club, Robotics Club, and AI Club."),
    ("Can I borrow books from the online library?", "Yes, we have an online library portal where you can access e-books and journals."),
    ("Is there a hostel available for female students?", "Yes, we provide separate hostel facilities for both boys and girls."),
    ("Are there scholarships available for graduate students?", "Yes, we offer scholarships based on merit and financial need. Visit the Scholarships section for details."),
    ("What is the fee for the MBA program?", "The MBA program fee is available on the website under the Fees section. Scholarships may also be available."),
]


# Evaluate the model with test data
evaluate_model(test_data)

Accuracy: 11.90%


In [34]:
# Chatting with the bot
print("Let's chat! (type 'quit' to exit)")
while True:
    sentence = input("You: ")
    if sentence.lower() == "quit":
        break

    response = get_response(sentence)
    print(f"Bot: {response}")

Let's chat! (type 'quit' to exit)
You: quit
