In [1]:
# Install gensim library
# Import necessary libraries
# Download pre-trained Word2Vec model
# Define function to load knowledge base from JSON file
# Define function to save knowledge base to JSON file
# Define function to find best match for user question using Word2Vec similarity
# Define function to embed text using Word2Vec model
# Define function to calculate cosine similarity between two vectors
# Define function to get answer for a question from knowledge base
# Define main chatbot function
# Interact with the user, find best match for input, and respond accordingly
# Execute the chatbot function
# Install gensim
!pip install gensim

# Import gensim
import gensim.downloader as api

# Download the pre-trained Word2Vec model (this might take some time)
word2vec_model = api.load('word2vec-google-news-300')

# Check if the model is loaded
print("Model loaded successfully!")


Model loaded successfully!


In [4]:
import os

# Show the names of the entries in the current working directory
# ('.' is the default path of os.listdir, spelled out here for clarity).
print(os.listdir('.'))


['.config', 'sample_data']


In [None]:
import json
import string
from difflib import get_close_matches

import gensim.downloader as api
import numpy as np
from gensim.models import Word2Vec
from google.colab import files
from sklearn.metrics.pairwise import cosine_similarity

# Load knowledge base from a JSON file
def load_knowledge_base() -> dict:
    """Ask the user to upload a JSON file and return its parsed content.

    The upload dialog comes from ``google.colab.files.upload()``; the names it
    reports are opened from the current working directory. Only the first
    uploaded file is read; any further files are ignored.

    Returns:
        The parsed JSON document. When nothing was uploaded, an empty
        knowledge base ``{"questions": []}`` is returned instead of ``None``
        so that callers indexing ``["questions"]`` keep working.

    Raises:
        json.JSONDecodeError: If the uploaded file is not valid JSON.
    """
    uploaded = files.upload()
    for filename in uploaded:
        # JSON text is UTF-8 by specification; do not depend on the locale default.
        with open(filename, 'r', encoding='utf-8') as file:
            return json.load(file)

    # The loop body never ran: the upload was empty or cancelled.
    return {"questions": []}

# Save knowledge base to a JSON file
def save_knowledge_base(data: dict):
    """Write the knowledge base to ``knowledge_base.json`` in the working directory.

    Args:
        data: JSON-serializable knowledge base.

    Raises:
        TypeError: If ``data`` contains a value that JSON cannot represent.
            In that case the existing file is left untouched.
    """
    # Serialize BEFORE opening the file: opening with 'w' truncates it, so a
    # serialization error raised afterwards would destroy the previous
    # knowledge base and leave a partial, unparsable file behind.
    serialized = json.dumps(data, indent=2)
    with open('knowledge_base.json', 'w') as file:
        file.write(serialized)
    print("Knowledge base saved successfully.")

# Find the best match for user question
def find_best_match(user_input: str, knowledge_base: dict, word2vec_model: Word2Vec,
                    min_similarity: float = 0.0) -> str | None:
    """Return the stored question most similar to ``user_input``.

    The user input and every stored question are embedded with ``embed_text``
    and compared with ``calculate_similarity``; the highest-scoring question
    wins, with the earliest entry winning ties.

    Args:
        user_input: The text typed by the user.
        knowledge_base: Dict with a ``"questions"`` list whose items carry a
            ``"question"`` string.
        word2vec_model: Embedding model, passed through to ``embed_text``.
        min_similarity: A question only counts as a match when its score is
            strictly greater than this value. The default ``0.0`` rejects the
            score that ``calculate_similarity`` gives when a text could not
            be embedded at all; raise it to make the bot more willing to say
            "I don't know".

    Returns:
        The text of the best-matching stored question, or ``None`` when no
        question scores above ``min_similarity`` (including an empty
        knowledge base).
    """
    user_embedding = embed_text(user_input, word2vec_model)
    best_match = None
    # Start at the threshold rather than at -1: with -1 every question beat the
    # initial score, so a non-empty knowledge base could never yield None.
    best_score = min_similarity

    for qna in knowledge_base["questions"]:
        question_embedding = embed_text(qna['question'], word2vec_model)
        similarity_score = calculate_similarity(user_embedding, question_embedding)
        if similarity_score > best_score:
            best_score = similarity_score
            best_match = qna['question']

    return best_match

# Embed text using Word2Vec model
def embed_text(text: str, word2vec_model: Word2Vec) -> np.ndarray:
    """Embed ``text`` as the mean of the vectors of its known tokens.

    The text is split on whitespace. Each token is looked up as written; if
    the model does not know it, the lookup is retried with surrounding
    punctuation removed and then additionally lower-cased. Without the retries
    a token such as ``"password?"`` or ``"Password"`` was silently dropped
    whenever only ``"password"`` is in the vocabulary. Tokens the model
    already knows as written still map to exactly the same vector as before.

    Args:
        text: The sentence to embed.
        word2vec_model: Object supporting ``token in model``, ``model[token]``
            and ``model.vector_size``.
            NOTE(review): the annotation says ``Word2Vec``, but ``chat_bot``
            passes whatever ``api.load(...)`` returns — presumably a
            ``KeyedVectors``; confirm and adjust the annotation.

    Returns:
        The element-wise mean of the token vectors, or a zero vector of length
        ``word2vec_model.vector_size`` when no token is known.
    """
    word_vectors = []
    for raw_token in text.split():
        stripped = raw_token.strip(string.punctuation)
        # Most specific spelling first; the first hit wins.
        for candidate in (raw_token, stripped, stripped.lower()):
            if candidate and candidate in word2vec_model:
                word_vectors.append(word2vec_model[candidate])
                break

    if word_vectors:
        return np.mean(word_vectors, axis=0)
    return np.zeros(word2vec_model.vector_size)

# Calculate cosine similarity between two vectors
def calculate_similarity(vector1: np.array, vector2: np.array) -> float:
    """Cosine similarity of two vectors, with all-zero vectors scoring 0.0.

    Args:
        vector1: First vector.
        vector2: Second vector.

    Returns:
        ``0.0`` if either vector has no non-zero component, otherwise the
        single entry of the pairwise ``cosine_similarity`` matrix.
    """
    both_have_signal = np.any(vector1) and np.any(vector2)
    if not both_have_signal:
        # A vector without any non-zero entry has no direction to compare.
        return 0.0

    # cosine_similarity works on 2-D inputs, so wrap each vector in a
    # one-row batch and pick the only cell of the 1x1 result.
    pairwise = cosine_similarity([vector1], [vector2])
    return pairwise[0][0]

# Get answer for a question from the knowledge base
def get_answer_for_question(question: str, knowledge_base: dict) -> str | None:
    for q in knowledge_base["questions"]:
        if q["question"] == question:
            return q["answer"]


# Main chatbot function
def chat_bot(word2vec_model=None):
    """Run the interactive question-answering loop until the user types 'quit'.

    The knowledge base is obtained from ``load_knowledge_base()``. For every
    user input the closest stored question is looked up with
    ``find_best_match``; if one is found its answer is printed, otherwise the
    user is asked to teach an answer, which is appended to the knowledge base
    and persisted with ``save_knowledge_base``.

    Args:
        word2vec_model: Embedding model to use. When omitted, a notebook-level
            ``word2vec_model`` variable is reused if one exists (the setup
            cell creates it); only if there is none is the model loaded via
            ``api.load``. This avoids loading the same large model a second
            time and holding two copies in memory.
    """
    # A cancelled or empty upload may produce no data; fall back to an empty
    # knowledge base so the bot can still be taught from scratch.
    knowledge_base: dict = load_knowledge_base() or {}
    knowledge_base.setdefault("questions", [])

    # Resolve the model: explicit argument > already-loaded global > fresh load.
    if word2vec_model is None:
        word2vec_model = globals().get('word2vec_model')
    if word2vec_model is None:
        word2vec_model = api.load('word2vec-google-news-300')

    print("Bot: Hi! I'm your chatbot assistant. You can ask me questions or type 'quit' to exit.")

    while True:
        user_input: str = input('You: ')
        command = user_input.strip().lower()

        if command == 'quit':
            break
        if not command:
            # Nothing was typed; there is no question to match or to learn.
            continue

        print(f'You: {user_input}')  # Display user's input

        best_match = find_best_match(user_input, knowledge_base, word2vec_model)
        if best_match:
            answer = get_answer_for_question(best_match, knowledge_base)
            print(f'Bot: {answer}')
        else:
            print('Bot: I don\'t know the answer. Can you teach me?')
            new_answer: str = input('Type the answer or "skip" to skip: ')

            # Treat a blank reply like "skip" so no empty answer is stored.
            if new_answer.strip() and new_answer.strip().lower() != 'skip':
                knowledge_base["questions"].append({"question": user_input, "answer": new_answer})
                save_knowledge_base(knowledge_base)
                print('Bot: Thank you! I learned a new response!')

# Start the chatbot only when this code is executed as the main program,
# not when it is imported as a module.
if __name__ == '__main__':
    chat_bot()




Saving knowledge_base (1).json to knowledge_base (1).json
Bot: Hi! I'm your chatbot assistant. You can ask me questions or type 'quit' to exit.
You: how can i apply for internship
You: how can i apply for internship
Bot: you can contact internship office at university
You: how can i re register for examination
You: how can i re register for examination
Bot: "A minimum grade of at least 4.0 is required on each module to pass an examination
You: can you tell me the rules to create new password
You: can you tell me the rules to create new password
Bot: Students can request a new password by entering their username and clicking 'Send' on the campus website.
You: what is the validity of password reset link
You: what is the validity of password reset link
Bot: The password reset link is valid for 48 hours from the time it is sent to the student's private email address.
You: How can we download Office365?
You: How can we download Office365?
Bot: Students can download Office365 for free by fol