In [1]:
import pickle
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import os
import spacy
import requests
import re
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
import json


In [None]:
!python -m spacy download en_core_web_sm


In [2]:
# Initialize Spacy NLP model
nlp = spacy.load("en_core_web_sm")


In [3]:
# Load knowledge base from the pickle file
with open('knowledge_base.pkl', 'rb') as f:
    knowledge_base = pickle.load(f)

In [4]:
# Define cosine similarity 
def cosine_similarity(sentence1, sentence2):
    nltk_stopwords = set(stopwords.words('english')) # Properly initializing the NLTK stopwords list
    tokens1 = [word.lower() for word in word_tokenize(sentence1) if word not in nltk_stopwords]
    tokens2 = [word.lower() for word in word_tokenize(sentence2) if word not in nltk_stopwords]
    
    # Vectorization
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([" ".join(tokens1), " ".join(tokens2)])

    # Cosine similarity
    similarity = (vectors * vectors.T).toarray()[0][1]
    return similarity


In [5]:
#Example usage
sentence1 = "This is a sample sentence."
sentence2 = "Test sentence."

similarity = cosine_similarity(sentence1, sentence2)
print(f"Cosine Similarity: {similarity}")

Cosine Similarity: 0.2605556710562624


In [42]:
# Fetch information from knowledge base once user inputs a question
def fetch_information_from_knowledge_base(user_input, knowledge_base, threshold=0.5):
    stop_words = set(stopwords.words('english')) # Properly initializing the stopwords list
    tokens = [t for t in word_tokenize(user_input.lower()) if t not in stop_words]
    
    vectorizer = TfidfVectorizer()
    user_vector = vectorizer.fit_transform([" ".join(tokens)])
    
    knowledge_list = list()
    for i in knowledge_base.values():
        for j in i:
            knowledge_list.append(j)
    
    knowledge_list_vectors = vectorizer.transform(list(knowledge_list))
    
    similarities = (user_vector * knowledge_list_vectors.T).toarray()[0]
    most_similar_index = similarities.argmax()
    most_similar_score = similarities.max()
    
    if most_similar_score > threshold:
        return knowledge_list[most_similar_index].split('.')[:1]
    else:
        web_results = web_lookup(user_input)
        if web_results:
            return web_results
        else:
            return "I'm sorry, I didn't understand that."
    


In [44]:
fetch_information_from_knowledge_base("Travis Kelce", knowledge_base)

['[121] [127] \n \n Travis Kelce [ edit ] \n American football player  Travis Kelce  in 2021 \n Swift began dating  American football  player  Travis Kelce  around August 2023']

In [28]:
# Function to clean Wikipedia response
def clean_wikipedia_response(response):
    # Remove Wikipedia markup
    clean_text = re.sub(r'\[.*?\]', '', response)
    clean_text = re.sub(r'\(.*?\)', '', clean_text)
    
    # Remove extra spaces and line breaks
    clean_text = re.sub(r'\s+', ' ', clean_text)
    clean_text = clean_text.strip()
    
    return clean_text


In [29]:
#  Takes a list of bot responses as input, cleans each response 
def clean_bot_response(bot_response):
    cleaned_responses = []
    for response in bot_response:
        cleaned_response = clean_wikipedia_response(response)
        cleaned_responses.append(cleaned_response)
    return cleaned_responses

In [30]:
def get_random_response(user_input1):
    user_input1 = user_input1.lower()
    responses = {"hi": ["Hello!", "Hi there!", "Hey!"], "hello": ["Hello!", "Hi there!", "Hey!"], "hey": ["Hello!", "Hi there!", "Hey!"],
                 "farewell": ["Goodbye!", "See you later!", "Bye!"], "bye": ["Goodbye!", "See you later!", "Bye!"]}
    return random.choice(responses.get(user_input1))


In [31]:
# Path to store conversation history
conversation_file_path = "conversation_history.txt"

In [32]:
# Store the user - bot conversation history
def save_conversation(user_input, bot_response, user_name, user_interests, user_dislikes):
    with open(conversation_file_path, 'a') as f:
        if user_input.lower() in ["interests", "dislikes"]:
            # Store interests and dislikes only once
            f.write(f"User {user_name}'s {user_input.capitalize()}: {user_interests if user_input.lower() == 'interests' else user_dislikes}\n")
        elif bot_response:  # Check if bot response is not empty
            # Store user input question
            f.write(f"User {user_name}: {user_input}\n")
            # Store bot response
            for response in bot_response:
                f.write(f"Bot: {response}\n")


In [33]:
# Google the input query incase it does not exist in our knowledge base
def web_lookup(query):
    try:
        # Send a GET request to a search engine (e.g., Google) with the query
        response = requests.get(f"https://www.google.com/search?q={query}")
        # Parse the HTML content of the search results page using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')
        # Extract relevant information from the search results (e.g., titles, snippets)
        results = soup.find_all('div', class_='BNeawe vvjwJb AP7Wnd')
        # Process and format the results as needed
        formatted_results = [result.get_text() for result in results]
        return formatted_results
    except Exception as e:
        print(f"Error during web lookup: {e}")
        return None

In [34]:
# Test the web lookup functionality
queries = ["Taylor Swift's age", "Travis Kelce"]

for query in queries:
    print(f"Query: {query}")
    web_result = web_lookup(query)
    print("Web Lookup Result:", web_result)
    print()

Query: Taylor Swift's age
Web Lookup Result: ['Taylor Swift - Wikipedia', 'Taylor Swift | Biography, Albums, Songs, Grammys, & Facts', 'Taylor Swift - Simple English Wikipedia, the free encyclopedia', 'Taylor Swift - Age, Family, Bio - Famous Birthdays', "Celebrate Taylor Swift's Birthday by the Numbers - People", "Taylor Swift Says She's a Proud Millennial in Birthday Post - Billboard", 'Taylor Swift: Biography, Musician, 2024 Grammy Winner', 'Taylor Swift Bio (2023): Boyfriend, Height, Age, Zodiac Sign, Net ...', 'Taylor Swift Net Worth, Biography (Birth, Height, Career, Relationship)', 'How old is Taylor Swift? - Age Calculator - How Old Am I?']

Query: Travis Kelce
Web Lookup Result: ['Travis Kelce (@killatrav) • Instagram photos and videos', 'Travis Kelce (@tkelce) / X', 'Travis Kelce Stats, News and Video - TE - NFL.com', 'Travis Kelce - Wikipedia', 'Travis Kelce - Kansas City Chiefs', 'Travis Kelce - Kansas City Chiefs Tight End - ESPN', "New contract makes Chiefs' Travis Kelce 

In [35]:
class UserModel:
    def __init__(self, user_name):
        self.user_name = user_name
        self.file_path = f"{user_name}.xml"
        if os.path.exists(self.file_path):
            self.load_user_model()
        else:
            self.create_user_model()

    def create_user_model(self):
        self.root = ET.Element("user")
        self.name = ET.SubElement(self.root, "name")
        self.name.text = self.user_name
        self.personal_info = ET.SubElement(self.root, "personal_info")
        self.likes = ET.SubElement(self.root, "likes")
        self.dislikes = ET.SubElement(self.root, "dislikes")

    def load_user_model(self):
        tree = ET.parse(self.file_path)
        self.root = tree.getroot()
        self.name = self.root.find("name")
        self.personal_info = self.root.find("personal_info")
        self.likes = self.root.find("likes")
        self.dislikes = self.root.find("dislikes")

    def add_personal_information(self, category, info):
        if category == 'likes':
            self.likes.text = info
        elif category == 'dislikes':
            self.dislikes.text = info

    def save_user_model(self):
        tree = ET.ElementTree(self.root)
        tree.write(self.file_path)

In [41]:
def chatbot():
    print(" - " * 22)
    print("AJ: Hello! I'm AJ chatbot.")
    print(" - " * 22)
    print()

    print("AJ: Hi there! Please tell me your name.")
    user_name = input("User: ")
    
    # Create a user model instance
    user_model = UserModel(user_name)

    print(f"AJ: Nice to meet you, {user_name}! What are your interests?")
    user_interests = input("User: ")
    user_model.add_personal_information('likes', user_interests)

    print("AJ: And what are your dislikes?")
    user_dislikes = input("User: ")
    user_model.add_personal_information('dislikes', user_dislikes)
    
    # Store user's interests and dislikes
    user_model.save_user_model()
    
    # Store user's interests and dislikes
    save_conversation("Interests", [], user_name, user_interests, user_dislikes)
    save_conversation("Dislikes", [], user_name, user_interests, user_dislikes)

    print(f"\nAJ: Hi {user_name}, feel free to ask me anything about Taylor Swift.")

    while True:
        user_input = input("\nYou: ")
        
        # Save user's input question
        save_conversation(user_input, [], user_name, user_interests, user_dislikes)

        # Greetings
        if user_input.lower() in ["hi", "hello"]:
            bot_response = get_random_response(user_input)
            print("AJ:", bot_response)
            save_conversation(user_input, [bot_response], user_name, user_interests, user_dislikes)
            print("AJ: Enter 'bye' to end the conversation.")
            continue

        # Farewell
        if user_input.lower() in ["bye", "Bye AJ", "goodbye"]:
            bot_response = get_random_response(user_input)
            print("AJ:", bot_response)
            save_conversation(user_input, [bot_response], user_name, user_interests, user_dislikes)
            break

        # Process user input
        doc = nlp(user_input)
        nouns = [token.text for token in doc if token.pos_ == "NOUN"]
        actions = [token.text for token in doc if token.pos_ == "VERB"]

        # Implement knowledge base retrieval
        fact = fetch_information_from_knowledge_base(user_input, knowledge_base)
        if fact:
            bot_response = fact
        else:
            # Perform web lookup if knowledge base does not contain relevant information
            web_results = web_lookup(user_input)
            if web_results:
                bot_response = web_results
            else:
                bot_response = "I'm not sure about that."

        cleaned_response = clean_bot_response(bot_response)
        for response in cleaned_response:
            print("AJ:", response)
        save_conversation(user_input, cleaned_response, user_name, user_interests, user_dislikes)
        print("AJ: Enter 'bye' to end the conversation.")

In [38]:
if __name__ == "__main__":
    chatbot()

 -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  - 
AJ: Hello! I'm AJ chatbot.
 -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  - 

AJ: Hi there! Please tell me your name.
User: Hrushi
AJ: Nice to meet you, Hrushi! What are your interests?
User: Cars, music
AJ: And what are your dislikes?
User: Nothing

AJ: Hi Hrushi, feel free to ask me anything about Taylor Swift.

You: when is Taylor born
AJ: Life and career Early life Taylor Alison Swift was born on December 13, 1989, in West Reading, Pennsylvania
AJ: 
AJ: Enter 'bye' to end the conversation.

You: who is travis
AJ: She began dating American football player Travis Kelce in 2023, which has had a significant cultural impact, including a contribution of $331
AJ: 5 million in brand value for the National Football League
AJ: Enter 'bye' to end the conversation.

You: how many jets does taylor own
AJ: In 2021, 26 tracks from her second re-recorded album, Red , simultaneously debuted on the Hot 100, marking t