In [20]:
# Install NLTK
!pip install nltk scikit-learn

# Import necessary libraries
import nltk
import random
import string
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Download NLTK resources.
# Recent NLTK releases load the Punkt models used by sent_tokenize/word_tokenize
# from the 'punkt_tab' package, while older releases use 'punkt'. The install
# above is not pinned, so fetch both to work with whichever version is present.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [19]:
# Knowledge base for the bot, one utterance or reply per line.
# The text is written to chatbot.txt so that it can be loaded back from disk.
chatbot_text = """
hi
hello
hey there
how are you
i am a chatbot, always happy to help you!
what is your name
i am a simple chatbot created to chat with you.
who created you
i was created by developers using python and nltk.
what can you do
i can chat with you and answer simple questions.
tell me about python
python is a high-level, interpreted programming language known for its simplicity and readability.
tell me about nltk
nltk is a leading platform for building python programs to work with human language data.
bye
goodbye! see you soon.
"""

# Overwrite chatbot.txt in the current working directory with the text above.
with open('chatbot.txt', 'w') as kb_file:
    kb_file.write(chatbot_text)


In [21]:
import string
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the knowledge base from disk and lower-case it; undecodable bytes are
# dropped (errors='ignore') instead of raising.
with open('chatbot.txt', 'r', errors='ignore') as kb_file:
    raw = kb_file.read().lower()

# Word-level and sentence-level views of the same text.
# NOTE(review): the file is newline-delimited, but sent_tokenize splits on
# sentence punctuation, so a question line and the answer line that follows it
# can end up inside a single entry of sent_tokens -- confirm this is intended.
word_tokens = nltk.word_tokenize(raw)
sent_tokens = nltk.sent_tokenize(raw)

# Shared WordNet lemmatizer instance used by the normalization helpers.
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    """Return a list with the lemma of each item in ``tokens``, in order.

    Each token is passed to the module-level ``lemmer.lemmatize``.
    """
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas

# Translation table for str.translate(): maps the code point of every
# character in string.punctuation to None, which deletes that character.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

def LemNormalize(text):
    """Lower-case ``text``, delete punctuation, tokenize it and lemmatize the tokens.

    Returns the list produced by ``LemTokens`` for the cleaned-up text.
    """
    without_punct = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(without_punct)
    return LemTokens(words)


In [22]:
# Greeting inputs and responses
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey")
GREETING_RESPONSES = [
    "Hi there! How can I help you?",
    "Hello! Ask me anything.",
    "Hey! Ready to chat?",
    "Greetings! What would you like to know?"
]

def greeting(sentence):
    """Return a random greeting reply if ``sentence`` contains a greeting.

    The sentence is lower-cased and split on whitespace, and punctuation is
    stripped from both ends of every word, so "Hi!" and "hey," are recognised.
    Each entry of GREETING_INPUTS is matched as a run of consecutive words,
    which lets multi-word entries such as "what's up" match as well.

    Returns one of GREETING_RESPONSES, or None when no greeting is found.
    """
    words = [raw_word.strip(string.punctuation) for raw_word in sentence.lower().split()]
    words = [word for word in words if word]

    for phrase in GREETING_INPUTS:
        phrase_words = phrase.split()
        width = len(phrase_words)
        # Slide a window of the phrase's length over the sentence; the range is
        # empty when the sentence is shorter than the phrase.
        for start in range(len(words) - width + 1):
            if words[start:start + width] == phrase_words:
                return random.choice(GREETING_RESPONSES)
    return None

# Known question-answer pairs
KNOWN_RESPONSES = {
    "how are you": "I'm just a bot, but I'm doing great! How can I help you?",
    "what is your name": "I am a friendly chatbot created with Python and NLTK.",
    "who created you": "I was built by developers using Python and NLTK.",
    "what can you do": "I can chat with you, answer basic questions, and share some fun info!"
}

def check_known_responses(user_input):
    """Return the canned answer for the first known question found in ``user_input``.

    The input is lower-cased and runs of whitespace are collapsed to a single
    space before matching, so "How  are you?" matches the "how are you" key.
    Matching is by substring, in the insertion order of KNOWN_RESPONSES.

    Returns the answer string, or None when nothing matches or the input is
    empty/None.
    """
    if not user_input:
        return None

    normalized = " ".join(user_input.lower().split())
    for question, answer in KNOWN_RESPONSES.items():
        if question in normalized:
            return answer
    return None


In [23]:
def response(user_response):
    """Return the bot's reply to ``user_response``.

    A canned answer from ``check_known_responses`` takes priority. Otherwise the
    input is compared with every entry of the module-level ``sent_tokens`` using
    bag-of-words counts (``CountVectorizer`` with ``LemNormalize`` as tokenizer
    and English stop words removed) and cosine similarity; the most similar
    entry is returned. When no entry shares a term with the input, or the
    knowledge base is empty, an apology asking the user to rephrase is returned.

    ``sent_tokens`` is only read, never modified.
    """
    fallback = "I'm sorry! I didn't quite get that. Could you rephrase?"

    # Check for known responses first
    known_answer = check_known_responses(user_response)
    if known_answer:
        return known_answer

    # Nothing to compare the input with.
    if not sent_tokens:
        return fallback

    # Vectorize the knowledge base together with the query (last row) on a
    # private copy, so the shared list stays intact even if vectorization raises
    # or the query text is identical to an existing entry.
    corpus = list(sent_tokens) + [user_response]
    vectorizer = CountVectorizer(tokenizer=LemNormalize, stop_words='english')
    tf = vectorizer.fit_transform(corpus)

    # Score the query against the knowledge-base rows only; leaving the query's
    # own row out means it can never be selected as its own best match.
    scores = cosine_similarity(tf[-1], tf[:-1]).flatten()
    best_idx = int(scores.argmax())

    if scores[best_idx] == 0:
        return fallback
    return sent_tokens[best_idx]


In [24]:
def chatbot():
    """Run the interactive chat loop on standard input/output.

    Each line typed by the user is stripped of surrounding whitespace and
    lower-cased, then handled in this order:

    * 'bye' prints a farewell and ends the chat;
    * 'thanks' / 'thank you' prints an acknowledgement and ends the chat;
    * a greeting gets a reply from ``greeting``;
    * anything else is answered by ``response``.

    End-of-input or an interrupt while waiting for input also ends the chat
    with the farewell message instead of a traceback.
    """
    print("BOT: Hello! I am your chatbot. Type 'bye' to exit.")
    while True:
        try:
            # Strip so that input such as "bye " is still recognised as the exit command.
            user_response = input("You: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            print()
            print("BOT: Bye! Have a great day!")
            break

        if user_response == 'bye':
            print("BOT: Bye! Have a great day!")
            break

        if user_response in ('thanks', 'thank you'):
            print("BOT: You're welcome!")
            break

        # Call greeting() once: it picks a random reply, so a second call could
        # return a different message than the one that was tested.
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("BOT:", greeting_reply)
        else:
            print("BOT:", response(user_response))

# Start the interactive session; this blocks on input() until the user ends the chat.
chatbot()


BOT: Hello! I am your chatbot. Type 'bye' to exit.
You: hi
BOT: Hello! Ask me anything.
You: how are you
BOT: I'm just a bot, but I'm doing great! How can I help you?
You: what is your name
BOT: I am a friendly chatbot created with Python and NLTK.
You: tell me about python
BOT: tell me about python
python is a high-level, interpreted programming language known for its simplicity and readability.
You: tell me about nltk
BOT: tell me about nltk
nltk is a leading platform for building python programs to work with human language data.
You: bye
BOT: Bye! Have a great day!
