In [None]:
import nltk
import numpy as np
import random
import string

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Fetch the NLTK data packages requested by this script: the 'punkt'
# tokenizer models and the 'wordnet' corpus.
for _resource in ('punkt', 'wordnet'):
    nltk.download(_resource)


# Reference sentences the user's input is compared against.
# faq_responses is a parallel list: faq_responses[i] is the reply given when
# faq_corpus[i] is the closest match, so both lists MUST have the same length
# and ordering.
faq_corpus = [
    "Hello, how can I help you?",
    "Hi there, how can I assist?",
    "What is natural language processing?",
    "NLP is a field of AI that gives machines the ability to read, understand, and derive meaning from human language.",
    "What is machine learning?",
    "Machine learning is the study of computer algorithms that improve automatically through experience.",
    "How does deep learning work?",
    "Deep learning is a subset of machine learning that uses neural networks with many layers (deep architectures).",
    "Who created you?",
    "I was created by a team of AI enthusiasts.",
    "Thank you!",
    "You're welcome!",
    "Goodbye!"
]

# One reply per faq_corpus entry, in the same order. The deep-learning and
# "created" replies are intentionally repeated so that both corpus sentences
# on each of those topics (indices 6/7 and 8/9) map to a relevant answer.
faq_responses = [
    "Hello! How can I assist you today?",
    "Hi! What can I do for you?",
    "Natural language processing, or NLP, is a branch of AI that deals with the interaction between computers and humans through language.",
    "NLP involves enabling computers to understand and respond to human language, helping in tasks such as translation, summarization, and conversation.",
    "Machine learning refers to algorithms that improve based on experience or data.",
    "Machine learning is about creating models that allow computers to learn patterns from data and make decisions or predictions.",
    "Deep learning involves neural networks with multiple layers, often used for complex tasks such as image recognition, language translation, and more.",
    "Deep learning involves neural networks with multiple layers, often used for complex tasks such as image recognition, language translation, and more.",
    "I was created using Python, machine learning, and some good old-fashioned programming.",
    "I was created using Python, machine learning, and some good old-fashioned programming.",
    "I'm glad I could help! Anything else?",
    "You're very welcome!",
    "Goodbye! Feel free to come back if you need more help!"
]
def tokenize(text):
    """Lower-case *text* and split it into tokens with ``nltk.word_tokenize``."""
    lowered = text.lower()
    return nltk.word_tokenize(lowered)

# Shared WordNet lemmatizer instance used by LemTokens.
lemmer = nltk.stem.WordNetLemmatizer()


def LemTokens(tokens):
    """Return a list holding ``lemmer.lemmatize(token)`` for each token, in order."""
    return list(map(lemmer.lemmatize, tokens))

# Translation table for str.translate(): every character of
# string.punctuation maps to None, i.e. is deleted.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}


def LemNormalize(text):
    """Lower-case *text*, drop punctuation, tokenize it and lemmatize the tokens.

    Passed to TfidfVectorizer as its ``tokenizer`` callable.
    """
    cleaned = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(cleaned)
    return LemTokens(words)


# Words/phrases recognised as a greeting (compared case-insensitively).
greeting_inputs = ("hello", "hi", "greetings", "sup", "what's up", "hey")
# Replies to choose from at random when a greeting is detected.
greeting_responses = ["Hello", "Hi", "Greetings!", "Hey there!", "Hello! How can I assist you today?"]


def greeting(sentence):
    """Return a greeting response if the user's input is a greeting.

    The sentence is lower-cased and punctuation at the edges of each word is
    stripped, so "Hello!" and "hi," are recognised. Entries of
    ``greeting_inputs`` are matched as whole words or whole phrases, which
    lets the multi-word entry "what's up" match while "this" does not
    trigger on "hi".

    Returns a random element of ``greeting_responses``, or ``None`` when no
    greeting is found.
    """
    words = (word.strip(string.punctuation).lower() for word in sentence.split())
    normalized = " ".join(word for word in words if word)
    # Pad with spaces so substring search only matches on word boundaries.
    padded = f" {normalized} "
    if any(f" {phrase} " in padded for phrase in greeting_inputs):
        return random.choice(greeting_responses)
    return None

def generate_response(user_input):
    """Return the FAQ reply whose corpus sentence is most similar to *user_input*.

    The sentences in ``faq_corpus`` plus the user's input are vectorized with
    TF-IDF (tokenized by ``LemNormalize``, English stop words removed). The
    input's vector is compared by cosine similarity against every corpus
    sentence, and the entry of ``faq_responses`` at the best-matching index
    is returned.

    A fixed "I don't understand" message is returned when nothing in the
    corpus shares a term with the input, or when ``faq_responses`` has no
    entry at the matching index.

    The global ``faq_corpus`` is not modified.
    """
    fallback = "I am sorry, I don't understand. Could you rephrase that?"

    # Work on a local list so a failure while vectorizing cannot leave the
    # user's input stuck in the shared corpus.
    documents = list(faq_corpus)
    documents.append(user_input)

    tfidf_vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(documents)

    # Compare the query (last row) against the corpus rows only; excluding
    # the query's own row means it can never be selected as its own match.
    scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).ravel()
    if scores.size == 0:
        return fallback

    best_index = int(scores.argmax())
    if scores[best_index] <= 0:
        return fallback
    # Guard against a corpus entry that has no reply at the same position.
    if best_index >= len(faq_responses):
        return fallback
    return faq_responses[best_index]


def chatbot():
    """Run the interactive console chat loop.

    Reads lines from standard input until the user types 'bye' (case and
    surrounding whitespace are ignored) or the input stream ends / is
    interrupted. Each line is answered with a greeting when ``greeting``
    recognises one, otherwise with ``generate_response``.
    """
    print("Chatbot: Hello! I am an NLP chatbot. Type 'bye' to exit.")

    while True:
        try:
            user_input = input("You: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: end the session instead of a traceback.
            print("\nChatbot: Goodbye!")
            break

        if user_input == 'bye':
            print("Chatbot: Goodbye!")
            break

        # Call greeting() once: it picks a random reply, so calling it again
        # for printing would make a second, independent draw.
        reply = greeting(user_input)
        if reply is None:
            reply = generate_response(user_input)
        print(f"Chatbot: {reply}")


chatbot()
audio.mp3

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Chatbot: Hello! I am an NLP chatbot. Type 'bye' to exit.
You: hi
Chatbot: Hi
