<a href="https://colab.research.google.com/github/shubham6907/ChatBots/blob/main/Retrieval_Based_ChatBot_using_Natural_Language_Toolkit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Using Natural Language Toolkit (nltk)

from functools import lru_cache

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the NLTK resources that preprocess_text() needs.
# Recent NLTK releases load the 'punkt_tab' tokenizer tables in word_tokenize();
# 'punkt' is still fetched so the notebook keeps working on older releases.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Sample conversation data.
# get_response() picks its reply from this list: it returns the entry whose
# TF-IDF vector is most similar to the user's message.
conversation_data = [
    "Hi there!",
    "Hello!",
    "How are you doing?",
    "I'm good, thank you. How about you?",
    # Add more conversation data here (one utterance per string)
]

# Preprocess the conversation data
@lru_cache(maxsize=None)
def _english_stop_words():
    """Return the NLTK English stopword list as a frozenset (built once, then cached)."""
    return frozenset(stopwords.words('english'))


@lru_cache(maxsize=None)
def _porter_stemmer():
    """Return a shared PorterStemmer instance (created once, then cached)."""
    return PorterStemmer()


def preprocess_text(text):
    """Normalise a piece of text into a space-separated string of stems.

    Steps: lowercase the text, tokenize it with ``word_tokenize``, keep only
    tokens that are purely alphanumeric and not English stopwords, then
    Porter-stem what remains.

    Args:
        text: The raw string to normalise.

    Returns:
        The surviving stems joined by single spaces. This is the empty string
        when every token is filtered out (e.g. input made only of stopwords
        or punctuation).
    """
    # The stopword set and stemmer do not depend on `text`, so they are
    # fetched from cached helpers instead of being rebuilt on every call.
    stop_words = _english_stop_words()
    stem = _porter_stemmer().stem

    tokens = word_tokenize(text.lower())
    return ' '.join(
        stem(token)
        for token in tokens
        if token.isalnum() and token not in stop_words
    )

# Run every stored utterance through the same normalisation as user input
preprocessed_data = list(map(preprocess_text, conversation_data))

# Learn the TF-IDF vocabulary from the normalised corpus and keep its
# document-term matrix for similarity lookups
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_data)

def get_response(user_input, fallback="I'm sorry, I don't understand that."):
    """Return the stored utterance most similar to ``user_input``.

    The input is normalised with ``preprocess_text``, projected into the
    fitted TF-IDF space, and compared against every row of ``tfidf_matrix``
    by cosine similarity.

    Args:
        user_input: The raw message typed by the user.
        fallback: Reply returned when the input has no similarity to any
            stored utterance (for example, it contains only stopwords,
            punctuation, or words outside the fitted vocabulary).

    Returns:
        The entry of ``conversation_data`` with the highest similarity score,
        or ``fallback`` when the best score is zero.
    """
    preprocessed_input = preprocess_text(user_input)
    user_vector = vectorizer.transform([preprocessed_input])

    # One query row in, so the result has a single row of per-utterance scores
    scores = cosine_similarity(user_vector, tfidf_matrix)[0]
    best_index = int(scores.argmax())

    # With all scores at zero argmax would silently pick index 0, making the
    # bot answer with the first utterance to any unrecognised input.
    if scores[best_index] <= 0:
        return fallback

    return conversation_data[best_index]


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
# Interactive chat loop: type 'exit' (any case, surrounding spaces ignored) to stop.
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the kernel was interrupted: leave the loop
        # cleanly rather than ending the cell with a traceback.
        break
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # Nothing was typed; prompt again instead of querying the model
        continue
    response = get_response(user_input)
    print("ChatBot:", response)

You: hi
ChatBot: Hi there!
You: exit
