In [5]:
import nltk
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources used below: "punkt" backs nltk.word_tokenize and
# "wordnet" backs WordNetLemmatizer.
for _resource in ("punkt", "wordnet"):
    nltk.download(_resource)

# Toy training set: patterns[i] is a user utterance and responses[i] is the
# reply associated with it, so the two lists must stay index-aligned.
patterns = [
    "Hi",
    "Hello",
    "How are you?",
    "What is your name?",
    "What can you do?",
    "Bye",
]
responses = [
    "Hello!",
    "Hi there!",
    "I'm fine, thank you!",
    "I'm a chatbot.",
    "I can chat with you.",
    "Goodbye!",
]

# Shared lemmatizer instance used by the tokenizer function.
lemmatizer = WordNetLemmatizer()

def clean_up_sentence(sentence):
    """Return the lower-cased, lemmatized tokens of *sentence*.

    The sentence is split with ``nltk.word_tokenize``; every resulting token
    is lower-cased and then passed through the module-level ``lemmatizer``.
    The tokens are returned as a list, in their original order.
    """
    return [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]

# Build a TF-IDF vectorizer around the custom NLTK tokenizer.
# token_pattern=None: scikit-learn ignores its regex token pattern whenever a
# custom tokenizer is supplied, and leaving the default in place makes it emit
# a "token_pattern will not be used" UserWarning on fit. Disabling the pattern
# explicitly silences that warning without changing the extracted features.
vectorizer = TfidfVectorizer(tokenizer=clean_up_sentence, token_pattern=None)

# Learn the vocabulary and IDF weights from the patterns and convert them into
# a TF-IDF feature matrix (one row per pattern).
X = vectorizer.fit_transform(patterns)

# Train Multinomial Naive Bayes; each response string serves as the class
# label of the pattern at the same index.
model = MultinomialNB()
model.fit(X, responses)

# Persist the trained model and the fitted vectorizer.
# NOTE: the saved vectorizer references clean_up_sentence by its qualified
# name rather than embedding its code, so the same function must be defined or
# importable under that name wherever "vectorizer.pkl" is loaded.
joblib.dump(model, "chatbot_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ashle\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ashle\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['vectorizer.pkl']