In [1]:
pip install textblob


Note: you may need to restart the kernel to use updated packages.


In [2]:
import json
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from textblob import TextBlob

# Read the intents data from a JSON file.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("intents.json", "r", encoding="utf-8") as file:
    intents = json.load(file)

# Text-normalisation resources shared by preprocess()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))

def preprocess(text):
    """Turn a raw utterance into a space-separated string of feature tokens.

    The text is lower-cased and tokenized; purely alphabetic tokens that are
    not stop words are lemmatized. Each surviving lemma that is still not a
    stop word becomes one feature: the string form of
    ``polarity * len(lemma)`` immediately followed by the lemma itself.

    Args:
        text: The raw input string.

    Returns:
        The feature tokens joined by single spaces (empty string if none).
    """
    lemmas = []
    for token in nltk.word_tokenize(text.lower()):
        if token.isalpha() and token not in stop_words:
            lemmas.append(lemmatizer.lemmatize(token))

    polarities = [TextBlob(lemma).sentiment.polarity for lemma in lemmas]

    weighted = []
    for lemma, polarity in zip(lemmas, polarities):
        # Lemmatization can produce a stop word, so filter once more.
        if lemma in stop_words:
            continue
        weighted.append(str(polarity * len(lemma)) + lemma)
    return " ".join(weighted)


# Prepare the data: one preprocessed utterance and its intent label per example
corpus = []
labels = []

for intent in intents["intents"]:
    for text in intent["text"]:
        corpus.append(preprocess(text))
        labels.append(intent["intent"])

# Dense term-count matrix built from the preprocessed utterances
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus).toarray()
y = labels

# Split the data
from sklearn.model_selection import train_test_split
# A fixed random_state makes the split, and therefore the accuracy printed
# below, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the classifier
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

# Evaluate the classifier on the held-out 20%
score = classifier.score(X_test, y_test)
print("Accuracy: {:.2f}%".format(score * 100))

# Build the chatbot
def get_response(input_text):
    """Return the chatbot's reply for ``input_text``.

    The text is preprocessed, vectorized with the fitted ``vectorizer`` and
    classified with ``classifier``. The reply is the first entry of the
    ``responses`` list of the first intent whose name equals the prediction.

    Args:
        input_text: The raw user utterance.

    Returns:
        The selected response, or "Sorry, I don't understand" when no intent
        matches the predicted label.
    """
    cleaned = preprocess(input_text)
    bag_of_words = vectorizer.transform([cleaned]).toarray()
    predicted_label = classifier.predict(bag_of_words)[0]

    for entry in intents["intents"]:
        if entry["intent"] != predicted_label:
            continue
        return entry["responses"][0]
    return "Sorry, I don't understand"

# Test the chatbot: send one greeting through get_response() and print
# whatever reply it selects.
input_text = "hi"
response = get_response(input_text)
print(response)


Accuracy: 68.97%
Hi human, please tell me your GeniSys user


In [3]:
import json
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from textblob import TextBlob

from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM

# Read the intents data from a JSON file.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("intents.json", "r", encoding="utf-8") as file:
    intents = json.load(file)

# Text-normalisation resources shared by preprocess()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))

def preprocess(text):
    """Normalise ``text`` and append one textual tag per kept word.

    The text is lower-cased and tokenized; purely alphabetic tokens that are
    not stop words are lemmatized. For every lemma a tag of the form
    ``"<polarity>*<length>"`` is built, where ``<polarity>`` is the string
    form of the lemma's TextBlob polarity and ``<length>`` its character
    count.

    Args:
        text: The raw input string.

    Returns:
        All lemmas followed by all tags, joined by single spaces.
    """
    lemmas = []
    for token in nltk.word_tokenize(text.lower()):
        if token.isalpha() and token not in stop_words:
            lemmas.append(lemmatizer.lemmatize(token))

    # NOTE(review): the tag is the literal text "polarity*length"; no
    # multiplication happens here. Confirm that this is the intended feature.
    tags = []
    for lemma in lemmas:
        polarity_text = str(TextBlob(lemma).sentiment.polarity)
        tags.append(polarity_text + '*' + str(len(lemma)))
    return " ".join(lemmas + tags)


# Prepare the data: one preprocessed utterance and its intent label per example
corpus = []
labels = []

for intent in intents["intents"]:
    for text in intent["text"]:
        corpus.append(preprocess(text))
        labels.append(intent["intent"])

# Tokenize the data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
word_index = tokenizer.word_index
vocab_size = len(word_index)

# Integer sequences, padded to a common length
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X)

# One-hot encode the labels. The class list is built once instead of on every
# loop iteration; it is still list(set(labels)), so column order matches the
# list(set(labels)) lookup that get_response() performs in the same session.
label_names = list(set(labels))
label_to_column = {name: column for column, name in enumerate(label_names)}
y = np.zeros((len(labels), len(label_names)))
for i, label in enumerate(labels):
    y[i, label_to_column[label]] = 1

# Split the data
from sklearn.model_selection import train_test_split
# A fixed random_state makes the train/test partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the model
model = Sequential()
# vocab_size + 1: presumably leaves index 0 free for padding -- confirm
model.add(Embedding(vocab_size + 1, 64, input_length=X.shape[1]))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
# One softmax output per distinct intent label
model.add(Dense(len(label_names), activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, epochs=60, batch_size=32)

# Evaluate the model; score[1] is the "accuracy" metric requested in compile()
score = model.evaluate(X_test, y_test)
print("Accuracy: {:.2f}%".format(score[1] * 100))

# Build the chatbot
def get_response(input_text):
    """Return the chatbot's reply for ``input_text`` using the trained model.

    The text is preprocessed, converted to a padded integer sequence with the
    fitted ``tokenizer`` and classified by ``model``. The class index with the
    highest score is mapped back to an intent name, and a random entry of that
    intent's ``responses`` list is returned.

    Args:
        input_text: The raw user utterance.

    Returns:
        A randomly chosen response of the predicted intent, or
        "Sorry, I don't understand" when no intent matches.
    """
    input_text = preprocess(input_text)
    input_seq = tokenizer.texts_to_sequences([input_text])
    # Pad to the sequence length the model was built with
    input_seq = pad_sequences(input_seq, maxlen=X.shape[1])
    predicted_index = np.argmax(model.predict(input_seq), axis=-1)[0]

    # Resolve the class name once instead of rebuilding list(set(labels)) for
    # every intent. The same expression was used to one-hot encode the
    # training labels, so the index order agrees within one session.
    predicted_label = list(set(labels))[predicted_index]

    for intent in intents["intents"]:
        if intent["intent"] == predicted_label:
            return np.random.choice(intent["responses"])
    return "Sorry, I don't understand"

# Test the chatbot: send one greeting through get_response() and print
# whatever reply it selects.
input_text = "Hello there"
response = get_response(input_text)
print(response)


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Accuracy: 51.72%
Hello human, please tell me your GeniSys user


In [4]:
import json
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from textblob import TextBlob

# Read the intents data from a JSON file.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("intents.json", "r", encoding="utf-8") as file:
    intents = json.load(file)

# Text-normalisation resources shared by preprocess()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))

def preprocess(text):
    """Convert a raw utterance into a space-separated string of feature tokens.

    Steps: lower-case and tokenize the text, keep alphabetic tokens that are
    not stop words, lemmatize them, then emit one feature per lemma that is
    still not a stop word. A feature is the string form of
    ``polarity * len(lemma)`` directly followed by the lemma.

    Args:
        text: The raw input string.

    Returns:
        The feature tokens joined by single spaces (empty string if none).
    """
    tokens = nltk.word_tokenize(text.lower())
    kept = [tok for tok in tokens if tok.isalpha() and tok not in stop_words]
    lemmas = [lemmatizer.lemmatize(tok) for tok in kept]
    polarities = [TextBlob(lemma).sentiment.polarity for lemma in lemmas]

    pieces = []
    for lemma, polarity in zip(lemmas, polarities):
        # A lemma may itself be a stop word even though its source token was not.
        if lemma not in stop_words:
            pieces.append(str(polarity * len(lemma)) + lemma)
    return " ".join(pieces)

# Prepare the data: one preprocessed utterance and its intent label per example
corpus = []
labels = []

for intent in intents["intents"]:
    for text in intent["text"]:
        corpus.append(preprocess(text))
        labels.append(intent["intent"])

# Split the data.
# A fixed random_state makes the split, and therefore the accuracy printed
# below, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(corpus, labels, test_size=0.2, random_state=42)

# Build the pipeline: TF-IDF over unigrams and bigrams feeding a Naive Bayes classifier
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(ngram_range=(1, 2))),
    ('clf', MultinomialNB()),
])

# Define the parameters for Grid Search ("<step>__<param>" addresses a pipeline step)
parameters = {
    'tfidf__max_df': [0.5, 0.75, 1.0],
    'tfidf__min_df': [1, 2, 3],
    'clf__alpha': [0.1, 1, 10],
}

# Perform Grid Search to find the best parameters (5-fold CV on the training split)
grid_search = GridSearchCV(pipeline, parameters, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Evaluate the classifier on the held-out 20%
y_pred = grid_search.predict(X_test)
score = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(score * 100))

# Build the chatbot
def get_response(input_text):
    """Return the chatbot's reply for ``input_text``.

    The text is preprocessed and classified by the fitted ``grid_search``
    estimator. The reply is the first entry of the ``responses`` list of the
    first intent whose name equals the prediction.

    Args:
        input_text: The raw user utterance.

    Returns:
        The selected response, or "Sorry, I don't understand" when no intent
        matches the predicted label.
    """
    cleaned = preprocess(input_text)
    predicted_label = grid_search.predict([cleaned])[0]
    # Lazily walk the intents so only the first match is ever indexed.
    candidates = (
        entry["responses"][0]
        for entry in intents["intents"]
        if entry["intent"] == predicted_label
    )
    return next(candidates, "Sorry, I don't understand")

# Test the chatbot: send one greeting through get_response() and print
# whatever reply it selects.
input_text = "hi"
response = get_response(input_text)
print(response)




Accuracy: 44.83%
Hi human, please tell me your GeniSys user
