In [1]:
import nltk
from nltk.stem import WordNetLemmatizer
import json
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

# Load intents data.
# The encoding is passed explicitly: without it, open() decodes using the platform's
# locale encoding, so non-ASCII characters in the dataset could be misread (or fail
# to decode) on systems whose default is not UTF-8.
with open("chatbot_dataset.json", "r", encoding="utf-8") as file:
    intents = json.load(file)

lemmatizer = WordNetLemmatizer()

# Prepare training data
texts = []    # one normalized string per pattern
labels = []   # intent tag for each entry of `texts`, in the same order
classes = []  # one tag per intent, in the order they appear in the file

# Loop through the intents and prepare the data
for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        # Tokenize, lower-case and lemmatize the pattern, then re-join it into a
        # single space-separated string for the vectorizer
        tokens = nltk.word_tokenize(pattern)
        lemmatized_tokens = [lemmatizer.lemmatize(token.lower()) for token in tokens]
        texts.append(" ".join(lemmatized_tokens))
        labels.append(intent["tag"])
    classes.append(intent["tag"])

# Convert texts to feature vectors using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

# Label encoding for the tags
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)

# Split into training and testing sets (fixed random_state keeps the split repeatable)
# NOTE(review): X_test / y_test are not used anywhere in the visible code.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the classifier (Linear SVC) on the training portion only
classifier = LinearSVC()
classifier.fit(X_train, y_train)

# Simple chatbot function to get response
def chatbot_response(text):
    """Return a reply for ``text`` drawn from the predicted intent's responses.

    The message is tokenized, lower-cased and lemmatized the same way as the
    training patterns, turned into a TF-IDF vector with the fitted ``vectorizer``
    and classified with ``classifier``. The predicted label is mapped back to its
    tag, and one entry of that intent's ``"responses"`` list is picked at random.

    Args:
        text: Raw user message.

    Returns:
        A response string. A generic fallback message is returned when none of
        the message's tokens are in the vectorizer's vocabulary, when no intent
        carries the predicted tag, or when the matching intent has no responses.
    """
    fallback = "I'm sorry, I didn't understand that."

    tokens = nltk.word_tokenize(text)
    lemmatized_tokens = [lemmatizer.lemmatize(token.lower()) for token in tokens]
    input_data = vectorizer.transform([" ".join(lemmatized_tokens)])

    # No stored entries means no token matched the vocabulary. The classifier would
    # then see an all-zero vector and decide from its intercept alone, so answer
    # with the fallback instead of an arbitrary intent.
    if input_data.nnz == 0:
        return fallback

    prediction = classifier.predict(input_data)
    tag = label_encoder.inverse_transform(prediction)[0]

    for intent in intents["intents"]:
        # Skip intents with an empty response list: np.random.choice raises on it
        if intent["tag"] == tag and intent["responses"]:
            return np.random.choice(intent["responses"])

    # Explicit fallback rather than implicitly returning None
    return fallback

# Chatbot loop: read a line, reply, repeat until the user types 'exit'
print("Chatbot: Hello! I'm your assistant. Type 'exit' to end the conversation.")
while True:
    try:
        # Strip surrounding whitespace so that e.g. "exit " still ends the session
        user_input = input("You: ").strip()
    except EOFError:
        # Input stream was closed; finish the conversation instead of crashing
        print("\nChatbot: Goodbye!")
        break
    if user_input.lower() == "exit":
        print("Chatbot: Goodbye!")
        break
    if not user_input:
        # Nothing was typed; prompt again rather than classifying an empty message
        continue
    response = chatbot_response(user_input)
    print(f"Chatbot: {response}")


Chatbot: Hello! I'm your assistant. Type 'exit' to end the conversation.


You:  exit


Chatbot: Goodbye!


In [3]:
import pickle
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

# Toy corpus standing in for real training data (swap in the actual training code).
# NOTE(review): this cell rebinds `labels`, `vectorizer`, `X`, `label_encoder`, `y`
# and `classifier`, which the first cell also defines; once it has run, those names
# refer to the toy objects built here.
data = ["Hello", "How are you?", "Goodbye"]
labels = ["greeting", "greeting", "farewell"]

# Fit a minimal TF-IDF + LinearSVC model on the toy corpus (placeholder for the
# actual trained model).
vectorizer = TfidfVectorizer()
label_encoder = LabelEncoder()
X = vectorizer.fit_transform(data)
y = label_encoder.fit_transform(labels)
classifier = LinearSVC().fit(X, y)

# Each fitted component is pickled into its own file, in this order.
artifacts = {
    "chatbot_model.pkl": classifier,
    "label_encoder.pkl": label_encoder,
    "vectorizer.pkl": vectorizer,
}
for filename, component in artifacts.items():
    with open(filename, "wb") as handle:
        pickle.dump(component, handle)

print("Model and components exported successfully!")

# When running in Google Colab, trigger a download for every exported file;
# elsewhere the import fails and a hint is printed instead.
try:
    from google.colab import files
    for filename in artifacts:
        files.download(filename)
except ImportError:
    print("The code is not running in Google Colab. Please download the files manually.")


Model and components exported successfully!
The code is not running in Google Colab. Please download the files manually.
