In [3]:
!pip install spacy gensim
!python -m spacy download en_core_web_sm

Collecting spacy
  Downloading spacy-3.8.4-cp311-cp311-win_amd64.whl (12.2 MB)
     --------------------------------------- 12.2/12.2 MB 10.4 MB/s eta 0:00:00
Collecting gensim
  Downloading gensim-4.3.3-cp311-cp311-win_amd64.whl (24.0 MB)
     ---------------------------------------- 24.0/24.0 MB 6.8 MB/s eta 0:00:00
Collecting spacy-legacy<3.1.0,>=3.0.11
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl (29 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0
  Downloading spacy_loggers-1.0.5-py3-none-any.whl (22 kB)
Collecting murmurhash<1.1.0,>=0.28.0
  Downloading murmurhash-1.0.12-cp311-cp311-win_amd64.whl (25 kB)
Collecting cymem<2.1.0,>=2.0.2
  Downloading cymem-2.0.11-cp311-cp311-win_amd64.whl (39 kB)
Collecting preshed<3.1.0,>=3.0.2
  Downloading preshed-3.0.9-cp311-cp311-win_amd64.whl (122 kB)
     -------------------------------------- 122.3/122.3 kB 2.4 MB/s eta 0:00:00
Collecting thinc<8.4.0,>=8.3.4
  Downloading thinc-8.3.4-cp311-cp311-win_amd64.whl (1.5 MB)
     ------------


[notice] A new release of pip available: 22.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 12.8/12.8 MB 9.9 MB/s eta 0:00:00
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.8.0
[38;5;2m[+] Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')



[notice] A new release of pip available: 22.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [31]:
import json
import spacy
import numpy as np
import gensim
from gensim.models import Word2Vec
import pickle
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import load_model

# Load SpaCy English model
nlp = spacy.load("en_core_web_sm")


In [5]:
# Load intents JSON file
with open("intents.json", "r", encoding="utf-8") as file:
    intents = json.load(file)

In [24]:
sentences = []
words = []
classes = []
documents = []
ignore_words = ["?", "!", ".", ","]

In [25]:
for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        # Process text using SpaCy (tokenization, lemmatization, stopword removal)
        doc = nlp(pattern.lower())  # Convert to lowercase
        tokens = [token.lemma_ for token in doc if token.text not in ignore_words and not token.is_stop]

        # Store words in vocabulary
        words.extend(tokens)  # <- ✅ Missing in previous code

        sentences.append(tokens)
        documents.append((tokens, intent["tag"]))

        # Add unique class labels
        if intent["tag"] not in classes:
            classes.append(intent["tag"])

In [26]:
words = sorted(set(words))  # <- ✅ Ensures words are unique and sorted

In [28]:
w2v_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

In [29]:
# Save the trained Word2Vec model
w2v_model.save("word2vec.model")

In [27]:
# Save words and classes
pickle.dump(classes, open("classes.pkl", "wb"))
pickle.dump(words, open("words.pkl", "wb"))

In [11]:
# Function to convert sentences to vectors
def sentence_to_vector(sentence, w2v_model):
    doc = nlp(sentence.lower())
    tokens = [token.lemma_ for token in doc if token.text not in ignore_words and not token.is_stop]
    
    word_vectors = [w2v_model.wv[word] for word in tokens if word in w2v_model.wv]
    
    if len(word_vectors) == 0:
        return np.zeros(100)  # Return zero vector if no word found
    
    return np.mean(word_vectors, axis=0)

# Prepare training data
X_train = []
y_train = []

for doc, tag in documents:
    X_train.append(sentence_to_vector(" ".join(doc), w2v_model))
    y_train.append(classes.index(tag))

X_train = np.array(X_train)
y_train = np.array(y_train)

# Save training data
np.save("X_train.npy", X_train)
np.save("y_train.npy", y_train)

print("Preprocessing completed!")


Preprocessing completed!


In [17]:
# Define model
# Load preprocessed data
X_train = np.load("X_train.npy")
y_train = np.load("y_train.npy")

# Convert labels to one-hot encoding
y_train = to_categorical(y_train, num_classes=len(set(y_train)))  # One-hot encode labels

# Define Neural Network model
model = Sequential([
    Input(shape=(X_train.shape[1],)),  # Correct way to define input shape
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(len(y_train[0]), activation="softmax")  # Output layer with softmax activation
])

# Compile the model
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, epochs=200, batch_size=5, verbose=1)

# Save the trained model
model.save("chatbot_model.h5")

print("Model training completed!")

Epoch 1/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.0849 - loss: 2.4847      
Epoch 2/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2152 - loss: 2.4837 
Epoch 3/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0127 - loss: 2.4849     
Epoch 4/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2670 - loss: 2.4753 
Epoch 5/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1894 - loss: 2.4772     
Epoch 6/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2055 - loss: 2.4784 
Epoch 7/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1455 - loss: 2.4800 
Epoch 8/200
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1233 - loss: 2.4758     
Epoch 9/200
[1m8/8[0m [32m━━━━━━━━━━



Model training completed!


In [None]:
model = load_model("chatbot_model.h5")

# Load Word2Vec model
w2v_model = Word2Vec.load("word2vec.model")

# Load intents file
with open("intents.json", "r", encoding="utf-8") as file:
    intents = json.load(file)

# Load words and classes
words = pickle.load(open("words.pkl", "rb"))
classes = pickle.load(open("classes.pkl", "rb"))

# Function to convert a sentence into a Word2Vec vector
def sentence_to_vector(sentence, w2v_model):
    words = sentence.lower().split()  # Simple tokenization (use SpaCy for better results)
    vector = [w2v_model.wv[word] for word in words if word in w2v_model.wv]
    
    if vector:
        return sum(vector) / len(vector)  # Averaging word vectors
    else:
        return [0] * w2v_model.vector_size  # Handle unknown words

# Function to predict intent
def predict_intent(sentence):
    vector = sentence_to_vector(sentence, w2v_model)
    if not any(vector):  # Check if vector is empty (all zeros)
        return []

    vector = vector.reshape(1, -1)  # Reshape for model input
    predictions = model.predict(vector)[0]
    
    ERROR_THRESHOLD = 0.25
    results = [[i, p] for i, p in enumerate(predictions) if p > ERROR_THRESHOLD]
    results.sort(key=lambda x: x[1], reverse=True)

    return [{"intent": classes[r[0]], "probability": str(r[1])} for r in results]

# Function to get chatbot response
def chatbot_response(sentence):
    intents_list = predict_intent(sentence)
    if intents_list:
        tag = intents_list[0]["intent"]
        for intent in intents["intents"]:
            if intent["tag"] == tag:
                return random.choice(intent["responses"])
    return "I'm not sure how to answer that."

# Interactive chat loop
def chat():
    print("🤖 Chatbot is ready! Type 'quit' to exit.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "quit":
            print("Chatbot: Goodbye! Have a great day! 😊")
            break
        response = chatbot_response(user_input)
        print(f"Chatbot: {response}")

# Start chatbot
chat()



🤖 Chatbot is ready! Type 'quit' to exit.


You:  hi


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Chatbot: Hey there! Ready to explore the world of engineering?


You:  where is the college


Chatbot: I'm not sure how to answer that.


You:  what are the depts here


Chatbot: I'm not sure how to answer that.


You:  what are the departments available


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Chatbot: Yes, hostel life is an experience you won’t forget—good food, great friends, and occasional power cuts for that true ‘survival mode’ feel. Apply early to grab a spot!


You:  gcect contact


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Chatbot: You can call us at 033-2370 1263, or if you prefer writing over talking, drop an email at principal.gcect@gcect.ac.in. Just don’t expect an instant reply—professors are busy people!


You:  location


Chatbot: I'm not sure how to answer that.
