In [6]:
import nltk
import random
import json
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import accuracy_score

In [7]:
with open('intents.json') as file:
    intents = json.load(file)

In [8]:
intents

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi',
    'How are you?',
    'Is anyone there?',
    'Hello',
    'Good day',
    "What's up",
    'how are ya',
    'heyy',
    'whatsup',
    '??? ??? ??'],
   'responses': ['Hello!',
    'Good to see you again!',
    'Hi there, how can I help?'],
   'context_set': ''},
  {'tag': 'goodbye',
   'patterns': ['cya',
    'see you',
    'bye bye',
    'See you later',
    'Goodbye',
    'I am Leaving',
    'Bye',
    'Have a Good day',
    'talk to you later',
    'ttyl',
    'i got to go',
    'gtg'],
   'responses': ['Sad to see you go :(',
    'Talk to you later',
    'Goodbye!',
    'Come back soon'],
   'context_set': ''},
  {'tag': 'creator',
   'patterns': ['what is the name of your developers',
    'what is the name of your creators',
    'what is the name of the developers',
    'what is the name of the creators',
    'who created you',
    'your developers',
    'your creators',
    'who are your developers',
    'developers',
  

In [9]:
text_data = []
labels = []
stopwords = set(nltk.corpus.stopwords.words('english'))
for intent in intents['intents']:
    for example in intent['patterns']:
        tokens = nltk.word_tokenize(example.lower())
        text_data.append(' '.join([token for token in tokens if token not in stopwords and token.isalpha()]))
        labels.append(intent['tag'])

vectorizer =  TfidfVectorizer()
X = vectorizer.fit_transform(text_data)
y = labels

In [10]:
def find_best_model(X, y, test_size=0.2):

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    
   
    models = [
        ('Logistic Regression', LogisticRegression(), {'C': [0.1, 1, 10], 'max_iter': [10000]}),
        ('Multinomial Naive Bayes', MultinomialNB(), {'alpha': [0.1, 0.5, 1.0]}),
        ('Linear SVC', LinearSVC(), {'C': [0.1, 1, 10], 'max_iter': [10000]}),
        ('Decision Tree', DecisionTreeClassifier(), {'max_depth': [10, 20, 30], 'min_samples_split': [2, 5, 10]}),
        ('Random Forest', RandomForestClassifier(), {'n_estimators': [100, 200, 300], 'max_depth': [10, 20, 30], 'min_samples_split': [2, 5, 10]})
    ]
    

    

    for name, model, param_grid in models:
        grid = GridSearchCV(model, param_grid, cv=5, n_jobs=-1)
        grid.fit(X_train, y_train)
        y_pred = grid.predict(X_test)
        score = accuracy_score(y_test, y_pred)
        print(f'{name}: {score:.4f} (best parameters: {grid.best_params_})')
 
    best_model = max(models, key=lambda x: GridSearchCV(x[1], x[2], cv=5, n_jobs=-1).fit(X_train, y_train).score(X_test, y_test))
    print(f'\nBest model: {best_model[0]}')
    return best_model[1]

In [11]:
best_model = find_best_model(X, y)

Logistic Regression: 0.7654 (best parameters: {'C': 10, 'max_iter': 10000})
Multinomial Naive Bayes: 0.7037 (best parameters: {'alpha': 0.1})
Linear SVC: 0.7654 (best parameters: {'C': 1, 'max_iter': 10000})
Decision Tree: 0.5062 (best parameters: {'max_depth': 30, 'min_samples_split': 5})
Random Forest: 0.7037 (best parameters: {'max_depth': 30, 'min_samples_split': 5, 'n_estimators': 100})

Best model: Logistic Regression


In [12]:
best_model.fit(X, y)

In [13]:
def chatbot_response(user_input):
    input_text = vectorizer.transform([user_input])
    predicted_intent = best_model.predict(input_text)[0]
    
    for intent in intents['intents']:
        if intent['tag'] == predicted_intent:
            response = random.choice(intent['responses'])
            break
            
    return response

In [14]:
print('Hello! I am a chatbot. How can I help you today? Type "quit" to exit.')
while True:
    user_input = input('> ')
    if user_input.lower() == 'quit':
        break
    response = chatbot_response(user_input)
    print(response)

Hello! I am a chatbot. How can I help you today? Type "quit" to exit.
> hello
Hello!
> FEEs
For Fee detail visit <a target="_blank" href="LINK"> here</a>
> Sports
Our university encourages all-round development of students and hence provides sports facilities in the campus. For more details visit<a target="_blank" href=/"(LINK IF HAVE)">here</a>
> weird
Our university offers Information Technology, computer Engineering, Mechanical engineering,Chemical engineering, Civil engineering and extc Engineering.
> yes
Our university offers Information Technology, computer Engineering, Mechanical engineering,Chemical engineering, Civil engineering and extc Engineering.
> What's up
Our university offers Information Technology, computer Engineering, Mechanical engineering,Chemical engineering, Civil engineering and extc Engineering.
> whats up
You can call me Mind Reader.
> whats is you doing
You can call me Mind Reader.
> Goodbye
Talk to you later
> quit


In [15]:
import pickle


with open('chatbot_model.pkl', 'wb') as f:
    pickle.dump(best_model, f)


with open('vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

with open('intents.json', 'w') as f:
    json.dump(intents, f)