In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

In [3]:
# load data from JSON file
with open('/kaggle/input/university-chatbot-dataset/intents.json') as file:
    data = json.load(file)

# extract text and intent from data
texts = []
intents = []
for intent in data['intents']:
    for text in intent['text']:
        texts.append(text)
        intents.append(intent['intent'])

# tokenize text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
encoded_texts = tokenizer.texts_to_sequences(texts)

# save tokenizer
import pickle

with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# pad sequences to have equal length
max_len = max([len(x) for x in encoded_texts])
padded_texts = pad_sequences(encoded_texts, maxlen=max_len, padding='post')

# create label encoder object
le = LabelEncoder()

# fit and transform the intents to integer labels
encoded_intents = le.fit_transform(intents)

# get the number of unique labels
num_intents = len(le.classes_)

# apply one-hot encoding to the integer labels
encoded_intents = tf.one_hot(encoded_intents, depth=num_intents)

# define model architecture
input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128, input_length=max_len)(input_layer)
lstm_layer = LSTM(128)(embedding_layer)
output_layer = Dense(num_intents, activation='softmax')(lstm_layer)
model = Model(inputs=input_layer, outputs=output_layer)

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# train model
model.fit(padded_texts, encoded_intents, epochs=50, batch_size=16)

# save model
model.save('chatbot_model34.h5')

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/university-chatbot-dataset/intents.json'

In [None]:

import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# load data from JSON file
with open('/kaggle/input/university-chatbot-dataset/intents.json') as file:
    data = json.load(file)

# extract text and intent from data
texts = []
intents = []
for intent in data['intents']:
    for text in intent['text']:
        texts.append(text)
        intents.append(intent['intent'])

# tokenize text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# load saved model
model = load_model('/kaggle/working/chatbot_model34.h5')

# define maximum sequence length
max_len = model.input_shape[1]

# create label encoder object
le = LabelEncoder()
le.fit(intents)

# create inverse mapping of label encoder for intent prediction
intent_mapping = {i: label for i, label in enumerate(le.classes_)}

# start chatbot interaction
print('Welcome to the chatbot! Type "quit" to exit.')
while True:
    # get user input
    #user_input = input('You: ').lower().strip()  #for user input run this line
    user_input = "quit"
    
    # check if user wants to quit
    if user_input == 'quit':
        break
    
    # encode user input text
    encoded_input = tokenizer.texts_to_sequences([user_input])
    padded_input = pad_sequences(encoded_input, maxlen=max_len, padding='post')
    
    # predict intent
    intent_prob = model.predict(padded_input)[0]
    intent_idx = np.argmax(intent_prob)
    intent_label = le.inverse_transform([intent_idx])[0]
    
    # retrieve response
    for intent in data['intents']:
        if intent['intent'] == intent_label:
            response = np.random.choice(intent['responses'])
            print('Chatbot:', response)
            break
