### Importing Libraries

In [1]:
import json 
import numpy as np
import nltk
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords
import tensorflow as tf
import pickle 
import string
import tflearn
import random

Instructions for updating:
non-resource variables are not supported in the long term
curses is not supported on this machine (please install/reinstall curses for an optimal experience)


### Importing json file

In [21]:
# Read the intents definition file into memory.
with open('Intentions_data_file.json', encoding='utf-8') as json_data:
    intentions_data = json.load(json_data)

# Preview the first five intents (an empty list is used when the
# 'intentions' key is missing, so the preview simply prints nothing).
intentions_list = intentions_data.get('intentions', [])
preview = intentions_list[:5]
for intention in preview:
    print(f"Tag : {intention['tag']}: \nPattern :{intention['patterns']}: \nResponse :{intention['responses']} \n")

Tag : Welcome: 
Pattern :['Hello', 'Hi', 'Hi there', 'Hello there', 'Good evening', "Hello, I'm a new student, I have some questions, please!", 'Hi, I have some questions for you']: 
Response :['Great! Hi! How can I help?', 'Good! Hi, how can I help you?', "Welcome to our school! I'm here to help you. How can I assist you?", "Hello! I'm here to assist you. How can I help you?", "Welcome! I'm here to assist you. What specific information are you looking for?"] 

Tag : CourtesyGreeting: 
Pattern :['How are you?', 'Hi how are you?', 'Hello how are you?', 'Hola how are you?', 'How are you doing?', 'Hope you are doing well?', 'Hello hope you are doing well?']: 
Response :['Hello, I am great, how are you?', 'Hello, how are you? I am great thanks!', 'Hello, I am good thank you, how are you?', 'Hi, I am great, how are you?', 'Hi, how are you? I am great thanks!', 'Hi, I am good thank you, how are you?', 'Hi, good thank you, how are you?'] 

Tag : Name: 
Pattern :['What is your name?', 'What co

### Preprocessing data

In [3]:
# Print NLTK's English stop-word list for inspection.
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [4]:
# Display the punctuation characters exposed by the standard `string` module;
# the preprocessing cells below filter tokens against this string.
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [5]:
# English stop words (as a set for fast membership tests) and the stemmer
# shared by the training and inference cells.
stop_words = set(stopwords.words('english'))
stemmer = SnowballStemmer('english')

mots = []       # every kept token, later reduced to the sorted stem vocabulary
classes = []    # intent tags, in order of first appearance until sorted below
documents = []  # (tokens, tag) pairs, one per training pattern

for intention in intentions_data['intentions']:
    for pattern in intention['patterns']:
        # Tokenise, then drop punctuation tokens and stop words.
        # NOTE(review): the stop-word test runs on the raw token, before
        # lower-casing, so capitalised stop words (e.g. "What") are kept.
        # traitement_expression() filters the same way; change both together.
        tokens = [
            token
            for token in nltk.word_tokenize(pattern)
            if token not in string.punctuation and token not in stop_words
        ]

        mots.extend(tokens)
        documents.append((tokens, intention['tag']))

        if intention['tag'] not in classes:
            classes.append(intention['tag'])

# Vocabulary: unique lower-cased stems, sorted for a stable feature order.
mots = sorted({stemmer.stem(token.lower()) for token in mots})
classes = sorted(set(classes))
print(classes)

['Achievements', 'Administration', 'Awards', 'BookBorrowing', 'CSIOpportunities', 'CSIProgram', 'Certifications', 'Clubs', 'ClubsSocialMedia', 'CourtesyGreeting', 'DIE', 'DIEOpportunities', 'Doctorate', 'DoctorateApplication', 'DoctorateEligible', 'DoctorateFields', 'EngineeringSpecializations', 'EventTypes', 'Events', 'Exams', 'Feedback', 'Goodbye', 'IMOpportunities', 'IMProgram', 'ISDOpportunities', 'ISDT', 'ISSCD', 'ISSCDOpportunities', 'ITSupport', 'Invitations', 'KEDS', 'KEDSOpportunities', 'Library', 'LibraryHours', 'MECOHVOpportunities', 'MECOHVProgram', 'MasterFields', 'Name', 'Registration', 'Restaurant', 'Scholarships', 'SchoolSocialMedia', 'Specialization', 'Sport', 'StudyHours', 'Teachers', 'Thanks', 'Understand', 'Welcome']


### Creating data for modeling

In [6]:
# Build one (bag-of-words vector, one-hot label) pair per training pattern.
training_rows = []
output = []  # unused here; kept so the notebook namespace is unchanged
output_vide = [0] * len(classes)  # all-zero template for the one-hot label

for pattern_tokens, tag in documents:
    # Stem the pattern's tokens the same way the vocabulary was stemmed.
    pattern_mots = {stemmer.stem(mot.lower()) for mot in pattern_tokens}

    # 1 where the vocabulary word occurs in this pattern, 0 elsewhere.
    ensemble = [1 if m in pattern_mots else 0 for m in mots]

    # One-hot encode the intent tag.
    output_row = list(output_vide)
    output_row[classes.index(tag)] = 1

    training_rows.append((ensemble, output_row))

# The two columns have different lengths (len(mots) vs len(classes)), so
# np.array(rows) would be a ragged array: deprecated since NumPy 1.19 and a
# ValueError since 1.24. Fill an explicit (N, 2) object array instead so that
# training[:, 0] / training[:, 1] keep working on every NumPy version.
training = np.empty((len(training_rows), 2), dtype=object)
for row_index, (ensemble, output_row) in enumerate(training_rows):
    training[row_index, 0] = ensemble
    training[row_index, 1] = output_row

### Splitting training data into feature and label lists

In [7]:
# Column 0 holds the bag-of-words vectors, column 1 the one-hot labels.
train_x, train_y = (list(training[:, column]) for column in (0, 1))

### Model creation and training

In [8]:
# Clear the default TensorFlow graph before building the network.
tf.compat.v1.reset_default_graph()

n_features = len(train_x[0])  # number of distinct stemmed words (input size)
n_classes = len(train_y[0])   # number of intent tags (output size)

# Input layer: one unit per vocabulary word.
net = tflearn.input_data(shape=[None, n_features])

# Two hidden fully connected layers of 8 units each
# (no regulariser is configured on these layers).
for _ in range(2):
    net = tflearn.fully_connected(net, 8)

# Output layer: softmax over the intent tags.
net = tflearn.fully_connected(net, n_classes, activation='softmax')
# Training op with tflearn's default settings (the training log reports Adam).
net = tflearn.regression(net)

# Create the model and fit it on the full training set.
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)

Training Step: 10999  | total loss: [1m[32m0.06550[0m[0m | time: 0.053s
| Adam | epoch: 1000 | loss: 0.06550 - acc: 0.9699 -- iter: 80/84
Training Step: 11000  | total loss: [1m[32m0.05921[0m[0m | time: 0.057s
| Adam | epoch: 1000 | loss: 0.05921 - acc: 0.9729 -- iter: 84/84
--


In [14]:
# Persist the trained model to disk under the name 'model.teflearn'.
# NOTE(review): 'teflearn' looks like a typo for 'tflearn'; it is left as is
# because any code that loads the model must use the same file name.
model.save('model.teflearn')

INFO:tensorflow:C:\Users\hp\Downloads\Projet Chatbot\Chatbot_Esibot\LastVersions\model.teflearn is not in all_model_checkpoint_paths. Manually adding it.


### Preprocessing new data

In [18]:
# Persist the vocabulary, class labels and training data. The context manager
# guarantees the file is flushed and closed before it is read back below.
with open("training_data", 'wb') as training_file:
    pickle.dump({'mots': mots, 'classes': classes, 'train_x': train_x, 'train_y': train_y}, training_file)

# Reload the artefacts exactly as an inference-only session would.
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook, never files from an untrusted source.
with open("training_data", "rb") as training_file:
    data = pickle.load(training_file)

mots = data['mots']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

# Reload the intents so the response texts are available to response().
with open('Intentions_data_file.json', encoding='utf-8') as json_data:
    intentions_data = json.load(json_data)

def traitement_expression(expression):
    """Tokenise a user sentence and return its filtered, stemmed tokens.

    Tokens that are found in ``string.punctuation`` (a substring test) or in
    the module-level ``stop_words`` set are dropped; the remaining tokens are
    lower-cased and stemmed with the module-level ``stemmer``.

    NOTE(review): the stop-word test is applied before lower-casing, which
    mirrors how the training vocabulary was built; change both together.

    Args:
        expression: Raw sentence typed by the user.

    Returns:
        list[str]: Stemmed tokens, in their order of appearance.
    """
    return [
        stemmer.stem(token.lower())
        for token in nltk.word_tokenize(expression)
        if token not in string.punctuation and token not in stop_words
    ]


def ensemble_tab(expression, mots, show_details=False):
    """Encode a sentence as a bag-of-words vector over the vocabulary.

    Args:
        expression: Raw sentence to encode; it is preprocessed with
            ``traitement_expression``.
        mots: Vocabulary (sequence of stemmed words) defining the positions
            of the output vector.
        show_details: When true, print every vocabulary word that matches.

    Returns:
        numpy.ndarray: Vector of ``len(mots)`` entries, 1 where the
        vocabulary word occurs in the sentence and 0 elsewhere.
    """
    expression_mots = traitement_expression(expression)
    ensemble = [0] * len(mots)
    for e in expression_mots:
        for i, m in enumerate(mots):
            if m == e:
                ensemble[i] = 1
                if show_details:
                    # %s, not %e: `m` is a string, and the float-only %e
                    # specifier raised TypeError whenever details were shown.
                    print("it's found: %s" % m)
    return np.array(ensemble)

def classification(expression):
    """Rank every intent tag for a sentence, most probable first.

    The sentence is encoded with ``ensemble_tab`` against the module-level
    vocabulary ``mots`` and scored by the module-level ``model``.

    Args:
        expression: Raw sentence typed by the user.

    Returns:
        list[tuple]: ``(tag, score)`` pairs covering all entries of the
        model's output, sorted by descending score.
    """
    scores = model.predict([ensemble_tab(expression, mots)])[0]
    # sorted() is stable, so ties keep their original index order.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return [(classes[position], score) for position, score in ranked]

In [19]:
def response(expression, ID_Utilisateur='123', show_details=False):
    """Print a random reply for the best-ranked intent that has a definition.

    Candidates from ``classification`` are tried in ranked order; the first
    one whose tag appears in ``intentions_data['intentions']`` supplies the
    reply, picked at random from its ``responses``.

    Args:
        expression: Raw sentence typed by the user.
        ID_Utilisateur: Accepted for interface compatibility; not used.
        show_details: Accepted for interface compatibility; not used.

    Returns:
        None: The reply is printed, not returned.
    """
    for tag, _score in classification(expression):
        for intent in intentions_data['intentions']:
            if intent['tag'] == tag:
                print('\n', random.choice(intent['responses']), "\n")
                return None
    return None

### Test

In [20]:
# Interactive chat session: keep answering until the user types an exit word.
quit_words = ["exit", "quit", "stop", "bye", "leave","goodbye", "good bye", "see you later"]

print("Hello! I am here to chat. if you need to go just enter \"quit\" :)")
input_data = input("You: ")
while input_data.lower() not in quit_words:
    # response() prints the reply itself and returns None.
    answer = response(input_data)
    input_data = input("You: ")
print("Goodbye!")

Hello! I am here to chat. if you need to go just enter "quit" :)
You: Hi! I am a new student and I would like to ask you some questions, please!

 Welcome to our school! I'm here to help you. How can I assist you? 

You: I have just started my first year in Knowledge Engineering and Data Science, can you give me detailed information about training in this field?

 The program aims to provide comprehensive training that combines modules in mathematics/statistics, computer science, and artificial intelligence. It aims to equip students with the skills demanded by the job market to manage and analyze data, including concepts such as exploratory data analysis, statistical inference and modeling, machine learning, and high-dimensional data analysis. 

You: Where can I find information about school clubs?

 You can find information about school clubs in the 'Clubs and Student Organizations' section. This section lists available clubs, their descriptions, meeting times, and contact informatio