In [1]:
import json
import numpy as np
import random
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD

In [2]:
# Charger le fichier JSON
with open('data/intents.json') as file:
    data = json.load(file)

In [3]:
from nltk.tokenize import word_tokenize

lemmatizer = WordNetLemmatizer()

In [4]:
# Prétraitement des données
words = []
classes = []
documents = []
ignore_words = ['?', '!', '.', ',','']

# Extraire les patterns et les tags
for intent in data['intents']:
    for pattern in intent['patterns']:
        # Tokenisation des mots dans chaque pattern
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        # Ajouter dans les documents (pattern, tag)
        documents.append((word_list, intent['tag']))
        # Ajouter le tag aux classes
        if intent['tag'] not in classes:
            classes.append(intent['tag'])



In [5]:
# Lemmatisation et suppression des mots ignorés
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))

classes = sorted(set(classes))

In [6]:
# Affichage des résultats de prétraitement
print(f"Classes: {classes}")
print(f"Words: {words}")
print(f"Documents: {documents}")

Classes: ['akanda', 'aurevoir', 'destination', 'itineraire_akanda', 'itineraire_libreville', 'itineraire_loango', 'itineraire_lope', 'itineraire_monts_cristal', 'itineraire_pointe_denis', 'itineraire_pongara', 'libreville', 'loango', 'lope', 'monts_cristal', 'pointe_denis', 'pongara', 'remerciment', 'reservation_akanda', 'reservation_libreville', 'reservation_loango', 'reservation_lope', 'reservation_monts_cristal', 'reservation_pointe_denis', 'reservation_pongara', 'salutation']
Words: ['activitã©s', 'aide', 'akanda', 'au', 'aux', 'besoin', 'bonjour', 'bonne', 'bonsoir', 'bye', 'cherche', 'circuit', 'comment', 'coucou', 'cristal', 'crristal', "d'akanda", "d'un", 'de', 'denis', 'destination', 'dis', 'donner', 'faire', 'fais', 'fait', 'gabon', 'hello', 'hey', 'itinã©raire', "j'ai", 'je', 'journã©ã©', 'la', 'le', 'libreville', 'lieu', 'loango', 'lopã©', 'me', 'meilleures', 'merci', 'moi', 'montq', 'monts', 'national', 'on', 'parc', 'parcours', 'plus', 'pointe', 'pongagara', 'pongara', 'p

In [7]:
import pickle
pickle.dump(words, open('save/words.pkl', 'wb'))
pickle.dump(classes, open('save/classes.pkl', 'wb'))

In [8]:
# Création des données d'entraînement
training = []
output_empty = [0] * len(classes)

for doc in documents:
    bag = []
    word_patterns = doc[0]
    word_patterns = [lemmatizer.lemmatize(w.lower()) for w in word_patterns]
    
    for w in words:
        bag.append(1 if w in word_patterns else 0)
    
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    
    training.append([bag, output_row])

# Mélanger et convertir en numpy array
# Mélanger et convertir en numpy array
random.shuffle(training)

# Séparer les features et les labels
train_x = np.array([item[0] for item in training], dtype=np.float32)
train_y = np.array([item[1] for item in training], dtype=np.float32)

In [9]:
print(f"train_x shape: {train_x.shape}")
print(f"train_y shape: {train_y.shape}")
print(f"train_x dtype: {train_x.dtype}")
print(f"train_y dtype: {train_y.dtype}")

train_x shape: (107, 90)
train_y shape: (107, 25)
train_x dtype: float32
train_y dtype: float32


In [10]:
# Construction du modèle séquentiel
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compiler le modèle
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# Entraîner le modèle
model.fit(train_x, train_y, epochs=2000, batch_size=5, verbose=1)

# Sauvegarder le modèle et les fichiers essentiels
model.save("model/chatbot_model.h5")

Epoch 1/2000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0455 - loss: 3.2482    
Epoch 2/2000
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0469 - loss: 3.2294     
Epoch 3/2000
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.1338 - loss: 3.1241 
Epoch 4/2000
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.1201 - loss: 3.0355     
Epoch 5/2000
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.1316 - loss: 3.0061     
Epoch 6/2000
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2655 - loss: 2.7577 
Epoch 7/2000
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2161 - loss: 2.5677     
Epoch 8/2000
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2982 - loss: 2.3777 
Epoch 9/2000
[1m22/22[0m

