In [None]:
'''
Nome: Marcos Paulo
Nome: Victor Manuel

Tema 6 - assunto futebol
'''

In [1]:
import json
import pickle
import nltk
import random
import numpy as np
from nltk.stem import WordNetLemmatizer
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

nltk.download('punkt')
nltk.download('wordnet')

# Define the file paths
intents_file = 'intents.json'
words_file = 'words.pkl'
classes_file = 'classes.pkl'
x_file = 'x.pkl'
y_file = 'y.pkl'
model_file = 'model.h5'
pdf_file = 'lista de dados.pdf'

# Load and preprocess the data
intents = json.loads(open(intents_file).read())
lemmatizer = WordNetLemmatizer()

words = []
documents = []
classes = ['intencoes', 'oquedeseja', 'assuntoFutebol']
ignore_words = ["!", "@", "#", "$", "%", "*", "?"]

for intent_class in intents:
    for intent in intents[intent_class]:
        tag = intent['tag']
        classes.append(tag)

    for pattern in intent['patterns']:
        words.extend(nltk.word_tokenize(pattern.lower()))
        documents.append((nltk.word_tokenize(pattern.lower()), tag))

words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(list(set(words)))
classes = sorted(list(set(classes)))

# Save the preprocessed data
pickle.dump(words, open(words_file, 'wb'))
pickle.dump(classes, open(classes_file, 'wb'))

training = []
output_empty = [0] * len(classes)

# Extract patterns from intents
training_data = []
for intent in intents.values():
    for item in intent:
        patterns = item['patterns']
        tag = item['tag']
        for pattern in patterns:
            training_data.append((pattern, tag))


for document in documents:
    bag = []
    pattern_words = document[0]
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]

    for word in words:
        bag.append(1) if word in pattern_words else bag.append(0)

    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])

random.shuffle(training)
training = np.array(training)

x = list(training[:, 0])
y = list(training[:, 1])

pickle.dump(x, open(x_file, 'wb'))
pickle.dump(y, open(y_file, 'wb'))

# Define the neural network model
model = Sequential()
model.add(Dense(128, input_shape=(len(x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(y[0]), activation='softmax'))

# Compile and train the model
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(np.array(x), np.array(y), epochs=200, batch_size=5, verbose=1)
model.save(model_file)

# Generate a PDF file with data lists
def generate_pdf(data_lists):
    pdf_filename = pdf_file
    c = canvas.Canvas(pdf_filename, pagesize=letter)
    x = 50
    y = 750

    c.setFont("Helvetica", 12)
    c.drawString(x, y, "Lista de Palavras (words):")
    y -= 20
    for word in data_lists["words"]:
        c.drawString(x, y, word)
        y -= 15

    y -= 20
    c.drawString(x, y, "Lista de Classes:")
    y -= 20
    for class_item in data_lists["classes"]:
        c.drawString(x, y, class_item)
        y -= 15

    y -= 20
    c.drawString(x, y, "Lista de Dados de Treino X:")
    y -= 20
    for x_item in data_lists["x"]:
        c.drawString(x, y, str(x_item))
        y -= 15

    y -= 20
    c.drawString(x, y, "Lista de Alvos de Treino Y:")
    y -= 20
    for y_item in data_lists["y"]:
        c.drawString(x, y, str(y_item))
        y -= 15

    c.save()

data_lists = {
    "words": words,
    "classes": classes,
    "x": x,
    "y": y,
}

generate_pdf(data_lists)

# Load the trained model
model = load_model(model_file)

# Define helper functions for text processing and class prediction
def tokenize_words(text):
    return nltk.word_tokenize(text.lower())

def bag_of_words(tokenized_words, words):
    bag = [0] * len(words)
    for w in tokenized_words:
        for i, word in enumerate(words):
            if word == w:
                bag[i] = 1
    return bag

def class_prediction(message, words, model):
    # Tokenize the input message
    tokenized_words = tokenize_words(message)
    # Convert tokenized words to input vector
    input_vector = [words.index(word) + 1 if word in words else 0 for word in tokenized_words]
    # Pad or truncate the input vector to match the expected input length
    input_vector = pad_or_truncate(input_vector, input_length=52)
    # Reshape the input vector to match the model's input shape
    input_vector = np.array(input_vector, dtype=np.float32).reshape(1, -1)
    # Predict the class probabilities for the input vector
    probabilities = model.predict(input_vector)[0]
    # Get the index of the class with the highest probability
    predicted_class_index = np.argmax(probabilities)
    # Get the corresponding class label
    predicted_class = classes[predicted_class_index]
    return predicted_class, probabilities

def pad_or_truncate(sequence, input_length):
    if len(sequence) < input_length:
        sequence = sequence + [0] * (input_length - len(sequence))
    else:
        sequence = sequence[:input_length]
    return sequence



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Marcos\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Marcos\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Epoch 1/200


  training = np.array(training)


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 1

Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [3]:
message = 'Oi'
predicted_class, probabilities = class_prediction(message, words, model)
print(predicted_class, probabilities)

saudacao [3.3196365e-31 5.7392696e-33 1.2580144e-29 5.5881337e-31 5.1674521e-30
 5.7830920e-33 7.6735862e-31 8.5434046e-17 1.0361485e-31 5.3197314e-34
 6.3323352e-22 1.3302641e-30 2.2730544e-31 5.2244339e-31 2.4328546e-29
 1.0942694e-32 3.3560409e-30 5.0729589e-30 2.7388672e-32 3.6959474e-35
 3.6697839e-30 1.0000000e+00]
