In [3]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, GlobalMaxPool1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
data = pd.read_excel('data.xlsx') # Substitua pelo nome do arquivo
data = data.dropna()

In [26]:
# 1. Carregando os dados

X = data["X_Text_input"]
y = data["Y_SISTEMA_output"]  # Trabalhando apenas com Y_SISTEMA_output


# 2. Pré-processando a saída
# Convertendo as categorias da saída para valores numéricos
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 3. Dividindo os dados
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# 4. Tokenização e padronização das sequências
max_words = 10000  # Número máximo de palavras no vocabulário
max_len = 100      # Tamanho máximo de sequência

tokenizer = Tokenizer(num_words=max_words, oov_token="<UNK>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

# 5. Criando o modelo
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128),
    LSTM(128, return_sequences=True),
    GlobalMaxPool1D(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')  # Classificação multiclasse
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# 6. Treinando o modelo
history = model.fit(
    X_train_padded, y_train,
    validation_data=(X_test_padded, y_test),
    epochs=50,
    batch_size=32,
    verbose=1
)


Epoch 1/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 57ms/step - accuracy: 0.5708 - loss: 1.3392 - val_accuracy: 0.9270 - val_loss: 0.2865
Epoch 2/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 59ms/step - accuracy: 0.9418 - loss: 0.2377 - val_accuracy: 0.9499 - val_loss: 0.1880
Epoch 3/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 58ms/step - accuracy: 0.9653 - loss: 0.1440 - val_accuracy: 0.9561 - val_loss: 0.1631
Epoch 4/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 57ms/step - accuracy: 0.9770 - loss: 0.0914 - val_accuracy: 0.9561 - val_loss: 0.1653
Epoch 5/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 58ms/step - accuracy: 0.9805 - loss: 0.0768 - val_accuracy: 0.9585 - val_loss: 0.1692
Epoch 6/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 61ms/step - accuracy: 0.9834 - loss: 0.0699 - val_accuracy: 0.9599 - val_loss: 0.1671
Epoch 7/50
[1m2

In [27]:
# 7. Avaliando o modelo
loss, accuracy = model.evaluate(X_test_padded, y_test)
print(f"Loss: {loss}, Accuracy: {accuracy}")

# 8. Fazendo previsões
y_pred = model.predict(X_test_padded)
y_pred_classes = np.argmax(y_pred, axis=1)

# Decodificando os rótulos previstos
pred_labels = label_encoder.inverse_transform(y_pred_classes)
print(pred_labels[:5])

[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - accuracy: 0.9555 - loss: 0.2811
Loss: 0.2920001447200775, Accuracy: 0.9575179219245911
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step
['FRENAGEM' 'CCT' 'RODEIRO' 'CCT' 'ENTRESSAFRA']


In [28]:
#salvando movel
from tensorflow.keras.models import save_model
import pickle

# Salvar o modelo treinado
model.save("modelo_classificacao_y_Sistema.h5")

# Salvar o tokenizer
with open("tokenizer_Sistema.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

# Salvar o LabelEncoder
with open("label_encoder_Sistema.pkl", "wb") as f:
    pickle.dump(label_encoder, f)




In [29]:
from tensorflow.keras.models import load_model
import pickle

# Carregar o modelo
modelo_carregado = load_model("modelo_classificacao_y_Sistema.h5")

# Carregar o tokenizer
with open("tokenizer_Sistema.pkl", "rb") as f:
    tokenizer_carregado = pickle.load(f)

# Carregar o LabelEncoder
with open("label_encoder_Sistema.pkl", "rb") as f:
    label_encoder_carregado = pickle.load(f)

# Fazer previsão com o modelo carregado
novo_texto = ["02/01/2024-|CA||4E||RODA BANDAGEM ESCOAMENTO"]
novo_texto_seq = tokenizer_carregado.texts_to_sequences(novo_texto)
novo_texto_padded = pad_sequences(novo_texto_seq, maxlen=max_len, padding='post', truncating='post')

pred_proba = modelo_carregado.predict(novo_texto_padded)
pred_classe = np.argmax(pred_proba, axis=1)
pred_label = label_encoder_carregado.inverse_transform(pred_classe)

print(f"Entrada: {novo_texto[0]}")
print(f"Previsão: {pred_label[0]}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303ms/step
Entrada: 02/01/2024-|CA||4E||RODA BANDAGEM ESCOAMENTO
Previsão: RODEIRO


In [30]:
#COnjunto !!!!!!!!!
# 1. Carregando os dados
data = data.dropna()
X = data["X_Text_input"]
y = data["Y_CONJUNTO_output"]  # Trabalhando apenas com Y_SISTEMA_output
# 2. Pré-processando a saída
# Convertendo as categorias da saída para valores numéricos
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 3. Dividindo os dados
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# 4. Tokenização e padronização das sequências
max_words = 10000  # Número máximo de palavras no vocabulário
max_len = 100      # Tamanho máximo de sequência

tokenizer = Tokenizer(num_words=max_words, oov_token="<UNK>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

# 5. Criando o modelo
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128),
    LSTM(128, return_sequences=True),
    GlobalMaxPool1D(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')  # Classificação multiclasse
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 6. Treinando o modelo
history = model.fit(
    X_train_padded, y_train,
    validation_data=(X_test_padded, y_test),
    epochs=100,
    batch_size=32,
    verbose=1
)

Epoch 1/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 80ms/step - accuracy: 0.5368 - loss: 2.0098 - val_accuracy: 0.8368 - val_loss: 0.6792
Epoch 2/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 89ms/step - accuracy: 0.8571 - loss: 0.6315 - val_accuracy: 0.8874 - val_loss: 0.4677
Epoch 3/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 84ms/step - accuracy: 0.8947 - loss: 0.4498 - val_accuracy: 0.9088 - val_loss: 0.4008
Epoch 4/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 87ms/step - accuracy: 0.9093 - loss: 0.3729 - val_accuracy: 0.9103 - val_loss: 0.3651
Epoch 5/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 82ms/step - accuracy: 0.9188 - loss: 0.3186 - val_accuracy: 0.9212 - val_loss: 0.3306
Epoch 6/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 78ms/step - accuracy: 0.9387 - loss: 0.2443 - val_accuracy: 0.9227 - val_loss: 0.3155
Epoch 7/10

In [31]:
#salvando movel
#COnjunto !!!!!!!!!


from tensorflow.keras.models import save_model
import pickle

# Salvar o modelo treinado
model.save("modelo_classificacao_y_CONJUNTO.h5")

# Salvar o tokenizer
with open("tokenizer_CONJUNTO.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

# Salvar o LabelEncoder
with open("label_encoder_CONJUNTO.pkl", "wb") as f:
    pickle.dump(label_encoder, f)



In [32]:
#testando model
#COnjunto !!!!!!!!!
from tensorflow.keras.models import load_model
import pickle
# 4. Tokenização e padronização das sequências
max_words = 10000  # Número máximo de palavras no vocabulário
max_len = 100      # Tamanho máximo de sequência

# Carregar o modelo
modelo_carregado = load_model("modelo_classificacao_y_CONJUNTO.h5")

# Carregar o tokenizer
with open("tokenizer_CONJUNTO.pkl", "rb") as f:
    tokenizer_carregado = pickle.load(f)

# Carregar o LabelEncoder
with open("label_encoder_CONJUNTO.pkl", "rb") as f:
    label_encoder_carregado = pickle.load(f)

# Fazer previsão com o modelo carregado
novo_texto = ["17.02 - rod 1 esq escoamento de material + roda 1 ld friso fino 18mm e quebradiço - não carregar no porto - ATA"]
novo_texto_seq = tokenizer_carregado.texts_to_sequences(novo_texto)
novo_texto_padded = pad_sequences(novo_texto_seq, maxlen=max_len, padding='post', truncating='post')

pred_proba = modelo_carregado.predict(novo_texto_padded)
pred_classe = np.argmax(pred_proba, axis=1)
pred_label = label_encoder_carregado.inverse_transform(pred_classe)

print(f"Entrada: {novo_texto[0]}")
print(f"Previsão: {pred_label[0]}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 287ms/step
Entrada: 17.02 - rod 1 esq escoamento de material + roda 1 ld friso fino 18mm e quebradiço - não carregar no porto - ATA
Previsão: RODEIRO


In [33]:
#Item <--------------------------------------------------------------------------------

# 1. Carregando os dados
data = data.dropna()
X = data["X_Text_input"]
y = data["Y_ITEM_output"]  # Trabalhando apenas com Y_SISTEMA_output
# 2. Pré-processando a saída
# Convertendo as categorias da saída para valores numéricos
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 3. Dividindo os dados
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# 4. Tokenização e padronização das sequências
max_words = 10000  # Número máximo de palavras no vocabulário
max_len = 100      # Tamanho máximo de sequência

tokenizer = Tokenizer(num_words=max_words, oov_token="<UNK>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

# 5. Criando o modelo
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128),
    LSTM(128, return_sequences=True),
    GlobalMaxPool1D(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')  # Classificação multiclasse
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 6. Treinando o modelo
history = model.fit(
    X_train_padded, y_train,
    validation_data=(X_test_padded, y_test),
    epochs=5,
    batch_size=32,
    verbose=1
)

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 60ms/step - accuracy: 0.2259 - loss: 3.5001 - val_accuracy: 0.7026 - val_loss: 1.3487
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 59ms/step - accuracy: 0.7287 - loss: 1.2900 - val_accuracy: 0.7895 - val_loss: 0.9215
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 57ms/step - accuracy: 0.8077 - loss: 0.8536 - val_accuracy: 0.8119 - val_loss: 0.7541
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 60ms/step - accuracy: 0.8412 - loss: 0.6562 - val_accuracy: 0.8248 - val_loss: 0.6925
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 56ms/step - accuracy: 0.8552 - loss: 0.5786 - val_accuracy: 0.8368 - val_loss: 0.6754


In [34]:
#salvando movel
#Item <--------------------------------------------------------------------------------
from tensorflow.keras.models import save_model
import pickle

# Salvar o modelo treinado
model.save("modelo_classificacao_y_Item.h5")

# Salvar o tokenizer
with open("tokenizer_Item.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

# Salvar o LabelEncoder
with open("label_encoder_Item.pkl", "wb") as f:
    pickle.dump(label_encoder, f)



In [35]:
#testando model
#Item <--------------------------------------------------------------------------------
from tensorflow.keras.models import load_model
import pickle

# Carregar o modelo
modelo_carregado = load_model("modelo_classificacao_y_Item.h5")

# Carregar o tokenizer
with open("tokenizer_Item.pkl", "rb") as f:
    tokenizer_carregado = pickle.load(f)

# Carregar o LabelEncoder
with open("label_encoder_Item.pkl", "rb") as f:
    label_encoder_carregado = pickle.load(f)

# Fazer previsão com o modelo carregado
novo_texto = ["roda 3e detecÇÃo wcm médio 248kn"]
novo_texto_seq = tokenizer_carregado.texts_to_sequences(novo_texto)
novo_texto_padded = pad_sequences(novo_texto_seq, maxlen=max_len, padding='post', truncating='post')

pred_proba = modelo_carregado.predict(novo_texto_padded)
pred_classe = np.argmax(pred_proba, axis=1)
pred_label = label_encoder_carregado.inverse_transform(pred_classe)

print(f"Entrada: {novo_texto[0]}")
print(f"Previsão: {pred_label[0]}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step
Entrada: roda 3e detecÇÃo wcm médio 248kn
Previsão: WCM


In [36]:
#Problema <--------------------------------------------------------------------------------

# 1. Carregando os dados
data = data.dropna()
X = data["X_Text_input"]
y = data["Y_PROBLEMA_output"]  # Trabalhando apenas com Y_SISTEMA_output
# 2. Pré-processando a saída
# Convertendo as categorias da saída para valores numéricos
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 3. Dividindo os dados
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# 4. Tokenização e padronização das sequências
max_words = 10000  # Número máximo de palavras no vocabulário
max_len = 100      # Tamanho máximo de sequência

tokenizer = Tokenizer(num_words=max_words, oov_token="<UNK>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

# 5. Criando o modelo
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128),
    LSTM(128, return_sequences=True),
    GlobalMaxPool1D(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')  # Classificação multiclasse
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 6. Treinando o modelo
history = model.fit(
    X_train_padded, y_train,
    validation_data=(X_test_padded, y_test),
    epochs=50,
    batch_size=32,
    verbose=1
)

Epoch 1/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 57ms/step - accuracy: 0.1819 - loss: 3.6387 - val_accuracy: 0.5995 - val_loss: 1.8619
Epoch 2/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 56ms/step - accuracy: 0.6406 - loss: 1.6341 - val_accuracy: 0.6764 - val_loss: 1.3685
Epoch 3/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 55ms/step - accuracy: 0.7183 - loss: 1.1609 - val_accuracy: 0.7270 - val_loss: 1.1986
Epoch 4/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 54ms/step - accuracy: 0.7716 - loss: 0.9194 - val_accuracy: 0.7480 - val_loss: 1.0882
Epoch 5/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 59ms/step - accuracy: 0.8056 - loss: 0.7588 - val_accuracy: 0.7675 - val_loss: 1.0430
Epoch 6/50
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 56ms/step - accuracy: 0.8227 - loss: 0.6764 - val_accuracy: 0.7728 - val_loss: 1.0195
Epoch 7/50
[1m2

In [37]:
#salvando movel
#Problema <--------------------------------------------------------------------------------
from tensorflow.keras.models import save_model
import pickle

# Salvar o modelo treinado
model.save("modelo_classificacao_y_Problema.h5")

# Salvar o tokenizer
with open("tokenizer_PROBLEMA.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

# Salvar o LabelEncoder
with open("label_encoder_PROBLEMA.pkl", "wb") as f:
    pickle.dump(label_encoder, f)



In [38]:
#testando model
#Problema <--------------------------------------------------------------------------------
from tensorflow.keras.models import load_model
import pickle

# Carregar o modelo
modelo_carregado = load_model("modelo_classificacao_y_Problema.h5")

# Carregar o tokenizer
with open("tokenizer_PROBLEMA.pkl", "rb") as f:
    tokenizer_carregado = pickle.load(f)

# Carregar o LabelEncoder
with open("label_encoder_PROBLEMA.pkl", "rb") as f:
    label_encoder_carregado = pickle.load(f)

# Fazer previsão com o modelo carregado
novo_texto = ["11/01 |CA-LD|240 mm|||LONG BAT DIANTEIRO TRINC"]
novo_texto_seq = tokenizer_carregado.texts_to_sequences(novo_texto)
novo_texto_padded = pad_sequences(novo_texto_seq, maxlen=max_len, padding='post', truncating='post')

pred_proba = modelo_carregado.predict(novo_texto_padded)
pred_classe = np.argmax(pred_proba, axis=1)
pred_label = label_encoder_carregado.inverse_transform(pred_classe)

print(f"Entrada: {novo_texto[0]}")
print(f"Previsão: {pred_label[0]}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step
Entrada: 11/01 |CA-LD|240 mm|||LONG BAT DIANTEIRO TRINC
Previsão: TRINCADA


In [4]:
# modelo final 

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, GlobalMaxPool1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

# 1. Carregando os dados
data = data.dropna()
X = data[["Y_SISTEMA_output", "Y_CONJUNTO_output", "Y_ITEM_output", "Y_PROBLEMA_output"]]
y = data["Y_OCORRÊNCIA_output"]

# 2. Codificando os rótulos da saída
label_encoder_y = LabelEncoder()
y_encoded = label_encoder_y.fit_transform(y)

# 3. Dividindo os dados
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# 4. Tokenização e padronização das entradas
max_words = 10000  # Número máximo de palavras no vocabulário
max_len = 100       # Tamanho máximo de sequência

tokenizers = {}
X_train_padded = {}
X_test_padded = {}

# Criar um tokenizer para cada entrada categórica
for column in X.columns:
    tokenizer = Tokenizer(num_words=max_words, oov_token="<UNK>")
    tokenizer.fit_on_texts(X_train[column])
    tokenizers[column] = tokenizer
    # Tokenizar e padronizar as entradas
    X_train_padded[column] = pad_sequences(tokenizer.texts_to_sequences(X_train[column]), maxlen=max_len, padding='post')
    X_test_padded[column] = pad_sequences(tokenizer.texts_to_sequences(X_test[column]), maxlen=max_len, padding='post')

# 5. Criando o modelo
input_layers = []
embedding_layers = []

for column in X.columns:
    input_layer = Input(shape=(max_len,), name=f"input_{column}")
    embedding_layer = Embedding(input_dim=max_words, output_dim=128)(input_layer)
    lstm_layer = LSTM(64, return_sequences=True)(embedding_layer)
    pooled_layer = GlobalMaxPool1D()(lstm_layer)
    input_layers.append(input_layer)
    embedding_layers.append(pooled_layer)

# Concatenar todas as saídas de embedding
concatenated = Concatenate()(embedding_layers)
dense1 = Dense(128, activation='relu')(concatenated)
dense2 = Dense(64, activation='relu')(dense1)
output_layer = Dense(len(label_encoder_y.classes_), activation='softmax', name="output_y_ocorrencia")(dense2)

# Criar o modelo final
model = Model(inputs=input_layers, outputs=output_layer)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 6. Treinando o modelo
history = model.fit(
    [X_train_padded[col] for col in X.columns], y_train,
    validation_data=([X_test_padded[col] for col in X.columns], y_test),
    epochs=100,
    batch_size=32,
    verbose=1
)




Epoch 1/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 119ms/step - accuracy: 0.2380 - loss: 3.9389 - val_accuracy: 0.7442 - val_loss: 1.3728
Epoch 2/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 122ms/step - accuracy: 0.7604 - loss: 1.1654 - val_accuracy: 0.7876 - val_loss: 1.0328
Epoch 3/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 119ms/step - accuracy: 0.7992 - loss: 0.8261 - val_accuracy: 0.8119 - val_loss: 0.9585
Epoch 4/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 116ms/step - accuracy: 0.8242 - loss: 0.6641 - val_accuracy: 0.8162 - val_loss: 0.9109
Epoch 5/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 123ms/step - accuracy: 0.8349 - loss: 0.5481 - val_accuracy: 0.8334 - val_loss: 0.9066
Epoch 6/100
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 130ms/step - accuracy: 0.8525 - loss: 0.4811 - val_accuracy: 0.8315 - val_loss: 0.9183
Epoc

In [5]:
# modelo final 
# 7. Avaliando o modelo
loss, accuracy = model.evaluate([X_test_padded[col] for col in X.columns], y_test)
print(f"Loss: {loss}, Accuracy: {accuracy}")

# 8. Fazendo previsões
novo_exemplo = {
    "Y_SISTEMA_output": ["TRUQUE"],
    "Y_CONJUNTO_output": ["TRAVESSA CENTRAL"],
    "Y_ITEM_output": ["MOLA CUNHA"],
    "Y_PROBLEMA_output": ["QUEBRADO"]
}

# Processar as novas entradas
novo_exemplo_padded = [
    pad_sequences(tokenizers[col].texts_to_sequences(novo_exemplo[col]), maxlen=max_len, padding='post')
    for col in novo_exemplo.keys()
]

# Previsão
pred_proba = model.predict(novo_exemplo_padded)
pred_class = np.argmax(pred_proba, axis=1)
pred_label = label_encoder_y.inverse_transform(pred_class)

print(f"Entrada: {novo_exemplo}")
print(f"Previsão: {pred_label[0]}")

# 9. Salvando o modelo e os tokenizers
model.save("modelo_multientrada_y_OCORRÊNCIA.h5")

with open("tokenizers_OCORRÊNCIA.pkl", "wb") as f:
    pickle.dump(tokenizers, f)

with open("label_encoder_y_OCORRÊNCIA.pkl", "wb") as f:
    pickle.dump(label_encoder_y, f)


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - accuracy: 0.8416 - loss: 1.2560
Loss: 1.0983006954193115, Accuracy: 0.8482100367546082
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 628ms/step




Entrada: {'Y_SISTEMA_output': ['TRUQUE'], 'Y_CONJUNTO_output': ['TRAVESSA CENTRAL'], 'Y_ITEM_output': ['MOLA CUNHA'], 'Y_PROBLEMA_output': ['QUEBRADO']}
Previsão: CUNHA AVARIADA


In [6]:
# modelo final 
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

# 1. Carregar o modelo salvo
model = load_model("modelo_multientrada_y_OCORRÊNCIA.h5")

# 2. Carregar os preprocessadores salvos
with open("tokenizers_OCORRÊNCIA.pkl", "rb") as f:
    tokenizers = pickle.load(f)

with open("label_encoder_y_OCORRÊNCIA.pkl", "rb") as f:
    label_encoder_y = pickle.load(f)

# 3. Definir uma nova entrada para teste
novo_exemplo = {
    "Y_SISTEMA_output": ["CCT"],
    "Y_CONJUNTO_output": ["CCT"],
    "Y_ITEM_output": ["ROTOR"],
    "Y_PROBLEMA_output": ["QUEBRADO"]
}

# 4. Preprocessar as novas entradas
max_len = 100  # Deve ser o mesmo usado ao treinar o modelo
novo_exemplo_padded = [
    pad_sequences(tokenizers[col].texts_to_sequences(novo_exemplo[col]), maxlen=max_len, padding='post')
    for col in novo_exemplo.keys()
]

# 5. Fazer previsões
pred_proba = model.predict(novo_exemplo_padded)
pred_class = np.argmax(pred_proba, axis=1)
pred_label = label_encoder_y.inverse_transform(pred_class)

# 6. Exibir o resultado
print(f"Entrada: {novo_exemplo}")
print(f"Previsão (classe): {pred_label[0]}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 582ms/step
Entrada: {'Y_SISTEMA_output': ['CCT'], 'Y_CONJUNTO_output': ['CCT'], 'Y_ITEM_output': ['ROTOR'], 'Y_PROBLEMA_output': ['QUEBRADO']}
Previsão (classe): ROTOR QUEBRADO
