In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation
from keras.layers import Bidirectional, GlobalMaxPool1D
from keras.models import Model

In [2]:
# Read the labeled dataset (path is relative to the working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer="dataframe_labeled.csv")

In [3]:
# Pull the model inputs and targets out of the frame as arrays:
#   texts  - contents of the 'text' column
#   labels - contents of the 'failureCat' column (the class label of each row)
texts, labels = (df[column].values for column in ('text', 'failureCat'))

In [4]:
# Encode the class labels as integer ids. The fitted encoder is kept in `le`
# so ids can later be mapped back to labels with le.inverse_transform.
le = LabelEncoder().fit(labels)
y = le.transform(labels)

In [5]:
# Collect the distinct encoded class ids. np.unique returns them sorted, so the
# order is deterministic (the iteration order of a set is not guaranteed), and
# .tolist() yields plain Python ints.
unique_values = np.unique(y).tolist()

# Number of distinct classes present in the data
totalClasses = len(unique_values)

# Show the class ids so the cell reproduces the output recorded for it
print(unique_values)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]


In [6]:
# Turn the raw texts into fixed-length integer sequences
max_words, max_len = 10_000, 200  # tokenizer vocabulary cap, padded sequence length

# Build the word index from the texts, then map every text to a list of word ids
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Bring every sequence to exactly max_len entries so they stack into one array
# NOTE(review): the tokenizer is fitted on all texts before the train/test
# split, so its vocabulary also reflects validation/test text.
x = pad_sequences(sequences, maxlen=max_len)

In [7]:
# Split the data into train, validation, and test sets.
# A fixed seed makes both splits (and every metric computed from them)
# reproducible across kernel restarts.
SPLIT_SEED = 42

# Hold out 20% of all samples for testing ...
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=SPLIT_SEED
)
# ... then 20% of the remainder for validation (64% / 16% / 20% overall).
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=SPLIT_SEED
)
# NOTE(review): passing stratify=... would preserve class proportions, but
# scikit-learn rejects it when a class has fewer than two samples - check the
# per-class counts before enabling it.

In [8]:
# Convert labels to one-hot encoded format.
def _to_one_hot(class_ids):
    """Return `class_ids` one-hot encoded with one column per class.

    Arrays that already have more than one dimension are returned unchanged,
    so running this cell a second time does not encode the labels again.
    """
    class_ids = np.asarray(class_ids)
    if class_ids.ndim > 1:
        return class_ids
    # Width comes from the class count derived from the data instead of a
    # hard-coded literal (it is 18 for the dataset recorded in this notebook).
    return to_categorical(class_ids, num_classes=totalClasses)


y_train = _to_one_hot(y_train)
y_val = _to_one_hot(y_val)
y_test = _to_one_hot(y_test)

In [9]:
# Build the model.
# The intermediate tensor is named `hidden` so the padded input matrix `x`
# produced by the tokenization cell is not overwritten; the data-split cell
# can therefore still be re-run after the model has been built.
input_ = Input(shape=(max_len,))
hidden = Embedding(max_words, 128)(input_)  # 128-dim vector per token id
hidden = LSTM(60, return_sequences=True, name='lstm_layer')(hidden)  # one output per time step
hidden = GlobalMaxPool1D()(hidden)  # max over the time axis
hidden = Dropout(0.1)(hidden)
hidden = Dense(50, activation="relu")(hidden)
hidden = Dropout(0.1)(hidden)
# One softmax unit per class, sized from the data instead of a hard-coded
# literal (it is 18 for the dataset recorded in this notebook).
output = Dense(totalClasses, activation="softmax")(hidden)
model = Model(inputs=input_, outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [10]:
# Train the model for 10 epochs, evaluating on the validation split after each one.
# The returned History object is kept so the per-epoch metrics can be inspected
# afterwards via history.history; assigning it also keeps its repr out of the
# cell output.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=64,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x27f9e9d4588>

In [11]:
# Measure performance on the held-out test set. The model is compiled with a
# single metric (accuracy), so evaluate() yields the loss followed by it.
score, acc = model.evaluate(
    x_test,
    y_test,
    batch_size=64,
)
for caption, value in (('Test score:', score), ('Test accuracy:', acc)):
    print(caption, value)

Test score: 2.2359800338745117
Test accuracy: 0.5
