In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Lambda, Embedding, Bidirectional, LSTM, TextVectorization

In [2]:
# Hyperparameters shared by the text vectorizer and the embedding layer.
#   vocab_size    -> max_tokens of the vectorizer and input_dim of the embedding
#   embedding_dim -> output_dim of the embedding
#   max_length    -> fixed token-sequence length produced by the vectorizer
vocab_size, embedding_dim, max_length = 1000, 16, 120

# Settings referenced only by the (currently commented-out) Tokenizer /
# pad_sequences pipeline further down in this notebook.
trunc_type = padding_type = "post"
oov_tok = "<OOV>"

In [13]:
# Load the labelled dataset: one text column ('feature') and one class column ('label').
data = pd.read_csv('/content/model_5W1H1G_dataset_more.csv')

sentences = data['feature']
labels = data['label']

# Hold out 20% of the rows for validation, keeping the class proportions equal in
# both splits (stratify). A fixed random_state makes the split -- and therefore every
# metric computed downstream -- reproducible across kernel restarts.
training_sentences, testing_sentences, train_label, test_label = train_test_split(
    sentences,
    labels,
    test_size = 0.2,
    stratify = labels,
    random_state = 42,
)

In [41]:
# tokenizer = Tokenizer(num_words = vocab_size, oov_token = oov_tok)
# tokenizer.fit_on_texts(training_sentences)

# train_sequences = tokenizer.texts_to_sequences(training_sentences)
# train_padded = pad_sequences(train_sequences, maxlen = max_length, padding = padding_type, truncating = trunc_type)

# validation_sequences = tokenizer.texts_to_sequences(testing_sentences)
# valid_padded = pad_sequences(validation_sequences, maxlen = max_length, padding = padding_type, truncating = trunc_type)



In [14]:
# label_tokenizer = Tokenizer()
# label_tokenizer.fit_on_texts(labels)

# train_label_final = np.array(label_tokenizer.texts_to_sequences(train_label))
# test_label_final = np.array(label_tokenizer.texts_to_sequences(test_label))

# train_label_final = to_categorical(train_label_final - 1)
# test_label_final = to_categorical(test_label_final - 1)

# Learn the class-name -> integer-id mapping from the training labels only,
# then reuse that same mapping for the held-out labels.
label_encoder = LabelEncoder()
label_encoder.fit(train_label)

# One-hot encode the integer ids; the width (8) matches the 8-unit softmax
# output layer of the model defined later in this notebook.
train_label_final = to_categorical(label_encoder.transform(train_label), num_classes=8)
test_label_final = to_categorical(label_encoder.transform(test_label), num_classes=8)


In [22]:
class MyCallback(tf.keras.callbacks.Callback):
  """Stop training early once the model is judged good enough.

  Training is stopped at the end of the first epoch where the reported
  'val_loss' is strictly below `val_loss_threshold` AND the reported
  (training) 'accuracy' is at least `accuracy_threshold`.

  Args:
    val_loss_threshold: upper bound (exclusive) on 'val_loss'. Defaults to 0.12.
    accuracy_threshold: lower bound (inclusive) on 'accuracy'. Defaults to 0.98.
  """

  def __init__(self, val_loss_threshold = 0.12, accuracy_threshold = 0.98):
    super().__init__()
    self.val_loss_threshold = val_loss_threshold
    self.accuracy_threshold = accuracy_threshold

  def on_epoch_end(self, epoch, logs = None):
    """Inspect the epoch metrics and request a stop when both thresholds are met."""
    # `logs` defaults to None, so it must not be dereferenced blindly.
    logs = logs or {}
    val_loss = logs.get('val_loss')
    accuracy = logs.get('accuracy')

    # Either metric can be absent (e.g. fit() called without validation data);
    # comparing None with a float would raise TypeError, so just keep training.
    if val_loss is None or accuracy is None:
      return

    if val_loss < self.val_loss_threshold and accuracy >= self.accuracy_threshold:
      self.model.stop_training = True

In [23]:
# Text -> integer-id preprocessing layer: keeps at most `vocab_size` tokens and
# emits sequences of exactly `max_length` ids per input string.
# NOTE(review): Keras documents pad_to_max_tokens for the multi_hot / count /
# tf_idf output modes; confirm whether it has any effect with output_mode='int'.
vectorize_layer = TextVectorization(
    output_mode='int',
    max_tokens=vocab_size,
    pad_to_max_tokens=True,
    output_sequence_length=max_length,
)

In [24]:
# Raw strings in, 8-way class probabilities out. Layers are stacked in order:
# vectorizer -> embedding -> two bidirectional LSTMs -> dense classifier head.
model = tf.keras.Sequential()
model.add(vectorize_layer)
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length))
# First recurrent layer returns the full sequence so the second one can consume it.
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Bidirectional(LSTM(32)))
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(8, activation='softmax'))

# Build the vectorizer's vocabulary from the training sentences only.
vectorize_layer.adapt(training_sentences.to_numpy())

In [25]:
# Configure training: Adam optimizer, categorical cross-entropy against the
# one-hot labels, and accuracy reported each epoch.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [26]:
# Train for up to 150 epochs; MyCallback may end the run earlier. The held-out
# split is used as validation data, which is what produces 'val_loss'.
model.fit(
    x=training_sentences,
    y=train_label_final,
    epochs=150,
    validation_data=(testing_sentences, test_label_final),
    callbacks=[MyCallback()],
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150


<keras.src.callbacks.History at 0x7bd27584abf0>

In [30]:
# The target directory lives on Google Drive, so Drive has to be mounted BEFORE
# saving. Without the mount, a top-to-bottom run would write into a plain local
# '/content/drive/...' folder instead of Drive.
from google.colab import drive
drive.mount('/content/drive')

# Persist the trained model (including the text vectorizer layer) to Drive.
model.save('/content/drive/MyDrive/Colab Notebooks/MODEL 5W1H1G/revisi_model_5w1h1g')

In [29]:
# Make Google Drive available under /content/drive for this Colab session.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [31]:
import joblib

# Store the fitted LabelEncoder next to the saved model so predicted class
# indices can be mapped back to class names later.
joblib.dump(
    value=label_encoder,
    filename='/content/drive/MyDrive/Colab Notebooks/MODEL 5W1H1G/label_encoder.joblib',
)

['/content/drive/MyDrive/Colab Notebooks/MODEL 5W1H1G/label_encoder.joblib']

In [6]:
import os
import joblib

# Locations searched for the saved artifacts, in priority order:
#   1. /content                -- where this cell originally expected them
#   2. the Google Drive folder -- where the save cells of this notebook write them
# Falling back to (2) lets this cell work even when the files were never copied
# into /content by hand (Drive must be mounted for that location to be visible).
_ARTIFACT_DIRS = (
  '/content',
  '/content/drive/MyDrive/Colab Notebooks/MODEL 5W1H1G',
)


def _find_artifact(name):
  """Return the first existing path of `name` inside `_ARTIFACT_DIRS`.

  Raises:
    FileNotFoundError: if `name` exists in none of the searched directories.
  """
  candidates = [os.path.join(directory, name) for directory in _ARTIFACT_DIRS]
  for candidate in candidates:
    if os.path.exists(candidate):
      return candidate
  raise FileNotFoundError(f'{name!r} not found; looked in: {candidates}')


# Load label encoder
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load encoder files that were produced by this notebook / a trusted source.
label_encoder = joblib.load(_find_artifact('label_encoder.joblib'))
model = tf.keras.models.load_model(_find_artifact('revisi_model_5w1h1g'))


In [9]:

# Run the model on a single raw sentence.
predictions = model.predict(['kenapa kamu melakukan hal itu?'])

# Only one sentence was submitted, so take its row of class probabilities and
# pick the index of the highest one.
predicted_class_index = tf.argmax(predictions[0]).numpy()

# Map the index back to the class name via the label encoder.
predicted_class_name = label_encoder.classes_[predicted_class_index]

print(f'Predicted Class: {predicted_class_name}')

Predicted Class: kenapa
