In [None]:
# Load the BBC news dataset 'bbc-text.csv'
# (source: https://www.kaggle.com/balatmak/newsgroup20bbcnews?select=bbc-text.csv)

import pandas as pd

# Relative path: the CSV is looked up in the notebook's working directory.
DATASET_PATH = 'bbc-text.csv'

news_df = pd.read_csv(DATASET_PATH)
news_df

Unnamed: 0,category,text
0,tech,tv future in the hands of viewers with home th...
1,business,worldcom boss left books alone former worldc...
2,sport,tigers wary of farrell gamble leicester say ...
3,sport,yeading face newcastle in fa cup premiership s...
4,entertainment,ocean s twelve raids box office ocean s twelve...
...,...,...
2220,business,cars pull down us retail figures us retail sal...
2221,politics,kilroy unveils immigration policy ex-chatshow ...
2222,entertainment,rem announce new glasgow concert us band rem h...
2223,politics,how political squabbles snowball it s become c...


In [None]:
# One-hot encode the 'category' column: one indicator column per distinct category,
# appended next to the text, after which the original 'category' column is dropped.
# dtype=int pins the indicators to 0/1 integers; pandas >= 2.0 would otherwise
# produce booleans, changing both the displayed table and the label array.

category = pd.get_dummies(news_df['category'], dtype=int)
news_fixed = pd.concat([news_df, category], axis=1)
news_fixed = news_fixed.drop(columns='category')
news_fixed

Unnamed: 0,text,business,entertainment,politics,sport,tech
0,tv future in the hands of viewers with home th...,0,0,0,0,1
1,worldcom boss left books alone former worldc...,1,0,0,0,0
2,tigers wary of farrell gamble leicester say ...,0,0,0,1,0
3,yeading face newcastle in fa cup premiership s...,0,0,0,1,0
4,ocean s twelve raids box office ocean s twelve...,0,1,0,0,0
...,...,...,...,...,...,...
2220,cars pull down us retail figures us retail sal...,1,0,0,0,0
2221,kilroy unveils immigration policy ex-chatshow ...,0,0,1,0,0
2222,rem announce new glasgow concert us band rem h...,0,1,0,0,0
2223,how political squabbles snowball it s become c...,0,0,1,0,0


In [None]:
# Extract plain NumPy arrays from the frame so they can be fed to the model:
# `text` holds the article strings, `label` the five one-hot indicator columns.

label_columns = ['business', 'entertainment', 'politics', 'sport', 'tech']

text = news_fixed['text'].to_numpy()
label = news_fixed[label_columns].to_numpy()

In [None]:
# Split the data into training (80%) and test (20%) sets.
# random_state makes the split reproducible across kernel restarts.
# stratify keeps the class proportions equal in both partitions; the one-hot rows
# are collapsed to a single class index because stratification needs one label per sample.

from sklearn.model_selection import train_test_split

text_train, text_test, label_train, label_test = train_test_split(
    text,
    label,
    test_size=0.2,
    random_state=42,
    stratify=label.argmax(axis=1),
)

In [None]:
# Text preprocessing: tokenize the articles and pad the resulting integer sequences.

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Keep the 5000 most frequent words; everything else maps to the OOV token.
# A dedicated '<OOV>' marker is used because a token such as '0' can also occur
# as an ordinary word in the corpus and would then share its index with OOV.
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')

# Fit the vocabulary on the training texts only. Fitting on the test texts as well
# would leak test-set vocabulary/frequency information into training.
tokenizer.fit_on_texts(text_train)

train_sequences = tokenizer.texts_to_sequences(text_train)
test_sequences = tokenizer.texts_to_sequences(text_test)

# Pad the training sequences to their longest length, then force the test
# sequences to that same length so both inputs have an identical width.
train_padsequences = pad_sequences(train_sequences)
test_padsequences = pad_sequences(test_sequences, maxlen=train_padsequences.shape[1])

In [None]:
# Build a Sequential model: Embedding -> LSTM -> two Dense/Dropout stages -> softmax output.

import tensorflow
from tensorflow import keras

model = keras.Sequential()

# Token ids (vocabulary of 5000) are embedded into 16-dimensional vectors.
model.add(keras.layers.Embedding(input_dim=5000, output_dim=16))
model.add(keras.layers.LSTM(64))

# Fully connected head, with dropout after each hidden Dense layer.
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dropout(0.2))

# Five output units, one per news category.
model.add(keras.layers.Dense(5, activation='softmax'))

In [None]:
# Compile the model: Adam optimizer, categorical cross-entropy loss
# (labels are one-hot encoded), and accuracy reported as the metric.

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Callback that stops training early via the on_epoch_end hook.

class myCallback(keras.callbacks.Callback):
  """Stop training once both training and validation accuracy exceed 0.9."""

  def on_epoch_end(self, epoch, logs=None):
    """Check the epoch's metrics and request a stop when both exceed 0.9.

    Args:
      epoch: Index of the epoch that just finished (unused).
      logs: Metric dictionary for the epoch; may be None.
    """
    # Avoid a shared mutable default and tolerate a missing logs dict.
    logs = logs or {}
    accuracy = logs.get('accuracy')
    val_accuracy = logs.get('val_accuracy')

    # A metric can be absent (e.g. no validation data); comparing None with a
    # float would raise TypeError, so skip the check in that case.
    if accuracy is None or val_accuracy is None:
      return

    if accuracy > 0.9 and val_accuracy > 0.9:
      print("\nAkurasi Model telah mencapai >90%!")
      print("\nAkurasi Validation telah mencapai >90%!")
      self.model.stop_training = True

callbacks = myCallback()

In [None]:
# Train the model, validating on the held-out test split after every epoch.
# The custom callback may end training before the 40-epoch limit is reached.
# validation_steps is intentionally not passed: it is only meant for tf.data
# validation input, and with NumPy arrays it can restrict validation to a few
# batches instead of the full validation set.

hist = model.fit(
    train_padsequences,
    label_train,
    epochs=40,
    batch_size=32,
    validation_data=(test_padsequences, label_test),
    verbose=2,
    callbacks=[callbacks]
)

Epoch 1/40
56/56 - 107s - loss: 0.0023 - accuracy: 1.0000 - val_loss: 0.5788 - val_accuracy: 0.8899
Epoch 2/40
56/56 - 106s - loss: 8.2078e-04 - accuracy: 1.0000 - val_loss: 0.6735 - val_accuracy: 0.8719
Epoch 3/40
56/56 - 106s - loss: 8.7570e-04 - accuracy: 1.0000 - val_loss: 0.6085 - val_accuracy: 0.8787
Epoch 4/40
56/56 - 105s - loss: 4.4625e-04 - accuracy: 1.0000 - val_loss: 0.6630 - val_accuracy: 0.8921
Epoch 5/40
56/56 - 106s - loss: 3.6056e-04 - accuracy: 1.0000 - val_loss: 0.6392 - val_accuracy: 0.8921
Epoch 6/40
56/56 - 107s - loss: 2.0114e-04 - accuracy: 1.0000 - val_loss: 0.6509 - val_accuracy: 0.8989
Epoch 7/40
56/56 - 106s - loss: 3.4309e-04 - accuracy: 1.0000 - val_loss: 0.6313 - val_accuracy: 0.8944
Epoch 8/40
56/56 - 106s - loss: 0.0014 - accuracy: 0.9994 - val_loss: 0.6528 - val_accuracy: 0.8787
Epoch 9/40


In [None]:
# Plot training vs. test accuracy per epoch from the fit history.

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
for metric_name in ('accuracy', 'val_accuracy'):
    ax.plot(hist.history[metric_name])
ax.set_title('Grafik Plot Akurasi Model')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Test'], loc='lower right')
plt.show()

In [None]:
# Plot training vs. test loss per epoch from the fit history.
fig, ax = plt.subplots()
for metric_name in ('loss', 'val_loss'):
    ax.plot(hist.history[metric_name])
ax.set_title('Grafik Plot Loss Model')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Test'], loc='lower right')
plt.show()