In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Embedding, Flatten, LSTM
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# --- Load dataset -----------------------------------------------------------
# Fetch the complete 20 Newsgroups corpus (subset='all'); the train/test split
# is made further down with train_test_split.
newsgroups_data = fetch_20newsgroups(subset='all')
texts, labels = newsgroups_data.data, newsgroups_data.target

# --- Hyperparameters --------------------------------------------------------
# Length every tokenised document is padded or truncated to.
max_len = 1000

# --- Text preprocessing -----------------------------------------------------
# Fit the tokenizer on the raw documents, then turn every document into a
# list of integer word ids. The vocabulary cap (20000) must match the
# Embedding input size used by the models below.
tokenizer = Tokenizer(num_words=20000)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(texts)

# Bring all sequences to the same length so they stack into one 2-D array.
data = pad_sequences(sequences, maxlen=max_len)

# Binarize the integer class ids into one indicator column per class, the
# target format expected by the categorical_crossentropy loss used below.
labels = LabelBinarizer().fit_transform(labels)

# --- Train / test split -----------------------------------------------------
# 80 % train, 20 % test; random_state is fixed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=0,
)

#dnn
# Baseline feed-forward classifier: embed every token, flatten the whole
# (max_len x 128) embedding matrix into one vector, then classify with two
# dense layers.
model_dnn = Sequential()
# An explicit Input layer fixes the sequence length for Flatten. It replaces
# Embedding's `input_length` argument, which Keras 3 deprecates.
model_dnn.add(tf.keras.Input(shape=(max_len,), dtype='int32'))
# The embedding table size comes from the tokenizer so the two cannot drift
# apart if the vocabulary cap is changed.
model_dnn.add(Embedding(tokenizer.num_words, 128))
model_dnn.add(Flatten())
model_dnn.add(Dense(256, activation='relu'))
# One softmax output per label column (20 for this dataset).
model_dnn.add(Dense(y_train.shape[1], activation='softmax'))
model_dnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

#model fit
# NOTE: the test split doubles as validation data here, so the per-epoch
# val_* metrics are computed on the same samples as the final evaluate() call.
model_dnn.fit(x_train, y_train, epochs=5, batch_size=128, validation_data=(x_test, y_test))

#lstm
# Recurrent classifier: embed every token, run the sequence through a single
# LSTM layer and classify from its final output.
model_lstm = Sequential()
# An explicit Input layer declares the sequence length. It replaces
# Embedding's `input_length` argument, which Keras 3 deprecates.
model_lstm.add(tf.keras.Input(shape=(max_len,), dtype='int32'))
# The embedding table size comes from the tokenizer so the two cannot drift
# apart if the vocabulary cap is changed.
model_lstm.add(Embedding(tokenizer.num_words, 128))
model_lstm.add(LSTM(128))
# One softmax output per label column (20 for this dataset).
model_lstm.add(Dense(y_train.shape[1], activation='softmax'))
model_lstm.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# NOTE: the test split doubles as validation data here, so the per-epoch
# val_* metrics are computed on the same samples as the final evaluate() call.
model_lstm.fit(x_train, y_train, epochs=5, batch_size=128, validation_data=(x_test, y_test))

# --- Final evaluation on the held-out split ---------------------------------
# evaluate() returns the loss followed by the compiled metrics (accuracy only),
# so each result unpacks into exactly two values.

# Feed-forward model
dnn_loss, dnn_accuracy = model_dnn.evaluate(x=x_test, y=y_test)
print(f'DNN Model Acc: {dnn_accuracy:.4f}')

# LSTM model
lstm_loss, lstm_accuracy = model_lstm.evaluate(x=x_test, y=y_test)
print(f'LSTM Model Acc: {lstm_accuracy:.4f}')


2025-04-22 21:46:24.086635: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/5




118/118 ━━━━━━━━━━━━━━━━━━━━ 65s 517ms/step - accuracy: 0.0558 - loss: 5.0012 - val_accuracy: 0.0769 - val_loss: 2.9382
Epoch 2/5
118/118 ━━━━━━━━━━━━━━━━━━━━ 61s 520ms/step - accuracy: 0.0931 - loss: 2.8928 - val_accuracy: 0.1119 - val_loss: 2.8744
Epoch 3/5
118/118 ━━━━━━━━━━━━━━━━━━━━ 56s 472ms/step - accuracy: 0.1035 - loss: 2.8697 - val_accuracy: 0.0902 - val_loss: 2.8867
Epoch 4/5
118/118 ━━━━━━━━━━━━━━━━━━━━ 51s 433ms/step - accuracy: 0.1238 - loss: 2.7939 - val_accuracy: 0.1528 - val_loss: 2.6235
Epoch 5/5
118/118 ━━━━━━━━━━━━━━━━━━━━ 52s 438ms/step - accuracy: 0.1759 - loss: 2.6457 - val_accuracy: 0.1111 - val_loss: 2.7873
Epoch 1/5
118/118 ━━━━━━━━━━━━━━━━━━━━ 271s 2s/step - accuracy: 0.1267 - loss: 2.8930 - val_accuracy: 0.2870 - val_loss: 2.2026
Epoch 2/5
[1m118/118[0m [