**Importing the required libraries**

In [None]:
import os

import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

**Load the dataset**


In [None]:
# Remote zip archive containing the Persian topic-classification corpus.
url = "https://raw.githubusercontent.com/meytiii/Deep-Learning/main/datasets/6-persian-topics.zip"

# Download the archive through Keras and unpack it.
dataset = tf.keras.utils.get_file(
    fname="6-persian-topics.zip",
    origin=url,
    extract=True,
)

# The extracted "6-persian-topics" folder is expected to sit next to the
# downloaded archive. This path layout is what you typically get on Google
# Colab; adjust `data_dir` if you run the notebook on your own machine and the
# files live elsewhere.
archive_parent = os.path.dirname(dataset)
data_dir = os.path.join(archive_parent, "6-persian-topics")

**Process the dataset**
Separate the texts and their labels into two parallel lists.

In [None]:
# Every sub-directory of `data_dir` is treated as one class.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the label <-> class mapping identical across runs and machines. Stray
# non-directory entries (e.g. .DS_Store) are skipped so they neither crash the
# inner listing nor inflate the number of classes.
class_names = sorted(
    entry
    for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
class_dirs = [os.path.join(data_dir, class_name) for class_name in class_names]
texts = []   # raw contents of every document
labels = []  # integer class index of each document, aligned with `texts`

for i, class_dir in enumerate(class_dirs):
    # Sorted file order keeps the sample order deterministic, which the seeded
    # train/validation split further down relies on to be repeatable.
    for file_name in sorted(os.listdir(class_dir)):
        file_path = os.path.join(class_dir, file_name)
        if not os.path.isfile(file_path):
            continue  # ignore nested folders instead of failing on open()
        with open(file_path, "r", encoding="utf-8") as file:
            texts.append(file.read())
        labels.append(i)

**Tokenize the text**

In [None]:
# `max_features` caps the tokenizer vocabulary; `max_len` is the fixed length
# every token sequence is padded/truncated to.
max_features, max_len = 10000, 200

# Words that fall outside the kept vocabulary are mapped to the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=max_features)
tokenizer.fit_on_texts(texts)

# Turn each document into a list of integer token ids, then into one
# rectangular array with `max_len` columns.
sequences = tokenizer.texts_to_sequences(texts)
X = pad_sequences(sequences=sequences, maxlen=max_len)

In [None]:
# Integer class labels as a NumPy array, aligned row-for-row with `X`.
y = np.asarray(labels)

**Split the dataset into training and validation sets**

In [None]:
# Hold out 20% of the documents for validation with a fixed seed.
# The classes are heavily imbalanced (the smallest has only a few dozen
# validation samples versus thousands for the largest), so the split is
# stratified on the labels to give every class the same share in both subsets.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

**Define the model**

In [None]:
# Small bag-of-embeddings classifier: embed every token id, average the
# embeddings over the sequence axis, then classify through one hidden layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(max_features, 16, input_length=max_len))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(16, activation="relu"))
# One softmax output unit per class.
model.add(tf.keras.layers.Dense(len(class_names), activation="softmax"))

# The targets are integer class indices, hence the sparse cross-entropy loss.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

**Train the model**

In [None]:
# Train for a fixed number of epochs, scoring the held-out split after each
# one. `history` keeps the per-epoch metrics returned by fit().
epochs = 15
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=epochs,
    validation_data=(X_val, y_val),
    verbose=2,
)

Epoch 1/15
940/940 - 5s - loss: 0.9692 - accuracy: 0.6729 - val_loss: 0.4705 - val_accuracy: 0.8770 - 5s/epoch - 5ms/step
Epoch 2/15
940/940 - 6s - loss: 0.3308 - accuracy: 0.8983 - val_loss: 0.2603 - val_accuracy: 0.9084 - 6s/epoch - 6ms/step
Epoch 3/15
940/940 - 4s - loss: 0.2053 - accuracy: 0.9352 - val_loss: 0.1881 - val_accuracy: 0.9466 - 4s/epoch - 4ms/step
Epoch 4/15
940/940 - 5s - loss: 0.1495 - accuracy: 0.9550 - val_loss: 0.1598 - val_accuracy: 0.9501 - 5s/epoch - 5ms/step
Epoch 5/15
940/940 - 6s - loss: 0.1186 - accuracy: 0.9643 - val_loss: 0.1372 - val_accuracy: 0.9581 - 6s/epoch - 7ms/step
Epoch 6/15
940/940 - 5s - loss: 0.0963 - accuracy: 0.9713 - val_loss: 0.1254 - val_accuracy: 0.9623 - 5s/epoch - 5ms/step
Epoch 7/15
940/940 - 6s - loss: 0.0796 - accuracy: 0.9760 - val_loss: 0.1162 - val_accuracy: 0.9662 - 6s/epoch - 7ms/step
Epoch 8/15
940/940 - 5s - loss: 0.0666 - accuracy: 0.9804 - val_loss: 0.1119 - val_accuracy: 0.9671 - 5s/epoch - 5ms/step
Epoch 9/15
940/940 - 5s 

**Evaluate the model and report key metrics such as the per-class F1-score**

In [None]:
# Softmax outputs for the validation documents: one row per document, one
# column per class.
y_prob = model.predict(X_val)

# Predicted label = index of the most probable class, computed for all rows at
# once instead of looping over them in Python.
y_pred = np.argmax(y_prob, axis=1)

# `labels` is passed explicitly so the report rows stay aligned with
# `class_names` even if some class is absent from the validation split
# (without it, a missing class makes `target_names` mismatch and raise).
print(
    classification_report(
        y_val,
        y_pred,
        labels=np.arange(len(class_names)),
        target_names=class_names,
    )
)

                                  precision    recall  f1-score   support

فناوری و علوم کاربردی و تکنولوژی       0.95      0.96      0.96      1322
                جغرافیا و مکانها       0.99      0.99      0.99      4334
                  بهداشت و سلامت       0.92      0.97      0.94       381
                    دین و اعتقاد       0.88      0.77      0.82       115
                            ورزش       1.00      0.99      0.99      1295
                         ریاضیات       0.63      0.50      0.56        68

                        accuracy                           0.98      7515
                       macro avg       0.89      0.86      0.88      7515
                    weighted avg       0.97      0.98      0.98      7515



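**As you can see, the F1-score of every class is above 0.5, which is a good sign. Note, however, that the two weakest classes (ریاضیات at 0.56 and دین و اعتقاد at 0.82) are also the ones with the fewest validation samples, so the 0.98 overall accuracy mostly reflects the larger classes.**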