### Install Pakage

In [None]:
!pip install hazm
!pip install tensorflow-gpu==2.0



### import Pakage 

In [None]:
import numpy as np
import pandas as pd

from tensorflow.keras import optimizers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Input, Embedding, Dropout, GlobalMaxPool1D
from tensorflow.keras.layers import LSTM, Bidirectional
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Preprocessing
from hazm import *


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!unzip /content/drive/MyDrive/digimag -d /content/drive/MyDrive

Archive:  /content/drive/MyDrive/digimag.zip
replace /content/drive/MyDrive/digimag/dev.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: no
replace /content/drive/MyDrive/digimag/test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: no
replace /content/drive/MyDrive/digimag/train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: no


In [None]:
!unzip /content/drive/MyDrive/digimag.zip -d /content/dataset/

Archive:  /content/drive/MyDrive/digimag.zip
replace /content/dataset/digimag/dev.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: no
replace /content/dataset/digimag/test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: no
replace /content/dataset/digimag/train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: no


### Read dataset

In [None]:
train_data = pd.read_csv('/content/drive/MyDrive/digimag/train.csv', encoding='utf-8', sep='\t')
train_data

Unnamed: 0.1,Unnamed: 0,content,label,label_id
0,0,نمایش تبلیغ در لاک‌اسکرین تعدادی از گوشی‌های ه...,علم و تکنولوژی,3
1,1,شکست Justice League در باکس آفیس پس از بازخورد...,هنر و سینما,5
2,2,کلاسیک بینی؛ همه چیز در یک شب اتفاق افتاد فیلم...,هنر و سینما,5
3,3,اپل دوباره سراغ رنده رفته چراکه آپگرید کردن سط...,علم و تکنولوژی,3
4,4,بررسی جزء به جزء بهترین بخش Ori and the Blind ...,بازی ویدیویی,0
...,...,...,...,...
6891,6891,چند ساعت خواب برای بدن شما مفید است؟ شایع است ...,سلامت و زیبایی,2
6892,6892,فیلم مگامن ساخته می‌شود شرکت بازی‌سازی کپ‌کام ...,بازی ویدیویی,0
6893,6893,تعداد حملات بدافزاری به سیستم‌ عامل مکینتاش بر...,علم و تکنولوژی,3
6894,6894,چطور از مبتلا شدن به آنفولانزا پیشگیری کنیم؟ (...,سلامت و زیبایی,2


In [None]:
sentence_train = train_data['content']
label_train = train_data['label_id']

print('Number of training sentence: ', sentence_train.shape)
print('Number of training label: ', label_train.shape)


Number of training sentence:  (6896,)
Number of training label:  (6896,)


In [None]:
from collections import Counter
cnt = Counter(label_train)
cnt = dict(cnt)
print(cnt)

{3: 2245, 5: 1350, 0: 1593, 6: 206, 2: 1304, 4: 97, 1: 101}


In [None]:
# Convert dataframes to numpy arrays
sentence_train = np.asarray(sentence_train)
label_train = np.asarray(label_train)

In [None]:
# Prepare labels for categorical prediction
categorical_label_train = to_categorical(label_train, 7)
categorical_label_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

### Preprocessing 

In [None]:
import re
normalizer = Normalizer()
lemmatizer = Lemmatizer()
stemmer = Stemmer()
# turn a doc into clean tokens
def clean_data(doc):
    doc = normalizer.normalize(doc) # Normalize document using Hazm Normalizer
    tokenized = word_tokenize(doc)  # Tokenize text
    # ['یه', 'ساله', 'خریدم', 'خیلی', 'ضعیف', 'است', 'بازی', 'هم', 'که', 'چی', 'بگم', 'هیچ', 'کدوم', 'رو', 'باز', 'نمیکنه']
    tokens = []
    for token in tokenized:
      token = re.sub("[،:.,;()/+]", " ", token) 
      token = re.sub(r"\!+","!", token)
      token = re.sub(r"\؟+","؟", token)
      token = re.sub(r"\u200c", " ", token)
      tokens.append(token)

    tokens = [w for w in tokens if not len(w) <= 1] # single character removal 
    tokens = [w for w in tokens if not w.isdigit()] # digit remove
    tokens = [lemmatizer.lemmatize(w) for w in tokens] # Lemmatize sentence words using Hazm Lemmatizer
    tokens = [stemmer.stem(w) for w in tokens] 
    tokens = ' '.join(tokens)
    return tokens

### Apply preprocessing to dataset




In [None]:
# Apply preprocessing step to training data
train_docs = np.empty_like(sentence_train)
for index, document in enumerate(sentence_train):
  train_docs[index] = clean_data(document)

### Set tokenizer and encode sentences


```
با کمي هزينه بيشتر يک گوشي سوني در همين رده بگيريد بهتر خواهد بود.
[7, 64, 664, 104, 16, 11, 240, 5, 191, 282, 68, 54, 131, 36, 37]
```



In [None]:
num_words = 2000

# Create the tokenizer
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(train_docs)
encoded_docs = tokenizer.texts_to_sequences(train_docs)
print(encoded_docs)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
# Find maximum length of training sentences
max_length = max([len(s.split()) for s in train_docs])
max_length

10148

### Padding 

In [None]:
# Pad embeded training sequences
x_train_padded = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

In [None]:
x_train_padded[1]

array([664,   2, 216, ...,   0,   0,   0], dtype=int32)

In [None]:
#vocabulary size
vocab_size = len(tokenizer.word_index)

In [None]:
valid_set = pd.read_csv('/content/drive/MyDrive/digimag/dev.csv',encoding="utf-8",sep='\t')
valid_set

Unnamed: 0.1,Unnamed: 0,content,label,label_id
0,0,نت‌فلیکس مجموعه‌ی سریالی کسلوانیا را می‌سازد [...,هنر و سینما,5
1,1,کارگردان The Last Guardian از بازی بعدی‌اش می‌...,بازی ویدیویی,0
2,2,کارت اعتباری اپل کارت معرفی شد در مراسم Show T...,علم و تکنولوژی,3
3,3,از کنفرانس مایکروسافت و کنسول بعدی سونی تا دیگ...,بازی ویدیویی,0
4,4,از رویداد ماه اکتبر اپل چه انتظاراتی داریم؟ در...,علم و تکنولوژی,3
...,...,...,...,...
762,762,۱۰ جزیره زیبای دنیا که حتما باید به آن‌ها سفر ...,سلامت و زیبایی,2
763,763,تریلر بسته الحاقی Iceborne برای Monster Hunter...,بازی ویدیویی,0
764,764,دیزنی از روابطش با الکترونیک آرتز می‌گوید به گ...,بازی ویدیویی,0
765,765,افتضاح لوت‌باکس‌های Battlefront ۲ سیاه‌ترین دو...,بازی ویدیویی,0


In [None]:
sentence_valid = np.asarray(valid_set['content'])
label_valid = np.asarray(valid_set['label_id'])

In [None]:
categorical_label_valid = to_categorical(label_valid, 7)
categorical_label_valid

array([[0., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [None]:
# Apply preprocessing step to valid data
valid_docs = np.empty_like(sentence_valid)
for index, document in enumerate(sentence_valid):
  valid_docs[index] = clean_data(document)

In [None]:
# Embed valid sequences
encoded_docs1 = tokenizer.texts_to_sequences(valid_docs)
x_valid_padded = pad_sequences(encoded_docs1, maxlen=max_length, padding='post')

In [None]:
model_blstm = Sequential()

model_blstm.add(Embedding(vocab_size, 100, input_length=max_length))
model_blstm.add(Bidirectional(LSTM(100, return_sequences=True, name='lstm_layer')))

model_blstm.add(GlobalMaxPool1D())
model_blstm.add(Dropout(0.25))
model_blstm.add(Dense(300, activation="relu"))
model_blstm.add(Dropout(0.2))
model_blstm.add(Dense(7, activation='softmax'))

In [None]:
model_blstm.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=["categorical_accuracy"])

model_blstm.summary()
batch_size_blstm = 64
epochs_blstm = 10

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 10148, 100)        6750400   
_________________________________________________________________
bidirectional_2 (Bidirection (None, 10148, 200)        160800    
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 200)               0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 300)               60300     
_________________________________________________________________
dropout_5 (Dropout)          (None, 300)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 7)                

In [None]:
hist_blstm = model_blstm.fit(x_train_padded, categorical_label_train,validation_data=(x_valid_padded,categorical_label_valid),
                             batch_size=batch_size_blstm, epochs=epochs_blstm,
                             shuffle=True)

Train on 6896 samples, validate on 767 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
test_data = pd.read_csv('/content/drive/MyDrive/digimag/test.csv', encoding='utf-8', sep='\t')

x_test = test_data['content']
y_test = test_data['label_id']

In [None]:

print('Number of testing sentence: ', x_test.shape)
print('Number of testing label: ', y_test.shape)

Number of testing sentence:  (852,)
Number of testing label:  (852,)


In [None]:
# Convert dataframes to numpy arrays
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

In [None]:
# Applying preprocessing step to test data
test_docs = np.empty_like(x_test)
for index, document in enumerate(x_test):
  test_docs[index] = clean_data(document)

In [None]:
# Embed testing sequences
encoded_docs = tokenizer.texts_to_sequences(test_docs)
x_test_padded = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

In [None]:
categorical_y_test = to_categorical(y_test, 7)

In [None]:
# Evaluate model
loss_blstm, acc_blstm = model_blstm.evaluate(x_test_padded, categorical_y_test, verbose=1)
print('Test Accuracy: %f' % (acc_blstm*100))

Test Accuracy: 92.018777


In [None]:
y_pred_blstm = model_blstm.predict_classes(x_test_padded)

In [None]:
text = []
true_label = []
pred_label = []

public =0

Art_cinema= 0 

Science_technology=0

Health_Beauty = 0

Purchase_guide = 0

Books_literature = 0

games=0



for i in range(len(y_pred_blstm)):
  text.append(x_test[i])
  true_label.append(y_test[i])

  if y_pred_blstm[i] == 0:
    games +=1
    pred_label.append("بازی ویدیویی")

  elif y_pred_blstm[i] == 1:
    Purchase_guide +=1
    pred_label.append("راهنمای خرید")

  elif y_pred_blstm[i] == 2:
    Health_Beauty +=1
    pred_label.append("سلامت و زیبایی")

  if y_pred_blstm[i] == 3:
    Science_technology +=1
    pred_label.append("علم و تکنولوژی")

  elif y_pred_blstm[i] == 4:
    public +=1
    pred_label.append("عمومی")

  elif y_pred_blstm[i] == 5:
    Art_cinema +=1
    pred_label.append("هنر و سینما")

  if y_pred_blstm[i] == 6:
    Books_literature +=1
    pred_label.append("کتاب و ادبیات")





```
array([1, 0, 1, 1, 1, 2, 4, 2, 3, 0, 0, 0, 1, 2, 0, 1, 0, 1, 0, 4, 0, 4,
       1, 1, 1, 4, 0, 4, 1, 2, 1, 1, 4, 0, 1, 0, 1, 1, 0, 1, 1, 0, 2, 0,
       3, 4, 0, 4, 1, 1])
       ```



In [None]:
print("بازی ویدیویی: " , (games/852)*100)
print("راهنمای خرید: " , (Purchase_guide/852)*100)
print("سلامت و زیبایی: " , (Health_Beauty/852)*100)
print("علم و تکنولوژی: " , (Science_technology/852)*100)
print("عمومی: " , (public/852)*100)
print("هنر و سینما: " , (Art_cinema/852)*100)
print("کتاب و ادبیات: " , (Books_literature/852)*100)



بازی ویدیویی:  23.591549295774648
راهنمای خرید:  1.5258215962441315
سلامت و زیبایی:  20.657276995305164
علم و تکنولوژی:  31.572769953051644
عمومی:  0.4694835680751174
هنر و سینما:  19.366197183098592
کتاب و ادبیات:  2.8169014084507045


In [None]:
dataFrame = pd.DataFrame({"text":text, "true label":true_label, "prediction label":pred_label})

In [None]:
dataFrame.to_excel("prediction2BLstm.xlsx", index=False)

#LSTM

In [None]:
valid_set = pd.read_csv('/content/drive/MyDrive/digimag/dev.csv',encoding="utf-8",sep='\t')
valid_set

Unnamed: 0.1,Unnamed: 0,content,label,label_id
0,0,نت‌فلیکس مجموعه‌ی سریالی کسلوانیا را می‌سازد [...,هنر و سینما,5
1,1,کارگردان The Last Guardian از بازی بعدی‌اش می‌...,بازی ویدیویی,0
2,2,کارت اعتباری اپل کارت معرفی شد در مراسم Show T...,علم و تکنولوژی,3
3,3,از کنفرانس مایکروسافت و کنسول بعدی سونی تا دیگ...,بازی ویدیویی,0
4,4,از رویداد ماه اکتبر اپل چه انتظاراتی داریم؟ در...,علم و تکنولوژی,3
...,...,...,...,...
762,762,۱۰ جزیره زیبای دنیا که حتما باید به آن‌ها سفر ...,سلامت و زیبایی,2
763,763,تریلر بسته الحاقی Iceborne برای Monster Hunter...,بازی ویدیویی,0
764,764,دیزنی از روابطش با الکترونیک آرتز می‌گوید به گ...,بازی ویدیویی,0
765,765,افتضاح لوت‌باکس‌های Battlefront ۲ سیاه‌ترین دو...,بازی ویدیویی,0


In [None]:
sentence_valid = np.asarray(valid_set['content'])
label_valid = np.asarray(valid_set['label_id'])

In [None]:
categorical_label_valid = to_categorical(label_valid, 7)
categorical_label_valid

array([[0., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [None]:
# Apply preprocessing step to valid data
valid_docs = np.empty_like(sentence_valid)
for index, document in enumerate(sentence_valid):
  valid_docs[index] = clean_data(document)

In [None]:
# Embed valid sequences
encoded_docs1 = tokenizer.texts_to_sequences(valid_docs)
x_valid_padded = pad_sequences(encoded_docs1, maxlen=max_length, padding='post')

In [None]:
model_lstm = Sequential()
model_lstm.add(Embedding(vocab_size, 100, input_length=max_length))
model_lstm.add(LSTM(100, return_sequences=True, name='lstm_layer'))
model_lstm.add(GlobalMaxPool1D())
model_lstm.add(Dropout(0.25))
model_lstm.add(Dense(300, activation="relu"))
model_lstm.add(Dropout(0.2))
model_lstm.add(Dense(7, activation='softmax'))

In [None]:
model_lstm.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=["categorical_accuracy"])
model_lstm.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 10148, 100)        6750400   
_________________________________________________________________
lstm_layer (LSTM)            (None, 10148, 100)        80400     
_________________________________________________________________
global_max_pooling1d_3 (Glob (None, 100)               0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 300)               30300     
_________________________________________________________________
dropout_7 (Dropout)          (None, 300)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 7)                

In [None]:
hist_lstm = model_lstm.fit(
    x_train_padded, categorical_label_train,validation_data=(x_valid_padded,categorical_label_valid),
    batch_size=64, epochs=10,
    shuffle=True, verbose=1)

Train on 6896 samples, validate on 767 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluate model
loss_lstm, acc_lstm = model_lstm.evaluate(x_test_padded, categorical_y_test, verbose=1)
print('Test Accuracy: %f' % (acc_lstm*100))

Test Accuracy: 90.962440


In [None]:
y_pred_lstm = model_lstm.predict_classes(x_test_padded)

In [None]:
text = []
true_label = []
pred_label = []


public =0

Art_cinema= 0 

Science_technology=0

Health_Beauty = 0

Purchase_guide = 0

Books_literature = 0

games=0



for i in range(len(y_pred_blstm)):
  text.append(x_test[i])
  true_label.append(y_test[i])

  if y_pred_blstm[i] == 0:
    games +=1
    pred_label.append("بازی ویدیویی")

  elif y_pred_blstm[i] == 1:
    Purchase_guide +=1
    pred_label.append("راهنمای خرید")

  elif y_pred_blstm[i] == 2:
    Health_Beauty +=1
    pred_label.append("سلامت و زیبایی")

  if y_pred_blstm[i] == 3:
    Science_technology +=1
    pred_label.append("علم و تکنولوژی")

  elif y_pred_blstm[i] == 4:
    public +=1
    pred_label.append("عمومی")

  elif y_pred_blstm[i] == 5:
    Art_cinema +=1
    pred_label.append("هنر و سینما")

  if y_pred_blstm[i] == 6:
    Books_literature +=1
    pred_label.append("کتاب و ادبیات")



In [None]:
print("بازی ویدیویی: " , (games/852)*100)
print("راهنمای خرید: " , (Purchase_guide/852)*100)
print("سلامت و زیبایی: " , (Health_Beauty/852)*100)
print("علم و تکنولوژی: " , (Science_technology/852)*100)
print("عمومی: " , (public/852)*100)
print("هنر و سینما: " , (Art_cinema/852)*100)
print("کتاب و ادبیات: " , (Books_literature/852)*100)



بازی ویدیویی:  23.591549295774648
راهنمای خرید:  1.5258215962441315
سلامت و زیبایی:  20.657276995305164
علم و تکنولوژی:  31.572769953051644
عمومی:  0.4694835680751174
هنر و سینما:  19.366197183098592
کتاب و ادبیات:  2.8169014084507045


In [None]:
dataFrame = pd.DataFrame({"text":text, "true label":true_label, "prediction label":pred_label})

In [None]:
dataFrame.to_excel("prediction2Lstm.xlsx", index=False)

#GRU

In [None]:
from tensorflow.keras.layers import GRU
model_GRU = Sequential()
model_GRU.add(Embedding(vocab_size, 100, input_length=max_length))
model_GRU.add(GRU(100, return_sequences=True, name='gru_layer'))
model_GRU.add(GlobalMaxPool1D())
model_GRU.add(Dropout(0.25))
model_GRU.add(Dense(300, activation="relu"))
model_GRU.add(Dropout(0.2))
model_GRU.add(Dense(7, activation='softmax'))

In [None]:
model_GRU.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=["categorical_accuracy"])
model_GRU.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 10148, 100)        6750400   
_________________________________________________________________
gru_layer (GRU)              (None, 10148, 100)        60600     
_________________________________________________________________
global_max_pooling1d_4 (Glob (None, 100)               0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 300)               30300     
_________________________________________________________________
dropout_9 (Dropout)          (None, 300)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 7)                

In [None]:
hist_GRU = model_GRU.fit(
    x_train_padded, categorical_label_train,validation_data=(x_valid_padded,categorical_label_valid),
    batch_size=64, epochs=20,
    shuffle=True, verbose=1)

Train on 6896 samples, validate on 767 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Evaluate model
loss_GRU, acc_GRU = model_GRU.evaluate(x_test_padded, categorical_y_test, verbose=1)
print('Test Accuracy: %f' % (acc_GRU*100))

Test Accuracy: 93.309861


In [None]:
y_pred_GRU = model_GRU.predict_classes(x_test_padded)

In [None]:
text = []
true_label = []
pred_label = []
public =0

Art_cinema= 0 

Science_technology=0

Health_Beauty = 0

Purchase_guide = 0

Books_literature = 0

games=0



for i in range(len(y_pred_blstm)):
  text.append(x_test[i])
  true_label.append(y_test[i])

  if y_pred_blstm[i] == 0:
    games +=1
    pred_label.append("بازی ویدیویی")

  elif y_pred_blstm[i] == 1:
    Purchase_guide +=1
    pred_label.append("راهنمای خرید")

  elif y_pred_blstm[i] == 2:
    Health_Beauty +=1
    pred_label.append("سلامت و زیبایی")

  if y_pred_blstm[i] == 3:
    Science_technology +=1
    pred_label.append("علم و تکنولوژی")

  elif y_pred_blstm[i] == 4:
    public +=1
    pred_label.append("عمومی")

  elif y_pred_blstm[i] == 5:
    Art_cinema +=1
    pred_label.append("هنر و سینما")

  if y_pred_blstm[i] == 6:
    Books_literature +=1
    pred_label.append("کتاب و ادبیات")



In [None]:
print("بازی ویدیویی: " , (games/852)*100)
print("راهنمای خرید: " , (Purchase_guide/852)*100)
print("سلامت و زیبایی: " , (Health_Beauty/852)*100)
print("علم و تکنولوژی: " , (Science_technology/852)*100)
print("عمومی: " , (public/852)*100)
print("هنر و سینما: " , (Art_cinema/852)*100)
print("کتاب و ادبیات: " , (Books_literature/852)*100)


بازی ویدیویی:  23.591549295774648
راهنمای خرید:  1.5258215962441315
سلامت و زیبایی:  20.657276995305164
علم و تکنولوژی:  31.572769953051644
عمومی:  0.4694835680751174
هنر و سینما:  19.366197183098592
کتاب و ادبیات:  2.8169014084507045


In [None]:
dataFrame = pd.DataFrame({"text":text, "true label":true_label, "prediction label":pred_label})

In [None]:
dataFrame.to_excel("prediction2GRU.xlsx", index=False)

### Resource : 


```
https://arxiv.org/pdf/2004.05328.pdf
https://github.com/JoyeBright/DeepSentiPers

```

