<a href="https://colab.research.google.com/github/myazzeh/NLP-Course/blob/main/Sequence_Learning/NLP_RNN_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import necessary libraries**

In [None]:
import pandas as pd
import numpy as np
import requests
import keras
from keras.layers import Embedding, Flatten, SimpleRNN, Dense
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.utils import pad_sequences

# **Load the datasets from GitHub and split them into inputs and labels**

In [None]:
# Locations of the pre-split fake-news CSV files in the course repository.
train_url = 'https://raw.githubusercontent.com/myazzeh/NLP-Course/main/datasets/fake_news_train.csv'
test_url = 'https://raw.githubusercontent.com/myazzeh/NLP-Course/main/datasets/fake_news_test.csv'

# Read each split straight from its URL into its own DataFrame.
train_df = pd.read_csv(train_url)
test_df = pd.read_csv(test_url)

In [None]:
# Inputs come from the 'claim_s' column and targets from the 'fake_flag'
# column, taken separately for the train and test frames.
x_train = train_df['claim_s']
y_train = train_df['fake_flag']
x_test = test_df['claim_s']
y_test = test_df['fake_flag']

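# **Apply tokenization and align all text sequences**
The `num_words` parameter of `Tokenizer` caps the vocabulary used when texts are converted to sequences: only the `num_words - 1` most frequent words keep their own index, and every other word is replaced by the `oov_token`.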

In [None]:
# Build the tokenizer from the training inputs only; the same fitted
# tokenizer is then used to encode both splits.
tok = Tokenizer(oov_token='[UNK]', num_words=6000)
tok.fit_on_texts(x_train)

# Convert each split from raw text to lists of integer word indices.
train_seq, test_seq = (tok.texts_to_sequences(texts) for texts in (x_train, x_test))

# Reports the number of entries in the tokenizer's word_index.
print (f'size of vocab is {len(tok.word_index)}')

In [None]:
# Hyper-parameters shared by all models below.
max_seq_leng= 10   # every sequence is padded/truncated to this many tokens
embd_size= 100     # embedding dimension (also reused as the RNN width)

# Size of the embedding table. Word indices start at 1 and index 0 is the
# padding value, so the full vocabulary needs len(word_index) + 1 rows.
# When num_words is set, the tokenizer never emits an index >= num_words,
# so the table can be capped at that value. The previous
# `len(tok.word_index)` was one row short for small vocabularies and
# oversized for large ones.
full_vocab = len(tok.word_index) + 1
vocab= min(tok.num_words, full_vocab) if tok.num_words else full_vocab

# Pad with zeros at the end and cut overly long sequences at the end, so
# both splits become arrays with max_seq_leng columns.
train_seq= pad_sequences(train_seq, maxlen= max_seq_leng, padding ='post', truncating='post')
test_seq= pad_sequences(test_seq, maxlen= max_seq_leng, padding = 'post', truncating='post')

# Preview the first four padded training sequences.
train_seq[0:4]

# **Text Classification Model 1 using one RNN layer**

In [None]:
# Model 1: embedding -> single SimpleRNN -> two dense layers.
model = Sequential([
    # Maps each word index to an embd_size-dimensional vector.
    Embedding(input_dim=vocab, output_dim=embd_size, input_length=max_seq_leng),
    # return_sequences=False: only the final hidden state is passed on.
    SimpleRNN(units=embd_size, return_sequences=False),
    # No activation is given here, so this layer is linear.
    Dense(30),
    # Single sigmoid unit for the binary fake/real decision.
    Dense(1, activation='sigmoid'),
])
model.summary()

In [None]:
# Binary classification setup: cross-entropy loss, Adam optimizer, and
# accuracy reported during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 10 epochs; the test split is supplied as validation data, so
# its loss/accuracy are reported after every epoch.
model.fit(
    x=train_seq,
    y=y_train,
    validation_data=(test_seq, y_test),
    epochs=10,
)

In [None]:
import tensorflow as tf

# Model outputs for the padded test sequences (one value per row, since
# the network ends in a single-unit layer).
prd = model.predict(test_seq)

# Threshold at 0.5 to obtain hard 0/1 labels, shown as a plain Python list.
(prd.ravel() >= 0.5).astype(int).tolist()

# **Text Classification Model 2 using one Bidirectional RNN layer**

In [None]:
from keras.layers import Bidirectional, Average

# Model 2: embedding -> bidirectional SimpleRNN -> flatten -> sigmoid output.
model= Sequential()
model.add(Embedding(input_dim=vocab , output_dim= embd_size, input_length= max_seq_leng))
# return_sequences=True keeps the output of every time step; the wrapper
# runs the RNN in both directions over the sequence.
model.add(Bidirectional(SimpleRNN(units= embd_size, return_sequences=True)))
# Collapse the per-time-step outputs into one vector per example.
model.add(Flatten())
# The output must be a probability: 'binary_crossentropy' (as compiled
# below) expects values in [0, 1] by default, so the final unit needs a
# sigmoid. Without it the loss and accuracy are computed on raw,
# unbounded scores.
model.add(Dense(1, activation='sigmoid'))
model.summary()

# Same training configuration as Model 1: Adam, binary cross-entropy,
# accuracy metric, 10 epochs, test split used as validation data.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_seq, y_train, epochs=10, validation_data=(test_seq, y_test) )

# **Text Classification Model 3 using Stacked RNN layers**

In [None]:
from keras.layers import Bidirectional

# Model 3: embedding -> three stacked SimpleRNN layers -> dense head.
model= Sequential()
model.add(Embedding(input_dim=vocab , output_dim= embd_size, input_length= max_seq_leng))
# The first two RNN layers return the full sequence so that the next RNN
# layer receives one vector per time step.
model.add(SimpleRNN(units= embd_size, return_sequences=True))
model.add(SimpleRNN(units= embd_size, return_sequences=True))
# The last RNN layer returns only its final state for classification.
model.add(SimpleRNN(units= embd_size, return_sequences=False))
# No activation is given here, so this layer is linear.
model.add(Dense(30))
# The output must be a probability: 'binary_crossentropy' (as compiled
# below) expects values in [0, 1] by default, so the final unit needs a
# sigmoid. Without it the loss and accuracy are computed on raw,
# unbounded scores.
model.add(Dense(1, activation='sigmoid'))
model.summary()

# Same training configuration as the previous models: Adam, binary
# cross-entropy, accuracy metric, 10 epochs, test split as validation data.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_seq, y_train, epochs=10, validation_data=(test_seq, y_test) )