In [None]:
# Import necessary libraries
import random
import pickle

import numpy as np
import pandas as pd
from nltk.tokenize import RegexpTokenizer

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

In [None]:
# Load the news dataset into a DataFrame; the following cell reads its `text` column.
# NOTE(review): hard-coded absolute path (looks like a Colab workspace path) —
# adjust it when running the notebook elsewhere.
text_df = pd.read_csv("/content/fake_or_real_news.csv")

In [None]:
# Flatten the `text` column into a list of articles, then glue the articles
# together into one long space-separated string
text = text_df["text"].tolist()
joined_text = " ".join(text)

In [None]:
# Work on a prefix of the corpus only: its first 10,000 characters
# (the cut is made by character, so the last word may be truncated)
partial_text = joined_text[:10_000]

In [None]:
# Lower-case the excerpt and split it into runs of word characters (\w+);
# punctuation and whitespace are discarded
tokenizer = RegexpTokenizer(pattern=r"\w+")
tokens = tokenizer.tokenize(partial_text.lower())

In [None]:
# Build the vocabulary: the sorted array of distinct tokens, plus a reverse
# lookup from each token to its position in that array
unique_tokens = np.unique(tokens)
unique_token_index = dict(zip(unique_tokens, range(len(unique_tokens))))

In [None]:
# Context window: number of consecutive words fed to the model per prediction
n_words = 10
# Empty containers for the training windows and for the word following each window
input_words, next_words = [], []

In [None]:
# Slide a window of n_words tokens over the corpus: each window is one training
# input and the token immediately after it is the target.
# The lists are rebuilt from scratch instead of appended to, so re-running this
# cell cannot duplicate the training examples.
input_words = [tokens[i:i + n_words] for i in range(len(tokens) - n_words)]
next_words = [tokens[i + n_words] for i in range(len(tokens) - n_words)]

In [None]:
# Allocate all-False one-hot tensors:
#   X -> (number of windows, n_words, vocabulary size)
#   y -> (number of windows, vocabulary size)
X = np.zeros(shape=(len(input_words), n_words, len(unique_tokens)), dtype=np.bool_)
y = np.zeros(shape=(len(next_words), len(unique_tokens)), dtype=np.bool_)

In [None]:
# One-hot encode the data: switch on the vocabulary slot of every word in each
# window (X) and of the word that follows that window (y)
for row, (window, target) in enumerate(zip(input_words, next_words)):
    for col, token in enumerate(window):
        X[row, col, unique_token_index[token]] = True
    y[row, unique_token_index[target]] = True

In [None]:
# RNN Model
# Start from an empty Sequential container; the layers are appended to it in
# the following cells
rnn_model = Sequential()

In [None]:
# Stack two SimpleRNN layers of 128 units each. The first receives one-hot
# windows of shape (n_words, vocabulary size) and returns its full output
# sequence (return_sequences=True) so the second layer gets one vector per
# time step; the second layer returns only its final output.
# NOTE(review): re-running this cell appends the layers again — re-create
# rnn_model first.
rnn_model.add(SimpleRNN(128, input_shape=(n_words, len(unique_tokens),), return_sequences=True))
rnn_model.add(SimpleRNN(128))

In [None]:
# Project to one score per vocabulary token, then apply softmax so the outputs
# form a probability distribution over the next word
rnn_model.add(Dense(len(unique_tokens)))
rnn_model.add(Activation('softmax'))

In [None]:
# Compile for multi-class prediction over one-hot targets: categorical
# cross-entropy loss, RMSprop with learning rate 0.01, accuracy as the metric
rnn_model.compile(loss="categorical_crossentropy", optimizer=RMSprop(learning_rate=0.01), metrics=["accuracy"])

In [None]:
# Fit on the full (X, y) set: 30 epochs, batches of 128, shuffling enabled.
# No validation data is passed, so the reported accuracy is training accuracy only.
rnn_model.fit(X, y, batch_size=128, epochs=30, shuffle=True)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7e84440d7d00>

In [None]:
# Save the trained RNN model to "mymodel_rnn.h5" (relative to the working directory).
# NOTE(review): the captured output shows a warning from Keras' saving API —
# presumably the legacy-HDF5 notice; confirm and consider the native format.
rnn_model.save("mymodel_rnn.h5")

  saving_api.save_model(


In [None]:
# LSTM Model
# Start from an empty Sequential container; the layers are appended to it in
# the following cells
lstm_model = Sequential()

In [None]:
# Stack two LSTM layers of 128 units each. The first receives one-hot windows
# of shape (n_words, vocabulary size) and returns its full output sequence
# (return_sequences=True) so the second layer gets one vector per time step;
# the second layer returns only its final output.
# NOTE(review): re-running this cell appends the layers again — re-create
# lstm_model first.
lstm_model.add(LSTM(128, input_shape=(n_words, len(unique_tokens)), return_sequences=True))
lstm_model.add(LSTM(128))

In [None]:
# Project to one score per vocabulary token, then apply softmax so the outputs
# form a probability distribution over the next word
lstm_model.add(Dense(len(unique_tokens)))
lstm_model.add(Activation('softmax'))

In [None]:
# Compile with the same settings as the RNN model: categorical cross-entropy
# loss, RMSprop with learning rate 0.01, accuracy as the metric
lstm_model.compile(loss="categorical_crossentropy", optimizer=RMSprop(learning_rate=0.01), metrics=["accuracy"])

In [None]:
# Fit on the full (X, y) set: 30 epochs, batches of 128, shuffling enabled.
# No validation data is passed, so the reported accuracy is training accuracy only.
lstm_model.fit(X, y, batch_size=128, epochs=30, shuffle=True)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7e844f858520>

In [None]:
# Save the trained LSTM model to "mymodel_lstm.h5" (relative to the working directory)
lstm_model.save("mymodel_lstm.h5")

In [None]:
# Function to predict the next word based on the input text and the trained model
def predict_next_word(model, input_text, n_best, unique_tokens, unique_token_index, n_words):
    """Return the vocabulary indices of the ``n_best`` highest-scoring next words.

    The input text is lower-cased, split on whitespace and one-hot encoded into
    an array of shape ``(1, n_words, len(unique_tokens))`` that is handed to
    ``model.predict``.

    Args:
        model: Object whose ``predict`` method accepts the encoded window and
            returns an array holding one row of scores per sample.
        input_text: Whitespace-separated context words. If it holds more than
            ``n_words`` words only the last ``n_words`` are used; if it holds
            fewer, the remaining time steps stay all-zero.
        n_best: Number of candidate indices to return. Values larger than the
            number of scores are clamped to that number.
        unique_tokens: The vocabulary; only its length is used here.
        unique_token_index: Mapping from token to its vocabulary index.
        n_words: Number of time steps in the encoded window.

    Returns:
        A numpy array with the indices of the ``n_best`` largest scores, in no
        particular order.

    Raises:
        KeyError: If a context word is missing from ``unique_token_index``.
    """
    words = input_text.lower().split()
    # Keep only the most recent n_words words: writing past the window would
    # raise IndexError on longer inputs.
    words = words[max(len(words) - n_words, 0):]

    X = np.zeros((1, n_words, len(unique_tokens)))
    for i, word in enumerate(words):
        X[0, i, unique_token_index[word]] = 1

    predictions = model.predict(X)[0]
    # argpartition rejects a kth outside the array, so never request more
    # candidates than there are scores.
    n_best = min(n_best, len(predictions))
    return np.argpartition(predictions, -n_best)[-n_best:]

In [None]:
# Function to generate text based on the input text and the trained model
def generate_text(model, input_text, text_length, creativity, unique_tokens, unique_token_index, n_words):
    """Extend ``input_text`` by ``text_length`` words chosen with the model.

    At every step the current text is lower-cased and tokenized with the
    notebook-level ``tokenizer``; the last ``n_words`` tokens are passed to
    ``predict_next_word`` and one of the returned candidates is picked
    uniformly at random and appended.

    Args:
        model: Model forwarded to ``predict_next_word``.
        input_text: Seed text; it is kept verbatim at the start of the result.
        text_length: Number of words to append.
        creativity: Number of candidates requested per step (passed as
            ``n_best``); the appended word is drawn from these candidates.
        unique_tokens: Vocabulary, indexable by the predicted indices.
        unique_token_index: Mapping from token to its vocabulary index.
        n_words: Size of the context window, in tokens.

    Returns:
        The seed words followed by the generated words, joined by single spaces.
    """
    word_sequence = input_text.split()
    for _ in range(text_length):
        # Condition on the n_words most recent tokens. Slicing from the end
        # keeps the window aligned with the newest word even when the seed does
        # not tokenize to exactly n_words tokens (a window counted from the
        # start would lag behind the text being generated).
        context_tokens = tokenizer.tokenize(" ".join(word_sequence).lower())
        sub_sequence = " ".join(context_tokens[max(len(context_tokens) - n_words, 0):])
        try:
            candidates = predict_next_word(model, sub_sequence, creativity, unique_tokens, unique_token_index, n_words)
            choice = unique_tokens[random.choice(candidates)]
        except KeyError:
            # A context word is not in the vocabulary: fall back to a random
            # vocabulary word. Only KeyError is handled so that interrupts and
            # genuine model errors are not silently turned into random text.
            choice = random.choice(unique_tokens)
        word_sequence.append(choice)
    return " ".join(word_sequence)

In [None]:
# Extend the seed sentence by 100 words with the RNN model, requesting 5
# candidate words per step
generated_text_rnn = generate_text(
    model=rnn_model,
    input_text="He will have to look into this thing and he",
    text_length=100,
    creativity=5,
    unique_tokens=unique_tokens,
    unique_token_index=unique_token_index,
    n_words=n_words,
)



In [None]:
# Extend the same seed sentence by 100 words with the LSTM model, requesting 5
# candidate words per step
generated_text_lstm = generate_text(
    model=lstm_model,
    input_text="He will have to look into this thing and he",
    text_length=100,
    creativity=5,
    unique_tokens=unique_tokens,
    unique_token_index=unique_token_index,
    n_words=n_words,
)



In [None]:
# Show the text generated by the RNN model (seed sentence followed by the generated words)
print("Generated Text (RNN): ", generated_text_rnn)

Generated Text (RNN):  He will have to look into this thing and he are a are of the a are the fighting the of fighting fighting a fighting the are a the a the the fighting the fighting of a the the of the of of fighting are of of of fighting a a are the fighting the are of are a of the of fighting fighting the the the are are of a a a the of a the fighting fighting a fighting of a the the fighting of fighting of of the a of of fighting a a are the of are fighting of a are the are fighting a a


In [None]:
# Show the text generated by the LSTM model (seed sentence followed by the generated words)
print("Generated Text (LSTM): ", generated_text_lstm)

Generated Text (LSTM):  He will have to look into this thing and he defending he lie and isn it his be him for anthony and s paul to be him between way has chances to be himself the time and obama as the final stretch of her ever vytt49yvoe campaign going near is a afraid for s compared hillary it the chances of its bigger that it just up be for stand the people has done is comey and during it front election she ought that has changed hillary desperation like before reason the same this of especially it s gone what her the revelation now the old candidate then the years is
