In [None]:
import random
import pickle
import heapq

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from nltk.tokenize import RegexpTokenizer

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

In [None]:
# Load the news dataset and merge every article body into one training corpus.
# NOTE(review): the absolute "/content/..." path looks like a Colab location —
# adjust it when running the notebook elsewhere.
text_df = pd.read_csv("/content/fake_or_real_news.csv")
# Drop missing bodies and coerce to str so that " ".join cannot fail with a
# TypeError on NaN/float entries in the "text" column.
text = text_df["text"].dropna().astype(str).tolist()
joined_text = " ".join(text)

In [None]:
# Work with only the first 100,000 characters of the joined corpus.
partial_text = joined_text[0:100_000]

In [None]:
# Word-level tokenizer: every run of \w characters becomes one token, so
# punctuation and whitespace never appear in the token list.
tokenizer = RegexpTokenizer(pattern=r"\w+")
# Lower-case before tokenizing so that words differing only in case share a token.
tokens = tokenizer.tokenize(
    partial_text.lower()
)

In [None]:
# Sorted array of the distinct tokens; a token's position in it is its id.
unique_tokens = np.unique(tokens)
# Reverse lookup: token -> position in `unique_tokens`.
unique_token_index = dict(zip(unique_tokens, range(len(unique_tokens))))

In [None]:
# Number of consecutive tokens the model sees before predicting the next one.
n_words = 10
# Sliding windows of `n_words` tokens, one window per start position.
input_words = [
    tokens[start:start + n_words]
    for start in range(len(tokens) - n_words)
]
# The target of window k is the token right after it, i.e. tokens[k + n_words].
next_words = tokens[n_words:]

In [None]:
# One-hot containers, initially all False.
# x: (number of windows, n_words, vocabulary size) — one row per context token.
x = np.zeros(
    shape=(len(input_words), n_words, len(unique_tokens)),
    dtype=bool,
)
# y: (number of windows, vocabulary size) — one row per target token.
y = np.zeros(
    shape=(len(next_words), len(unique_tokens)),
    dtype=bool,
)

In [None]:
# Fill the one-hot tensors: for each window, mark the vocabulary id of every
# context token at its time step in x, and the id of the target token in y.
for row, (context, target) in enumerate(zip(input_words, next_words)):
  token_ids = [unique_token_index[token] for token in context]
  x[row, np.arange(len(context)), token_ids] = True
  y[row, unique_token_index[target]] = True

In [None]:
# Two stacked 128-unit LSTM layers followed by a softmax over the vocabulary.
model = Sequential([
    # The first LSTM returns the full sequence so the second LSTM gets one
    # input per time step.
    LSTM(128, input_shape=(n_words, len(unique_tokens)), return_sequences=True),
    LSTM(128),
    # One output unit per vocabulary entry, normalised to probabilities below.
    Dense(len(unique_tokens)),
    Activation("softmax"),
])

In [None]:
# Targets are one-hot rows, hence categorical cross-entropy.
model.compile(
    optimizer=RMSprop(learning_rate=0.01),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
# Train on the full one-hot dataset; samples are reshuffled every epoch.
model.fit(
    x,
    y,
    epochs=10,
    batch_size=128,
    shuffle=True,
)

Epoch 1/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 410ms/step - accuracy: 0.1100 - loss: 5.7358
Epoch 2/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 404ms/step - accuracy: 0.1302 - loss: 5.4193
Epoch 3/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 406ms/step - accuracy: 0.1616 - loss: 5.0844
Epoch 4/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 405ms/step - accuracy: 0.1753 - loss: 4.8393
Epoch 5/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 428ms/step - accuracy: 0.2140 - loss: 4.4906
Epoch 6/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 406ms/step - accuracy: 0.2529 - loss: 4.1629
Epoch 7/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 401ms/step - accuracy: 0.2927 - loss: 3.8491
Epoch 8/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 389ms/step - accuracy: 0.3448 - loss: 3.4997
Epoch 9/10
[1m1

<keras.src.callbacks.history.History at 0x7e3fff8cf990>

In [None]:
# Persist the trained network, then read it back from the same file so the
# cells below run against the saved copy.
model_path = "mymodel.h5"
model.save(model_path)
model = load_model(model_path)



In [None]:
def predict_next_word(input_text, n_best):
    """Return the vocabulary indices of the ``n_best`` most likely next words.

    The text is lower-cased and split with the same ``tokenizer`` that
    produced the training tokens, so punctuation attached to a word (for
    example ``"states,"``) no longer turns it into an unknown token. Only the
    last ``n_words`` tokens are encoded, because the one-hot input buffer has
    exactly ``n_words`` time steps; shorter inputs fill the leading time steps
    and leave the remaining ones all-zero.

    Args:
        input_text: Context string to continue.
        n_best: Number of candidate indices to return. Values larger than the
            number of model outputs are clamped.

    Returns:
        1-D NumPy array of indices into ``unique_tokens``, ordered from most
        to least probable. Empty when ``n_best`` is less than 1.

    Raises:
        KeyError: If a token of ``input_text`` is not in ``unique_token_index``.
    """
    # Keep the most recent n_words tokens; anything older does not fit the buffer.
    context = tokenizer.tokenize(input_text.lower())[-n_words:]

    X = np.zeros((1, n_words, len(unique_tokens)))
    for i, word in enumerate(context):
        X[0, i, unique_token_index[word]] = 1

    # verbose=0 suppresses the progress bar Keras prints on every predict call.
    predictions = model.predict(X, verbose=0)[0]

    # Clamp so np.argpartition never receives an out-of-range kth.
    n_best = min(int(n_best), len(predictions))
    if n_best < 1:
        return np.array([], dtype=np.intp)

    # argpartition selects the top n_best in arbitrary order; rank them
    # afterwards so the first entry is the most probable word.
    top = np.argpartition(predictions, -n_best)[-n_best:]
    return top[np.argsort(predictions[top])[::-1]]

In [None]:
# Ask for the five most likely continuations of a ten-word prompt.
possible = predict_next_word(
    input_text="The president of the united states has announced that he",
    n_best=5,
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step


In [None]:
# Map the predicted indices back to words. Converting to plain str keeps the
# printed list free of np.str_(...) wrappers.
print([str(unique_tokens[idx]) for idx in possible])

[np.str_('doesn'), np.str_('wants'), np.str_('are'), np.str_('will'), np.str_('was')]


In [None]:
def generate_text(input_text, text_length, creativity=3):
  """Extend ``input_text`` by ``text_length`` generated words.

  At every step the last ``n_words`` tokens of the text produced so far are
  passed to ``predict_next_word`` and one of its ``creativity`` candidates is
  picked uniformly at random. If the prediction fails with ``KeyError`` or
  ``IndexError`` (for example an out-of-vocabulary word in the seed, or no
  candidates), a random vocabulary word is used instead.

  Args:
    input_text: Seed text; it is kept verbatim at the start of the result.
    text_length: Number of words to append.
    creativity: Number of top candidates to sample from at each step.

  Returns:
    The seed text followed by the generated words, joined by single spaces.
  """
  word_sequence = input_text.split(' ')
  # Tokenize the seed once and extend this list as words are generated,
  # instead of re-joining and re-tokenizing the whole text on every step.
  context_tokens = tokenizer.tokenize(input_text.lower())
  for _ in range(text_length):
    # Always condition on the most recent n_words tokens, whatever the seed length.
    sub_sequence = " ".join(context_tokens[-n_words:])
    try:
      choice = unique_tokens[random.choice(predict_next_word(sub_sequence, creativity))]
    except (KeyError, IndexError):
      # Best-effort fallback; other errors (and Ctrl-C) are allowed to propagate.
      choice = random.choice(unique_tokens)
    choice = str(choice)
    word_sequence.append(choice)
    context_tokens.append(choice)
  return " ".join(word_sequence)

In [None]:
# Generate 100 words, sampling from the five best candidates at each step.
generate_text(
    input_text="The president of the united states has announced that he",
    text_length=100,
    creativity=5,
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

'The president of the united states has announced that he doesn t show this election it was on new evidence but it are gone a very race in the house clinton is in women to get democrats who had clear elsewhere to get people until a legislation himself though but also was lead on ernst there was and new new same thing a bernie resolution with them there is about that day that tend and just at as women as bill the same news majority at a man party who was out of tehran until and what they less say when get more now no an school r in iowa'