# Generator with LSTM

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Here we should be reading from the FAKE NEWS
texts = [
    "Fake news sentence 1",
    "Fake news sentence 2",
    "Fake news sentence 3",
    # Add more fake news sentences as needed
]

# Fit a word-level vocabulary on the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
total_words = len(tokenizer.word_index) + 1  # Vocabulary size

# Turn every sentence into its growing prefixes (length 2, 3, ..., full
# sentence). The last token of each prefix is the word to predict and the
# tokens before it are the context.
input_sequences = []
for encoded_sentence in tokenizer.texts_to_sequences(texts):
    input_sequences.extend(
        encoded_sentence[:end] for end in range(2, len(encoded_sentence) + 1)
    )

# Left-pad all prefixes to the length of the longest one so they stack into
# a single 2-D array.
max_sequence_length = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_length,
    padding='pre',
)

# Context tokens are the inputs; the final token of each row is the target.
X, y = input_sequences[:, :-1], input_sequences[:, -1]

# Next-word model: embedding -> LSTM -> softmax over the whole vocabulary.
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_length - 1))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))

# y holds integer word indices rather than one-hot vectors, so the sparse
# variant of the cross-entropy loss is the one that matches the targets.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# Train the model on (context, next word) pairs before it is used for
# generation; an unfitted model only emits random predictions.
# NOTE(review): the epoch count is a starting point — tune it for the real corpus.
model.fit(X, y, epochs=100, verbose=1)

# Generate text using the trained model
seed_text = "Your starting text here"
next_words = 100

# Encode the seed once and keep extending the id list, instead of
# re-tokenizing the whole growing string on every step. Seed words that are
# not in the vocabulary are dropped by the tokenizer.
token_ids = tokenizer.texts_to_sequences([seed_text])[0]
generated_words = []

for _ in range(next_words):
    # pad_sequences left-pads short contexts and keeps only the most recent
    # tokens once the context grows beyond the model's input length.
    context = pad_sequences([token_ids], maxlen=max_sequence_length - 1, padding='pre')
    probabilities = model.predict(context, verbose=0)[0]
    # Index 0 is the padding slot and has no entry in tokenizer.index_word,
    # so it is excluded from the argmax to avoid a KeyError.
    predicted_index = int(np.argmax(probabilities[1:])) + 1
    token_ids.append(predicted_index)
    generated_words.append(tokenizer.index_word[predicted_index])

seed_text = " ".join([seed_text, *generated_words])

print(seed_text)

# Generator with Transformers

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config, AdamW

# Here we should be reading from the FAKE NEWS
text_data = [
    "Fake news sentence 1",
    "Fake news sentence 2",
    "Fake news sentence 3",
    # Add more fake news sentences as needed
]

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# GPT-2 ships without a padding token, so batching with padding=True raises
# a ValueError unless one is assigned. Reusing EOS is the usual choice; the
# attention mask still marks which positions are padding.
tokenizer.pad_token = tokenizer.eos_token

# Encode the corpus as one padded batch of PyTorch tensors
# (keys: 'input_ids' and 'attention_mask').
tokenized_data = tokenizer(text_data, padding=True, truncation=True, max_length=512, return_tensors='pt')

# Model Training
config = GPT2Config.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
# Start from the pretrained GPT-2 weights: building the model from the config
# alone gives randomly initialised weights, which a few epochs at a
# fine-tuning learning rate on a small corpus cannot turn into a usable
# language model.
model = GPT2LMHeadModel.from_pretrained('gpt2', config=config)
train_dataset = torch.utils.data.TensorDataset(tokenized_data['input_ids'], tokenized_data['attention_mask'])
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
# torch.optim.AdamW is the replacement for the deprecated transformers.AdamW.
# NOTE: its default weight_decay is 0.01, whereas transformers.AdamW used 0.0.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# from_pretrained returns the model in eval mode; enable dropout for training.
model.train()

num_epochs = 5
for epoch in range(num_epochs):
    for input_ids, attention_mask in train_loader:
        # Label -100 is ignored by the loss, so padded positions do not
        # contribute to the gradient. The model shifts labels internally.
        labels = input_ids.masked_fill(attention_mask == 0, -100)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

# Generate text using the trained model
seed_text = "Your starting text here"

# Switch off dropout so generation is not perturbed by training-time noise.
model.eval()

# Encode through the tokenizer call (rather than encode()) so the attention
# mask is available and can be passed to generate() explicitly.
encoded_seed = tokenizer(seed_text, return_tensors='pt')
input_ids = encoded_seed['input_ids']
output = model.generate(
    input_ids,
    attention_mask=encoded_seed['attention_mask'],
    max_length=200,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id,
)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("Generated Fake News: ", generated_text)
