In [19]:
import numpy as np
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras.preprocessing.text import Tokenizer
import re
from codecarbon import EmissionsTracker
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from keras.callbacks import EarlyStopping

tracker = EmissionsTracker(project_name="Neural_network", measure_power_secs=10)
tracker.start_task("Neural_network_learning")

# Load the dataset
df = pd.read_csv('fake_and_real_data.csv')

# Preprocess the dataset
def preprocess_text(text):
    text = re.sub(r'http\S+', '', text)  # remove URLs
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # remove punctuation
    text = text.lower()  # convert to lowercase
    return text

# Assuming the dataset has two columns: 'text' and 'label'
df['Text'] = df['Text'].apply(preprocess_text)
texts = df['Text'].values
labels = df['label'].values

# Encode the labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Tokenize the text
tokenizer = Tokenizer(num_words=5000, lower=True)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
vocab_size = len(word_index) + 1

# Convert texts to sequences
sequences = tokenizer.texts_to_sequences(texts)
maxlen = 200  # increase maxlen to capture more context
X = pad_sequences(sequences, maxlen=maxlen)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Build the neural network model
model = Sequential()
model.add(Embedding(vocab_size, 100, input_length=maxlen))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))  # increase LSTM units
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with early stopping
epochs = 10  # increase the number of epochs
batch_size = 64
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), 
                    callbacks=[early_stopping], verbose=2)

# Evaluate the model
y_pred = (model.predict(X_test) > 0.5).astype("int32")
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')
print(f'Recall: {recall * 100:.2f}%')
print(f'F1 Score: {f1 * 100:.2f}%')

# Function to check if a string is fake news

tracker.stop()
# Example usage
text_to_check = "Your text to check if it's fake news or not."
result = check_fake_news(text_to_check)
print(result)


Epoch 1/10
117/117 - 83s - loss: 0.1739 - accuracy: 0.9465 - val_loss: 0.0128 - val_accuracy: 0.9960 - 83s/epoch - 706ms/step
Epoch 2/10
117/117 - 76s - loss: 0.0149 - accuracy: 0.9970 - val_loss: 0.0231 - val_accuracy: 0.9964 - 76s/epoch - 646ms/step
Epoch 3/10
117/117 - 75s - loss: 0.0111 - accuracy: 0.9985 - val_loss: 0.0075 - val_accuracy: 0.9980 - 75s/epoch - 639ms/step
Epoch 4/10
117/117 - 75s - loss: 0.0028 - accuracy: 0.9999 - val_loss: 0.0039 - val_accuracy: 0.9984 - 75s/epoch - 641ms/step
Epoch 5/10
117/117 - 74s - loss: 0.0016 - accuracy: 0.9997 - val_loss: 0.0064 - val_accuracy: 0.9976 - 74s/epoch - 629ms/step
Epoch 6/10
117/117 - 74s - loss: 2.1915e-04 - accuracy: 1.0000 - val_loss: 0.0101 - val_accuracy: 0.9964 - 74s/epoch - 632ms/step
Epoch 7/10
117/117 - 76s - loss: 1.5514e-04 - accuracy: 1.0000 - val_loss: 0.0099 - val_accuracy: 0.9968 - 76s/epoch - 647ms/step
Accuracy: 99.84%
Precision: 99.68%
Recall: 100.00%
F1 Score: 99.84%
Fake News


In [52]:
# def check_fake_news(text):
#     sequence = tokenizer.texts_to_sequences([text])
#     padded_sequence = pad_sequences(sequence, maxlen=maxlen)
#     prediction = (model.predict(padded_sequence) > 0.5).astype("int32")
#     return 'Fake News' if prediction[0][0] == 1 else 'Real News'

# Example usage
from IPython.display import clear_output

def check_fake_news(text):
    text = preprocess_text(text)
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=maxlen)
    # print(model.predict(padded_sequence))
    prediction = (model.predict(padded_sequence) < 0.5).astype("int32")
    return ['Fake News', "Fake"] if prediction[0][0] == 1 else ['Real News', "Real"]

# yes = 0
# no = 0
# for x in range(1000):
#   result = check_fake_news(df["Text"][x])
#   if df["label"][x] == result[1]:
#     yes = yes + 1
#   else:
#     no = no + 1
#   # clear_output()
# print(f"Got {yes} right, {no} wrong")

In [66]:
# range(df.size)
text = " REPORT: Sean Spicer Forces Staffers To Surrender Phones In Search Of Leakers Donald Trump s paranoia and desperation to keep the American people in the dark just took an unconstitutional turn.For weeks, various news outlets have obtained inside information from White House sources such as memos and descriptions of what is going on behind the scenes. That s how we know about what executive orders Trump is considering and how we know about the chaos and infighting that has been plaguing Trump s administration. It s also how we know more about Trump s Russia scandal.Rather than address his executive orders or the chaos in the White House or his Russia scandal, Trump would rather whine about the leaks instead.The real story here is why are there so many illegal leaks coming out of Washington? Will these leaks be happening as I deal on N.Korea etc?  Donald J. Trump (@realDonaldTrump) February 14, 2017Information is being illegally given to the failing @nytimes & @washingtonpost by the intelligence community (NSA and FBI?).Just like Russia  Donald J. Trump (@realDonaldTrump) February 15, 2017The real scandal here is that classified information is illegally given out by  intelligence  like candy. Very un-American!  Donald J. Trump (@realDonaldTrump) February 15, 2017You know what is really un-American? Forcing your staffers to hand over their phones for an inspection.According to a report by Politico, White House mouthpiece Sean Spicer actually confiscated the phones of his staff to go through them in order to make sure they are not leaking any information to the press.Of course, if someone had refused to hand over their phone they would have more than likely lost their job so they definitely didn t volunteer to give their phones up. They had to do it out of fear.The staffers were called to an  emergency meeting  and told to place any electronic device they had on them on a table, including personal cell phones, so they could be searched.But the Constitution specifically bars such illegal searches and seizures.The Fourth Amendment states, The right of the people to be secure in their persons, houses, papers, and effects, against unreasonable searches and seizures, shall not be violated, and no Warrants shall issue, but upon probable cause, supported by Oath or affirmation, and particularly describing the place to be searched, and the persons or things to be seized. In other words, if Trump really thinks the leaks are  illegal,  he should have to get a warrant to search the phones of his employees. But he didn t, which means his search is illegal and in violation of the constitutional rights of his staffers. Trump and Spicer can hardly call the confiscation voluntary either since many of these staffers probably believed they would be fired if they did not hand over their phones. That means they were forced to do so, and they can file a lawsuit for having their privacy violated.Once again, Donald Trump and his team are violating the Constitution. Republicans in Congress cannot continue to allow Trump to trample our constitutional rights. It s time to investigate and impeach him. Because at this rate, he ll literally be ripping up the Constitution if he isn t stopped.Featured image via Win McNamee/Getty Images"
result = check_fake_news(text)
result



['Fake News', 'Fake']

In [58]:
import mlflow
mlflow.sklearn.save_model(model, "Neural network")

MlflowException: Path 'Neural network' already exists and is not empty