# FAKE TEXT DETECTION BY TRAINING BERT ON GIVEN DATA 

In [None]:
#%pip install pandas
#%pip install numpy 

In [None]:
import pandas as pd
import numpy as np
import csv

**LOAD DATA**

In [None]:
# Read both CSV files in one pass; the first path feeds df_fake, the second df_true.
df_fake, df_true = (
    pd.read_csv(csv_path)
    for csv_path in ("Data/Fake.csv/Fake.csv", "Data/True.csv/True.csv")
)

**EDIT DATA**

In [None]:
# Tag every row with its class before merging so the origin survives the concat.
df_fake["Label"] = "Fake"
df_true["Label"] = "True"
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# each half keeps its own 0-based row labels, so labels are duplicated and a
# label-based lookup such as df.loc[0] returns more than one row.
df = pd.concat([df_fake, df_true], ignore_index = True)
df.head(100)

**DETECT NULL VALUES**

In [None]:
# Number of missing entries in each column of the combined frame.
df.isna().sum()

## DATA ANALYSIS

In [None]:
#%pip install matplotlib
#%pip install seaborn

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Side-by-side histograms of the `subject` column, one panel per source frame.
subject_panels = [
    (df_fake, 'Fake News Subject'),
    (df_true, 'True  News Subject'),
]
fig, axes = plt.subplots(1, 2, figsize = (15, 6))
for ax, (frame, panel_title) in zip(axes, subject_panels):
    # No `palette` here: seaborn only applies a palette to a `hue` variable,
    # and none is assigned, so the argument was ignored (newer seaborn
    # versions also emit a warning about it).
    sns.histplot(frame.subject, alpha = 0.5, ax = ax)
    # Subject names are long; rotate them so they do not overlap.
    ax.tick_params(axis = 'x', rotation = 90)
    ax.set_title(panel_title)
# Print the per-subject counts. .to_dict() converts the counts to plain Python
# ints, so the printed dict does not show NumPy scalar reprs.
print("Fake News Subject : ", df_fake.subject.value_counts().to_dict())
print("True News Subject : ", df_true.subject.value_counts().to_dict())

In [None]:
# Class balance of the combined frame, drawn on an explicit Axes.
label_fig, label_ax = plt.subplots()
# No `palette` here: without a `hue` variable seaborn ignores it.
sns.histplot(df.Label, alpha = 0.5, ax = label_ax)
label_ax.tick_params(axis = 'x', rotation = 90)
label_ax.set_title('True VS Fake News')
# Last expression of the cell: the exact counts behind the bars.
df.Label.value_counts()

**SETUP TRAINING DATA**

In [None]:
# Model input = title followed by article body. The explicit space keeps the
# last word of the title from fusing with the first word of the body.
# NOTE: this overwrites df["text"], so run the cell once per fresh load;
# a second run would prepend the title again.
df["text"] = df["title"] + " " + df["text"] #considering text and title as X

In [None]:
# Encode the class as an integer target: 'True' -> 1, 'Fake' -> 0.
# The identity entries (1 -> 1, 0 -> 0) make the cell safe to re-run: without
# them a second run finds no matching key and turns every label into NaN.
df['Label'] = df['Label'].map({'True': 1, 'Fake': 0, 1: 1, 0: 0})

In [None]:
#%pip install scikit-learn

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing. The split is stratified on the label
# column and uses a fixed random_state so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['Label'],
    test_size = 0.2,
    random_state = 10,
    stratify = df['Label'],
)

## SETTING UP BERT

In [None]:
%pip install transformers

**SETUP AUTOTOKENIZER**

In [None]:
from transformers import AutoTokenizer
def tokenize(X, max_length = 100):
    """Encode an iterable of texts into fixed-length inputs for BERT.

    Args:
        X: Iterable of strings (for example a pandas Series). It is
            materialised with ``list`` before being passed to the tokenizer.
        max_length: Number of tokens per encoded sequence. Longer texts are
            truncated and shorter ones are padded to exactly this length.
            Defaults to 100, the value that used to be hard-coded; it has to
            agree with the sequence length the model's input layers expect.

    Returns:
        The result of calling the module-level ``tokenizer`` with these
        settings: TensorFlow tensors are requested, with attention masks
        and without token type ids.
    """
    # `tokenizer` is a module-level name resolved at call time, so it only
    # needs to exist before the first call, not before this definition.
    return tokenizer(
        text = list(X),
        add_special_tokens = True,
        max_length = max_length,
        truncation = True,
        padding = 'max_length',
        return_tensors = 'tf',
        return_token_type_ids = False,
        return_attention_mask = True,
        verbose = True
        )
# Tokenizer for the 'bert-base-uncased' checkpoint. `tokenize` reads this
# module-level name when it is called.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path = 'bert-base-uncased'
)

**TOKENIZE VALUES**

In [None]:
# Encode the training split first, then the test split, with identical settings.
X_train_tokens, X_test_tokens = map(tokenize, (X_train, X_test))

In [None]:
# Show the encoded test inputs and, on the following line(s), their labels.
print(X_test_tokens, y_test, sep = "\n")

## BUILDING THE MODEL

In [None]:
%pip install tensorflow
%pip install keras
#%pip install tensorflow-cpu==2.10
#%pip install tensorflow-directml-plugin

In [None]:
import tensorflow as tf 
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Embedding
Adams= tf.keras.optimizers.legacy.Adam
from transformers import TFBertModel

#print(tf.add([1.0, 2.0], [3.0, 4.0])) 
#tf.config.list_physical_devices('GPU') 
#print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


**CREATE BERT FUNCTION**

In [None]:
# Token length of every input sequence; must match the tokenizer's max_length.
Length = 100
def get_model(dropout_rate = 0.2):
    """Build and compile a binary classifier on top of the global ``bert``.

    The network feeds token ids and the attention mask through ``bert``,
    takes its pooled output, and applies dropout -> Dense(64, relu) ->
    dropout -> Dense(1, sigmoid).

    Args:
        dropout_rate: Rate used by both Dropout layers. Defaults to 0.2,
            the value that used to be hard-coded.

    Returns:
        A compiled Keras ``Model`` whose inputs are named 'input_ids' and
        'input_mask' (each of shape ``(Length,)``, int32) and whose output
        is a single sigmoid unit.

    Note:
        Reads the module-level names ``bert``, ``Length`` and ``Adams``;
        ``bert`` must be loaded before this function is called.
    """
    input_ids = Input(shape = (Length,), dtype = tf.int32, name = 'input_ids')
    input_mask = Input(shape = (Length,), dtype = tf.int32, name = 'input_mask')
    # Element 1 of the BERT output is the pooler output.
    embeddings = bert([input_ids, input_mask])[1]
    out = Dropout(dropout_rate)(embeddings)
    # 64-unit dense layer
    out = Dense(64, activation = 'relu')(out)
    out = Dropout(dropout_rate)(out)
    y = Dense(1, activation = 'sigmoid')(out)
    model = Model(inputs = [input_ids, input_mask], outputs = y)
    # Fine-tune the encoder as well. Addressing the layer through `bert`
    # instead of a positional index (model.layers[2]) keeps this correct
    # even if layers are added or reordered.
    bert.trainable = True
    # Define the optimizer.
    optimizer = Adams(learning_rate = 1e-05, epsilon = 1e-08, decay = 0.01, clipnorm = 1.0)
    # Compile the model. `metrics` is passed as a list, the documented form;
    # newer Keras releases reject a bare string.
    model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

In [None]:
# Pretrained 'bert-base-uncased' encoder. `get_model` reads this module-level
# name when it is called, so this cell has to run before the model is built.
bert = TFBertModel.from_pretrained(
    pretrained_model_name_or_path = 'bert-base-uncased'
)

**PLOT MODEL STRUCTURE**

In [None]:
#%pip install pydot
#%pip install graphviz

In [None]:
# Build the compiled classifier; `bert` must already be loaded at this point.
model = get_model()
# Draw the layer graph as the cell output.
# NOTE(review): presumably needs pydot and graphviz (see the commented-out
# installs in the previous cell) — confirm in the target environment.
tf.keras.utils.plot_model(model)

## TRAIN MODEL

In [None]:
from keras.callbacks import EarlyStopping
history = model.fit(x = {'input_ids':X_train_tokens['input_ids'],'input_mask':X_train_tokens['attention_mask']}, 
                    y = y_train, epochs=1, 
                    validation_split = 0.2, 
                    batch_size = 16, 
                    callbacks=[EarlyStopping( monitor='val_accuracy' ,mode='max', patience=3,verbose=False,restore_best_weights=True)])