# Importing Libraries

In [None]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords , wordnet
import re
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow
from tensorflow import keras
from tensorflow.keras import layers # type: ignore
import joblib
from sklearn.model_selection import train_test_split
from nltk import pos_tag

# Data Loading

In [None]:
# Read the raw news dataset; later cells use its 'text' and 'label' columns.
df = pd.read_csv(filepath_or_buffer='news.csv')


# Data Pre-processing

In [159]:
# Shared preprocessing resources, created once so clean_text does not rebuild
# them for every document.
# NOTE(review): both objects rely on NLTK corpus data being installed locally;
# no nltk.download(...) call is visible in this notebook - confirm the setup.
stop_words = {*stopwords.words('english')}
lemmatizer = WordNetLemmatizer()

In [160]:
def get_pos(pos):
    """Translate a POS tag string into the matching WordNet POS constant.

    Parameters
    ----------
    pos : str
        Part-of-speech tag; in this notebook it is the tag returned by
        ``pos_tag`` for a token.

    Returns
    -------
    The ``wordnet`` constant selected by the tag's first letter
    (``J`` -> adjective, ``N`` -> noun, ``V`` -> verb, ``R`` -> adverb).
    Any other tag falls back to ``wordnet.NOUN``.
    """
    prefix_to_wordnet = (
        ('J', wordnet.ADJ),
        ('N', wordnet.NOUN),
        ('V', wordnet.VERB),
        ('R', wordnet.ADV),
    )
    for prefix, wordnet_pos in prefix_to_wordnet:
        if pos.startswith(prefix):
            return wordnet_pos
    # Unrecognised tags are treated as nouns.
    return wordnet.NOUN

In [161]:
def clean_text(text):
    """Normalize a raw article into a space-joined string of lemmas.

    Pipeline: lowercase -> replace every character other than ``a-z``,
    ``0-9`` and whitespace with a space -> collapse whitespace -> tokenize ->
    drop English stop words -> POS-tag the remaining tokens -> lemmatize each
    token using the WordNet POS chosen by ``get_pos``.

    Parameters
    ----------
    text : str
        Raw document. Any non-string value (for example ``None`` or the float
        NaN found in a missing DataFrame cell) is treated as an empty document.

    Returns
    -------
    str
        Lemmatized tokens joined by single spaces, or ``''`` when there is
        nothing to process.
    """
    # A missing cell is not a str; calling .lower() on it would raise
    # AttributeError and abort the whole DataFrame.apply.
    if not isinstance(text, str):
        return ''

    text = text.lower()
    # Punctuation and non-ASCII characters become spaces so that the words
    # around them stay separated.
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()

    # Nothing left after normalization: skip the tokenizer/tagger entirely.
    if not text:
        return ''

    words = word_tokenize(text)
    kept = [w for w in words if w not in stop_words]
    tagged = pos_tag(kept)
    lemmas = [lemmatizer.lemmatize(w, get_pos(pos)) for w, pos in tagged]
    return ' '.join(lemmas)

In [162]:
# Derive the model inputs: a cleaned copy of each article and a numeric target.
df['Clean text'] = df['text'].apply(clean_text)

# Encode the target as FAKE -> 0, REAL -> 1.
# The identity entries (0 -> 0, 1 -> 1) keep this cell idempotent: without them,
# re-running the cell would map the already-encoded labels to NaN, because
# Series.map turns every value missing from the mapping into NaN.
df['label'] = df['label'].map({'FAKE': 0, 'REAL': 1, 0: 0, 1: 1})

# Fail fast on unexpected label values instead of training on NaN targets.
assert df['label'].notna().all(), "label column contains values other than 'FAKE'/'REAL'"

# Data Exploration

In [163]:
# Show the first raw article for comparison with its cleaned version.
df['text'].iat[0]



In [164]:
# Show the same article after clean_text has been applied.
df['Clean text'].iat[0]

'daniel greenfield shillman journalism fellow freedom center new york writer focus radical islam final stretch election hillary rodham clinton go war fbi word unprecedented thrown around often election ought retire still unprecedented nominee major political party go war fbi exactly hillary people do coma patient wake watch hour cnn hospital bed would assume fbi director james comey hillary opponent election fbi attack everyone obama cnn hillary people circulate letter attack comey currently medium hit piece lambast target trump surprising clinton ally start run attack ad fbi fbi leadership warn entire leave wing establishment form lynch mob continue go hillary fbi credibility attack medium democrat preemptively head result investigation clinton foundation hillary clinton covert struggle fbi agent obama doj people go explosively public new york time compare comey j edgar hoover bizarre headline james comey role recall hoover fbi fairly practically admit front spout nonsense boston glob

# Data Splitting and TF-IDF Vectorization

In [165]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
train_X, test_X, train_y, test_y = train_test_split(
    df['Clean text'],
    df['label'],
    random_state=42,
    test_size=0.2,
)

In [166]:
# TF-IDF vectorizer with default settings; it is fitted on the training split
# in the next cell and saved with joblib at the end of the notebook.
vector = TfidfVectorizer()

In [167]:
# Fit the vectorizer on the training split only, then reuse the fitted
# vectorizer for the test split so both share the same feature columns.
train_X_sparse = vector.fit_transform(train_X)
test_X_sparse = vector.transform(test_X)

# Convert to dense arrays for the Keras model built below.
train_X_vector = train_X_sparse.toarray()
test_X_vector = test_X_sparse.toarray()

# Model training

In [168]:
# Number of feature columns in the vectorized training matrix; it sets the
# width of the network's input layer.
input_dim = train_X_vector.shape[-1]

In [169]:
# Feed-forward binary classifier over the vectorized text:
# input_dim -> 128 (ReLU) -> 64 (ReLU) -> 1 (sigmoid).
model = keras.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=(input_dim,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# A single sigmoid output with binary cross-entropy matches the 0/1 labels.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [170]:
# Train for 10 epochs in mini-batches of 64.
# NOTE(review): the held-out test split doubles as validation data here, so the
# per-epoch val_* metrics and the model.evaluate call below describe the same rows.
model.fit(
    x=train_X_vector,
    y=train_y,
    batch_size=64,
    epochs=10,
    validation_data=(test_X_vector, test_y),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2e3f2ea1820>

In [171]:
# Report [loss, accuracy] on the held-out split (metrics come from model.compile).
model.evaluate(x=test_X_vector, y=test_y)



[0.17939774692058563, 0.938437283039093]

# Saving model and vectorizer

In [172]:
# Persist the fitted vectorizer so the same vocabulary can be reloaded later.
joblib.dump(value=vector, filename='news_vectorizer.joblib')

['news_vectorizer.joblib']

In [173]:
# Save the trained network to disk; the .h5 suffix selects Keras' HDF5 format.
model.save(filepath='news_model.h5')