# FAKE NEWS CLASSIFIER 

![](https://miro.medium.com/max/1400/1*RGVPc-MT0q_DCHCavFRHvA.jpeg)

# IMPORTING THE LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import scipy as sp
import string
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# imported above — confirm that is intended.
warnings.filterwarnings("ignore")
# Render matplotlib figures inline in the notebook output.
%matplotlib inline




# LOADING THE DATASET

In [None]:
# Read the CSV into a DataFrame; every later cell works from `data`.
# NOTE(review): the file loaded is test.csv — confirm this split contains the
# class label needed to train a classifier.
data=pd.read_csv("../input/fake-news-classifier-data/test.csv")


In [None]:
# Display the loaded frame.
data

In [None]:
# Preview the first rows of the frame.
data.head()

In [None]:
# Print column names, non-null counts and dtypes.
data.info()

In [None]:
# Summary statistics for the frame's columns.
data.describe()

In [None]:
# Count how often each distinct row occurs. The method must be called:
# without the parentheses the cell only displays the bound method object.
data.value_counts()

In [None]:
# Data type of each column.
data.dtypes

In [None]:
# (number of rows, number of columns).
data.shape

In [None]:
# Column labels of the frame.
data.columns

# **Checking Null Values**

In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# True for every column that contains at least one missing value.
data.isnull().any()

***So we have to drop the null values.***

In [None]:
# Remove every row that has a missing value in any column.
data = data.dropna(axis=0, how="any")


In [None]:
# Re-count missing values per column after the drop above.
data.isnull().sum()

***So now we can see all the null values have been dropped.***

In [None]:
## Get the Independent Features

# Features are every column of `data` except 'id'.
X=data.drop('id',axis=1)


In [None]:
# Target vector used for training below.
# NOTE(review): 'id' is taken as the target; the name suggests a row identifier
# rather than a real/fake label — confirm the intended label column.
y=data['id']


In [None]:
# Frequency of each distinct target value.
y.value_counts()


In [None]:
# (rows, columns) of the feature frame.
X.shape

In [None]:
# Length of the target vector; should match the row count of X.
y.shape

# **Exploratory Data Analysis**

In [None]:
# Bar plot of `id` against `title` for ten rows (positional slice 180:190).
plt.style.use("default")
sample_rows = data[180:190]
ax = sns.barplot(data=sample_rows, x="id", y="title")
ax.set_title("ID vs TITLE", fontsize=15)
ax.set_xlabel("ID")
ax.set_ylabel("TITLE")
plt.show()




In [None]:
# Column labels again, for reference before plotting.
data.columns

In [None]:
# Pairwise plots of the frame's numeric columns, coloured by the `text` column.
# NOTE(review): if `text` holds free-form article text, nearly every row becomes
# its own hue level — confirm this is the intended grouping column.
sns.set_palette("Paired")
sns.pairplot(data,hue='text',height=5,palette='colorblind')
plt.show()


In [None]:
# Box plot of the `id` column.
plt.figure(figsize=(14,10))
sns.set_style(style='whitegrid')
# Selects the first slot of a 2x3 grid; no other slot is filled in this cell.
plt.subplot(2,3,1)
sns.boxplot(x='id',data=data)



In [None]:
# Pairwise plots of the frame's numeric columns, without hue grouping.
sns.pairplot(data=data)

In [None]:
import tensorflow as tf


**LSTM**

In [None]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense



**With Dense we can also use Dropout and Batch Normalization**

In [None]:
### Vocabulary size
# Used below as the index range for one_hot() and as the Embedding input dimension.
voc_size=5000


In [None]:
# Work on a copy so the preprocessing below does not touch X.
messages=X.copy()


In [None]:
# Renumber rows 0..n-1 (the old index is kept as a column) so the
# preprocessing loop can look titles up by position.
messages = messages.reset_index()



**NLTK**

In [None]:
import nltk
import re
from nltk.corpus import stopwords

In [None]:
# Fetch the NLTK stop-word corpus used by the preprocessing loop below.
nltk.download('stopwords')


**Stemming and Lemmatization are Text Normalization (or sometimes called Word Normalization) techniques in the field of Natural Language Processing that are used to prepare text, words, and documents for further processing.**

In [None]:
### Dataset Preprocessing
# Turn every title into a cleaned, stemmed, stop-word-free string.
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()
# Build the stop-word set once. Calling stopwords.words('english') for every
# token repeats the lookup and makes each membership test a linear list scan.
english_stopwords = set(stopwords.words('english'))
corpus = []
for title in messages['title']:
    # Keep letters only, lowercase, then split on whitespace.
    tokens = re.sub('[^a-zA-Z]', ' ', title).lower().split()
    # Drop stop words and reduce the remaining words to their stems.
    stemmed = [ps.stem(word) for word in tokens if word not in english_stopwords]
    corpus.append(' '.join(stemmed))


In [None]:
# Preview the first cleaned titles; displaying the whole list floods the output.
corpus[:10]

In [None]:
# Encode every cleaned title as a list of integer word indices below voc_size.
onehot_repr = [one_hot(doc, voc_size) for doc in corpus]
# Preview a few encoded titles; displaying the whole list floods the output.
onehot_repr[:5]

In [None]:
# Fixed sequence length fed to the network.
sent_length = 20
# Left-pad (or cut) every encoded title to exactly sent_length indices.
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)


In [None]:
# Padded index sequence of the first title.
embedded_docs[0]


In [None]:
## Creating model
# Size of each learned word vector.
embedding_vector_features=40
model=Sequential()
# Map each of the voc_size word indices to a dense vector; inputs are sent_length tokens long.
model.add(Embedding(voc_size,embedding_vector_features,input_length=sent_length))
model.add(LSTM(100))
# Single sigmoid unit: the output is a probability for the positive class.
model.add(Dense(1,activation='sigmoid'))
# Binary cross-entropy is the standard loss for one sigmoid output scored with
# accuracy; mean absolute error is a regression loss and does not fit that setup.
# Assumes y holds 0/1 class labels — TODO confirm.
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])



**Types of Activation Functions:**

**1) Relu**

**2) Sigmoid**

**3) Threshold**

**4) Hyperbolic Tangent**

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

**Types of Optimizers:**

**1) Gradient Descent (GD)**

**2) Stochastic Gradient Descent**

**3) Mini-Batch Gradient Descent**

**4) Adagrad**

**5) RMSProp**





In [None]:
import numpy as np
# Convert the padded sequences and the target into NumPy arrays for Keras / scikit-learn.
X_final, y_final = np.array(embedded_docs), np.array(y)


In [None]:
# Shapes of the model inputs and targets; the first dimensions should match.
X_final.shape,y_final.shape

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    random_state=42,
    test_size=0.3,
)


In [None]:
# Train for 50 epochs in batches of 256, evaluating on the held-out split after each epoch.
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=256,
    validation_data=(X_test, y_test),
)


**MODEL CREATION**

In [None]:
from tensorflow.keras.layers import Dropout
## Creating model
# Same architecture as the first model, with Dropout after the embedding and
# after the LSTM for regularisation.
# Size of each learned word vector.
embedding_vector_features=40
model=Sequential()
model.add(Embedding(voc_size,embedding_vector_features,input_length=sent_length))
model.add(Dropout(0.3))
model.add(LSTM(100))
model.add(Dropout(0.3))
# Single sigmoid unit: the output is a probability for the positive class.
model.add(Dense(1,activation='sigmoid'))
# Binary cross-entropy is the standard loss for one sigmoid output scored with
# accuracy; mean absolute error is a regression loss and does not fit that setup.
# Assumes y holds 0/1 class labels — TODO confirm.
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

**Types of Loss Functions:**

**1) Mean Squared Error**

**2) Regression Loss Function**

**3) Mean Absolute Error Loss**

**4) Binary Classification Loss Function**

**5) Binary Cross Entropy Loss**


In [None]:
# `model` was rebuilt (with Dropout) after the earlier training run, so its
# weights are freshly initialised; train it before asking for predictions.
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=50,batch_size=256)
# Sigmoid outputs for the held-out rows.
y_pred=model.predict(X_test)


In [None]:
# Raw model outputs for the held-out rows.
y_pred

In [None]:
# Threshold the sigmoid outputs at 0.5 to get boolean class predictions.
print(y_pred > 0.5)


In [None]:
from tensorflow.keras.utils import plot_model
# Draw the layer graph of the model, annotated with tensor shapes.
plot_model(model, show_shapes = True)


In [None]:
# Print the layer-by-layer summary of the Dropout model.
model.summary()

# **Thank You** 