# Importing Libraries

In [1]:
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.tree import DecisionTreeClassifier
import pickle

# Reading Dataset

In [2]:
# Load only the first 1000 rows of train.csv (nrows=1000) and preview the top rows.
df=pd.read_csv("train.csv",nrows=1000)
df.head()

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


# Basic Inspection of Dataset

In [3]:
# Summary statistics for the numeric columns (id and label).
df.describe()

Unnamed: 0,id,label
count,1000.0,1000.0
mean,499.5,0.501
std,288.819436,0.500249
min,0.0,0.0
25%,249.75,0.0
50%,499.5,1.0
75%,749.25,1.0
max,999.0,1.0


In [4]:
# Column dtypes and non-null counts; reveals which text columns have gaps.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      1000 non-null   int64 
 1   title   973 non-null    object
 2   author  897 non-null    object
 3   text    998 non-null    object
 4   label   1000 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 39.2+ KB


# Checking Null Values

In [5]:
# Count the missing entries in each column.
df.isna().sum()

id          0
title      27
author    103
text        2
label       0
dtype: int64

# Filling Null values

In [6]:
# Replace every missing value with an empty string so the text columns hold
# only str values; df is rebound, so the unfilled frame is no longer available.
df=df.fillna('')

In [7]:
# Re-count missing entries per column to confirm none remain after filling.
df.isna().sum()

id        0
title     0
author    0
text      0
label     0
dtype: int64

# Removing Unnecessary Features from the Dataset

In [8]:
# List the available column names before choosing which ones to drop.
df.columns

Index(['id', 'title', 'author', 'text', 'label'], dtype='object')

In [9]:
# Keep only the article body and its label; id, title and author are discarded.
df = df.drop(columns=['id', 'title', 'author'])
df.head()

Unnamed: 0,text,label
0,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,Ever get the feeling your life circles the rou...,0
2,"Why the Truth Might Get You Fired October 29, ...",1
3,Videos 15 Civilians Killed In Single US Airstr...,1
4,Print \nAn Iranian woman has been sentenced to...,1


# Stemming

In [10]:
# Create one Porter stemmer instance; the stemming() helper reuses it for every word.
port_stem=PorterStemmer()
port_stem

<PorterStemmer>

In [11]:
def stemming(content):
    """Normalize raw text into a space-joined string of Porter stems.

    Every character that is not an ASCII letter is replaced with a space, the
    text is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining word is stemmed with the module-level
    ``port_stem``.

    Parameters
    ----------
    content : str
        Raw text to clean.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces (an empty string when no
        token survives the filtering).
    """
    # stopwords.words() returns a fresh list on every call, so fetch it once
    # per document and keep it in a set for constant-time membership checks
    # instead of rebuilding and scanning the list for every single token.
    stop_words = set(stopwords.words('english'))
    tokens = re.sub('[^a-zA-Z]', ' ', content).lower().split()
    return ' '.join(port_stem.stem(word) for word in tokens if word not in stop_words)

In [12]:
# Replace each article body with its cleaned, stemmed form
# (this overwrites the raw text column in place).
df['text']= df['text'].apply(stemming)

# Separating Input and Output Features

In [13]:
# Model input is the processed text column; the target is the label column.
x, y = df['text'], df['label']

# Train Test Split

In [14]:
# Hold out 20% of the rows for testing. A fixed random_state makes the shuffle,
# and therefore every downstream result, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

# TF-IDF Vectorization

In [15]:
# TF-IDF vectorizer created with its default settings.
vect=TfidfVectorizer()

In [16]:
# Fit the vectorizer on the training texts only, then apply that same fitted
# mapping to the test texts so no test information reaches the fit step.
# NOTE: x_train / x_test are rebound from raw text to vectorized features here,
# so this cell should not be re-run without re-running the split cell first.
x_train=vect.fit_transform(x_train)
x_test=vect.transform(x_test)

# Model Creation

In [17]:
# Fix random_state so the fitted tree is identical on every run; without it
# the classifier's internal randomness can produce a different tree each time.
model = DecisionTreeClassifier(random_state=42)

# Model Training

In [18]:
# Fit the decision tree on the vectorized training texts and their labels.
model.fit(x_train, y_train)

# Model Prediction

In [19]:
# Predict a label for every held-out test document.
prediction=model.predict(x_test)

In [20]:
# Display the predicted labels for the test split.
prediction

array([0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0], dtype=int64)

# Model Evaluation

In [21]:
# Mean accuracy of the fitted classifier on the held-out test split.
model.score(x_test, y_test)

0.72

# Storing Model and Vectorizer in Pickle File Format

In [22]:
# Persist the fitted vectorizer and classifier. The context managers guarantee
# each file handle is flushed and closed as soon as its dump completes.
with open('vector.pkl', 'wb') as vector_file:
    pickle.dump(vect, vector_file)
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Evaluating the Saved Model and Vectorizer on Sample Data

In [23]:
# Reload the saved vectorizer and classifier, closing each file handle after use.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickle files that come from a trusted source.
with open('vector.pkl', 'rb') as vector_file:
    vector_form = pickle.load(vector_file)
with open('model.pkl', 'rb') as model_file:
    load_model = pickle.load(model_file)

In [24]:
def fake_news(news):
    """Classify one news text with the reloaded vectorizer and model.

    The text is cleaned with ``stemming``, wrapped in a one-item list,
    vectorized by ``vector_form`` and passed to ``load_model``.

    Parameters
    ----------
    news : str
        Raw news text to classify.

    Returns
    -------
    The value returned by ``load_model.predict`` for that single document.
    """
    cleaned_batch = [stemming(news)]
    features = vector_form.transform(cleaned_batch)
    return load_model.predict(features)

In [25]:
# Run the reloaded vectorizer and model on one sample article and keep the result in val.
val=fake_news("""In these trying times, Jackie Mason is the Voice of Reason. [In this week’s exclusive clip for Breitbart News, Jackie discusses the looming threat of North Korea, and explains how President Donald Trump could win the support of the Hollywood left if the U. S. needs to strike first.  “If he decides to bomb them, the whole country will be behind him, because everybody will realize he had no choice and that was the only thing to do,” Jackie says. “Except the Hollywood left. They’ll get nauseous. ” “[Trump] could win the left over, they’ll fall in love with him in a minute. If he bombed them for a better reason,” Jackie explains. “Like if they have no transgender toilets. ” Jackie also says it’s no surprise that Hollywood celebrities didn’t support Trump’s strike on a Syrian airfield this month. “They were infuriated,” he says. “Because it might only save lives. That doesn’t mean anything to them. If it only saved the environment, or climate change! They’d be the happiest people in the world. ” Still, Jackie says he’s got nothing against Hollywood celebs. They’ve got a tough life in this country. Watch Jackie’s latest clip above.   Follow Daniel Nussbaum on Twitter: @dznussbaum """)

In [26]:
# fake_news is called with a single document, so val holds one prediction.
# Compare that element directly instead of relying on the truthiness of an
# elementwise array comparison, which is ambiguous for longer arrays.
if val[0] == 0:
    print('reliable')
else:
    print('unreliable')

reliable
