In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

import re
import string

In [7]:
dfake = pd.read_csv('./dataset/Fake.csv')
dtrue = pd.read_csv('./dataset/True.csv')

In [9]:
dfake['class'] = 0
dtrue['class'] = 1

# Identify the quantity of entries and its columns
dfake.shape, dtrue.shape

((23481, 5), (21417, 5))

In [20]:
# Clean dataset
dfake_manual_testing = dfake.tail(10)
dfake.drop(dfake.tail(10).index, inplace=True)

dtrue_manual_testing = dtrue.tail(10)
dtrue.drop(dtrue.tail(10).index, inplace=True)

dfake.shape, dtrue.shape


((23451, 5), (21397, 5))

In [21]:
dfake_manual_testing['class'] = 0
dtrue_manual_testing['class'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfake_manual_testing['class'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtrue_manual_testing['class'] = 1


In [22]:
# View first 10 entries
dfake_manual_testing.head(10)

Unnamed: 0,title,text,subject,date,class
23451,3.57 Degrees: Kevin Bacon’s Cultural Mantle Sh...,21st Century Wire says Unless you have been li...,Middle-east,"February 9, 2016",0
23452,Bernie Sanders Could End Up Winning Iowa,21st Century Wire says Iowa s Democratic Party...,Middle-east,"February 4, 2016",0
23453,Plastic Persona: Behind the Scenes of the Ted ...,21st Century Wire says Most people accept that...,Middle-east,"February 4, 2016",0
23454,‘Meet Jeb’ – Going For Your Sympathy Vote,"21st Century Wire says As Republican, Ted Cruz...",Middle-east,"February 4, 2016",0
23455,"BOILER ROOM – Examination, Exclamation, Excita...",Tune in to the Alternate Current Radio Network...,Middle-east,"February 3, 2016",0
23456,Eyewash: CIA Elites Misleading Employees Indic...,21st Century Wire says The CIA is trying its b...,Middle-east,"February 3, 2016",0
23457,Activist: ‘This is where you can make the most...,21st Century Wire says If you ve been followin...,Middle-east,"February 3, 2016",0
23458,Episode #120 – SUNDAY WIRE: ‘Crisis of Liberty...,Episode #120 of SUNDAY WIRE SHOW finally resum...,Middle-east,"January 31, 2016",0
23459,FBI Release Oregon Video Footage Depicting Dea...,21st Century Wire says..UPDATE 1-29-2016 at 01...,Middle-east,"January 29, 2016",0
23460,Trial By YouTube: Mainstream Media Use Second-...,Patrick Henningsen 21st Century WireThere exi...,Middle-east,"January 28, 2016",0


In [23]:
# View first 10 entries
dtrue_manual_testing.head(10)

Unnamed: 0,title,text,subject,date,class
21397,Germany's Schulz says he would demand U.S. wit...,BERLIN (Reuters) - The leader of Germany s Soc...,worldnews,"August 23, 2017",1
21398,Blunt instrument? What a list of banned articl...,SHANGHAI (Reuters) - An old review of an acade...,worldnews,"August 23, 2017",1
21399,Saudi police release teenager detained for dan...,DUBAI (Reuters) - A 14-year-old boy who was de...,worldnews,"August 22, 2017",1
21400,"The People's Princess, Britons work to keep me...",LONDON (Reuters) - Abdul Daoud spilt most of t...,worldnews,"August 23, 2017",1
21401,"Argentina labor unions protest job losses, Mac...",BUENOS AIRES (Reuters) - Argentina s main labo...,worldnews,"August 22, 2017",1
21402,Exclusive: Trump's Afghan decision may increas...,ON BOARD A U.S. MILITARY AIRCRAFT (Reuters) - ...,worldnews,"August 22, 2017",1
21403,U.S. puts more pressure on Pakistan to help wi...,WASHINGTON (Reuters) - The United States sugge...,worldnews,"August 21, 2017",1
21404,Exclusive: U.S. to withhold up to $290 million...,WASHINGTON (Reuters) - The United States has d...,worldnews,"August 22, 2017",1
21405,Trump talks tough on Pakistan's 'terrorist' ha...,ISLAMABAD (Reuters) - Outlining a new strategy...,worldnews,"August 22, 2017",1
21406,"U.S., North Korea clash at U.N. forum over nuc...",GENEVA (Reuters) - North Korea and the United ...,worldnews,"August 22, 2017",1


In [26]:
# Merge true and fake news dataset
data_merge = pd.concat([dfake, dtrue], axis=0)
data_merge.head()

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [27]:
data_merge.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [29]:
# Remove unnecessary columns
data = data_merge.drop(['title', 'subject', 'date'], axis=1)
data.isnull().sum()


text     0
class    0
dtype: int64

In [31]:
data.sample(frac=1)

Unnamed: 0,text,class
23366,21st Century Wire asks Will this be the beginn...,0
19460,ROME (Reuters) - British Prime Minister Theres...,1
16872,MADRID (Reuters) - The Spanish government has ...,1
5016,We ve all been concerned with the idea of the ...,0
9666,This is the best yet! A message to Hollywood t...,0
...,...,...
7995,The First Couple greeted an eager crowd celebr...,0
14127,You can blame Obama for the hatred and divisio...,0
15719,WASHINGTON/SEOUL (Reuters) - Saddled with the ...,1
10557,Rep. Debbie Wasserman Schultz dropped a big bo...,0


In [35]:
data.reset_index(inplace=True)
data.drop(['index'], axis=1, inplace=True)
data.columns

Index(['text', 'class'], dtype='object')

In [36]:
data.head()

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0
