# Fake news Detection

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

### Inserting dataset

In [2]:
df = pd.read_csv("marge.csv")

In [3]:
df.head(5)

Unnamed: 0.1,Unnamed: 0,title,text,subject,date,class
0,0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


#### "title",  "subject" and "date" columns is not required for detecting the fake news, so I am going to drop the columns.

In [4]:
df = df.drop(["title", "subject", "date"], axis=1)

In [5]:
df.head(5)

Unnamed: 0.1,Unnamed: 0,text,class
0,0,Donald Trump just couldn t wish all Americans ...,0
1,1,House Intelligence Committee Chairman Devin Nu...,0
2,2,"On Friday, it was revealed that former Milwauk...",0
3,3,"On Christmas day, Donald Trump announced that ...",0
4,4,Pope Francis used his annual Christmas Day mes...,0


#### Randomly shuffling the dataframe 

In [6]:
df = df.sample(frac = 1)

In [7]:
df.head(5)

Unnamed: 0.1,Unnamed: 0,text,class
29630,6149,"LANGLEY, Va. (Reuters) - U.S. President Donald...",1
11151,11151,PEACE THROUGH STRENGTH President Reagan showed...,0
2825,2825,Amid one of the most racist presidential campa...,0
29163,5682,ANKARA (Reuters) - Iran’s Foreign Minister Moh...,1
1421,1421,"Children, the most vulnerable of Americans, wi...",0


#### Reset index

In [8]:
df.reset_index(inplace = True)
df.drop(["index"], axis = 1, inplace = True)

In [9]:
df.head(5)

Unnamed: 0.1,Unnamed: 0,text,class
0,6149,"LANGLEY, Va. (Reuters) - U.S. President Donald...",1
1,11151,PEACE THROUGH STRENGTH President Reagan showed...,0
2,2825,Amid one of the most racist presidential campa...,0
3,5682,ANKARA (Reuters) - Iran’s Foreign Minister Moh...,1
4,1421,"Children, the most vulnerable of Americans, wi...",0


#### Creating a function to convert the text in lowercase, remove the extra space, special chr., ulr and links.

In [10]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text) 
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)    
    return text

In [11]:
df["text"] = df["text"].apply(wordopt)

In [12]:
df.head(5)

Unnamed: 0.1,Unnamed: 0,text,class
0,6149,langley va reuters u s president donald...,1
1,11151,peace through strength president reagan showed...,0
2,2825,amid one of the most racist presidential campa...,0
3,5682,ankara reuters iran s foreign minister moh...,1
4,1421,children the most vulnerable of americans wi...,0


#### Defining dependent and independent variable as x and y

In [13]:
x = df["text"]
y = df["class"]

In [14]:
x.head(5)

0    langley  va   reuters    u s  president donald...
1    peace through strength president reagan showed...
2    amid one of the most racist presidential campa...
3    ankara  reuters    iran s foreign minister moh...
4    children  the most vulnerable of americans  wi...
Name: text, dtype: object

In [15]:
y.head(5)

0    1
1    0
2    0
3    1
4    0
Name: class, dtype: int64

#### Splitting the dataset into training set and testing set. 

In [16]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

#### Convert text to vectors (black box)

In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [18]:
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

### Logistic Regression

In [19]:
from sklearn.linear_model import LogisticRegression

In [20]:
LR = LogisticRegression()
LR.fit(xv_train,y_train)

LogisticRegression()

In [21]:
LR.score(xv_test, y_test)

0.9864587973273942

# Model Testing With Manual Entry

### News

In [22]:
def output_lable(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Real News"

def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordopt) 
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)

    pred_LR = LR.predict(new_xv_test)
    
    return LR.predict_proba(new_xv_test),output_lable(pred_LR[0])

In [25]:
news = str(input("Please Input News Full Text:   "))
manual_testing(news)

Please Input News Full Text:   langley  va   reuters    u s  president donald


(array([[0.10226609, 0.89773391]]), 'Real News')

In [None]:
p

In [1]:
import numpy as np
from PIL import ImageGrab
import cv2
import time

def screen_record(): 
    last_time = time.time()
    while(True):
        # 800x600 windowed mode for GTA 5, at the top left position of your main screen.
        # 40 px accounts for title bar. 
        printscreen =  np.array(ImageGrab.grab(bbox=(0,40,800,640)))
        print('loop took {} seconds'.format(time.time()-last_time))
        last_time = time.time()
        cv2.imshow('window',cv2.cvtColor(printscreen, cv2.COLOR_BGR2RGB))
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break

screen_record()


ModuleNotFoundError: No module named 'cv2'