In [None]:
# Install the fastText Python bindings that the import cell below relies on.
!pip install fasttext



In [None]:
import numpy as np
import pandas as pd
import fasttext
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm

# **Load Data**
Datasets and models used:
*   Fake and real news dataset ([Clément Bisaillon](https://www.kaggle.com/clmentbisaillon/fake-and-real-news-dataset))
*   Text emotion dataset ([ISEAR](https://www.unige.ch/cisa/research/materials-and-online-research/research-material/))
*   Named-entity recognition (NER) model (spaCy `en_core_web_sm`)



In [None]:
!unzip "/content/drive/My Drive/HOAX/fake-and-real-news-dataset.zip" -d fake-and-real-news-dataset
fake_news = pd.read_csv("/content/fake-and-real-news-dataset/Fake.csv")
real_news = pd.read_csv("/content/fake-and-real-news-dataset/True.csv")

real_news.tail(5)

Archive:  /content/drive/My Drive/HOAX/fake-and-real-news-dataset.zip
replace fake-and-real-news-dataset/Fake.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: fake-and-real-news-dataset/Fake.csv  
replace fake-and-real-news-dataset/True.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: fake-and-real-news-dataset/True.csv  


Unnamed: 0,title,text,subject,date
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017"
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017"
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017"
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017"
21416,Indonesia to buy $1.14 billion worth of Russia...,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,worldnews,"August 22, 2017"


In [None]:
# Read the ISEAR emotion dataset from its Excel file.
# NOTE(review): the path sits under /content/drive, presumably a mounted Google Drive — confirm the mount step exists.
isear = pd.read_excel('/content/drive/My Drive/HOAX/ISEAR.xlsx')
#isear.head(5)

In [None]:
# (rows, columns) of the ISEAR table as loaded, before any rows are removed.
isear.shape

(7666, 42)

In [None]:
# Remove every row whose situation text is the '[ No response.]' placeholder.
isear = isear.drop(isear[isear['SIT'] == '[ No response.]'].index)
# Renumber the remaining rows 0..n-1 (later cells address rows by position).
# drop=True discards the old index instead of inserting it as an extra 'index'
# column, so the frame keeps its original columns and the cell can be re-run safely.
isear = isear.reset_index(drop=True)

In [None]:
# (rows, columns) after the '[ No response.]' rows have been removed.
isear.shape

(7588, 43)

In [None]:
# Keep just the emotion label ('Field1') and the situation text ('SIT'),
# exposed under the shorter column names used in the rest of the notebook.
isear2 = isear[['Field1', 'SIT']].rename(columns={'Field1': 'emot', 'SIT': 'text'})
isear2.head(5)

Unnamed: 0,emot,text
0,joy,"During the period of falling in love, each tim..."
1,fear,When I was involved in a traffic accident.
2,anger,When I was driving home after several days of...
3,sadness,When I lost the person who meant the most to me.
4,disgust,The time I knocked a deer down - the sight of ...


# **Preprocessing**

In [None]:
import string

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the NLTK resources used below: the stop-word lists and the 'punkt'
# tokenizer data (needed by word_tokenize).
nltk.download('stopwords')
nltk.download('punkt')

# Tokens to discard during preprocessing: English stop words plus every ASCII
# punctuation character. Kept as a set so the per-token `in` test is a hash
# lookup instead of a linear scan of a list.
stop_and_punc = set(stopwords.words('english')) | set(string.punctuation)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
def preprocessing(text):
  """Normalize one document for the downstream models.

  The text is lowercased, split into tokens with `word_tokenize`, and every
  token found in `stop_and_punc` is discarded.

  Args:
    text: the raw document as a string.

  Returns:
    The surviving tokens joined by single spaces (an empty string when no
    token survives).
  """
  tokens = word_tokenize(text.lower())
  kept = filter(lambda token: token not in stop_and_punc, tokens)
  return ' '.join(kept)

In [None]:
# Replace the 'text' column of each corpus with its preprocessed version.
# The raw text is overwritten, so every later cell sees only the cleaned text.
for frame in (fake_news, real_news, isear2):
  frame['text'] = frame['text'].apply(preprocessing)

# **Modelling Emotion**

In [None]:
# Build one fastText training line per ISEAR row: "__label__<emotion> <text>".
# Pairing the two columns with zip avoids relying on the index labels being 0..n-1.
isear_to_text = [
  '__label__' + emot + ' ' + text
  for emot, text in zip(isear2['emot'], isear2['text'])
]

# Write the lines as UTF-8 text. np.savetxt encodes with latin1 by default,
# which fails on characters outside Latin-1 and does not produce the UTF-8
# input that fastText is documented to expect.
with open("isear_to_text.txt", "w", encoding="utf-8") as training_file:
  training_file.writelines(line + "\n" for line in isear_to_text)

In [None]:
# Train a supervised fastText classifier on the labelled ISEAR file for 25 epochs;
# all other hyperparameters are left at the library defaults.
emot_model = fasttext.train_supervised("isear_to_text.txt", epoch=25)

In [None]:
def emot_predict(text, k=None):
  """Predict emotion probabilities for one document with `emot_model`.

  Args:
    text: the (preprocessed) document to classify.
    k: how many top labels to request from the model. Defaults to the number
      of labels the model knows, so the result covers every emotion without
      hard-coding the label count.

  Returns:
    A dict mapping each returned label (e.g. '__label__joy') to its predicted
    probability, in the order the model returned them.
  """
  if k is None:
    k = len(emot_model.labels)
  labels, probabilities = emot_model.predict(text, k=k)
  # zip stops at the shorter sequence, so a model that returns fewer than k
  # labels yields a smaller dict instead of raising IndexError.
  return dict(zip(labels, probabilities))

In [None]:
# Sanity check: ask the model for its top 7 labels on one preprocessed
# fake-news article (the row with index label 1).
emot_model.predict(fake_news['text'][1], k=7)

(('__label__shame',
  '__label__anger',
  '__label__disgust',
  '__label__guilt',
  '__label__sadness',
  '__label__joy',
  '__label__fear'),
 array([0.60884684, 0.14303184, 0.07852232, 0.0600257 , 0.05181833,
        0.03625125, 0.02157373]))

In [None]:
# For every fake-news article, record the predicted probability of each emotion
# label, then average each label's list. Despite its name, `fake_news_emot_sum`
# holds the per-label mean, not a sum.
fake_news_emot = {label: [] for label in emot_model.labels}
print(fake_news_emot)
for article in fake_news['text']:
  for label, prob in emot_predict(article).items():
    fake_news_emot[label].append(prob)
fake_news_emot_sum = {
  label: sum(probs) / len(probs) for label, probs in fake_news_emot.items()
}
fake_news_emot_sum

{'__label__joy': [], '__label__fear': [], '__label__anger': [], '__label__disgust': [], '__label__sadness': [], '__label__guilt': [], '__label__shame': []}


{'__label__anger': 0.2025944600934143,
 '__label__disgust': 0.16743049029068136,
 '__label__fear': 0.1285317132987404,
 '__label__guilt': 0.11884587445621948,
 '__label__joy': 0.11094542425220796,
 '__label__sadness': 0.11319813523029154,
 '__label__shame': 0.15852390696241261}

In [None]:
# For every real-news article, record the predicted probability of each emotion
# label, then average each label's list. Despite its name, `real_news_emot_sum`
# holds the per-label mean, not a sum.
real_news_emot = {label: [] for label in emot_model.labels}
print(real_news_emot)
for article in real_news['text']:
  for label, prob in emot_predict(article).items():
    real_news_emot[label].append(prob)
real_news_emot_sum = {
  label: sum(probs) / len(probs) for label, probs in real_news_emot.items()
}
real_news_emot_sum

{'__label__joy': [], '__label__fear': [], '__label__anger': [], '__label__disgust': [], '__label__sadness': [], '__label__guilt': [], '__label__shame': []}


{'__label__anger': 0.2774593330605954,
 '__label__disgust': 0.12293872216700742,
 '__label__fear': 0.08956826831519493,
 '__label__guilt': 0.13692051744730688,
 '__label__joy': 0.09954031248016615,
 '__label__sadness': 0.10164523632590902,
 '__label__shame': 0.17199761532514599}

In [None]:
# Same measurement on the ISEAR texts the model was trained on: record the
# predicted probability of each emotion label per text, then average each
# label's list. Despite its name, `isear2_emot_sum` holds the per-label mean.
isear2_emot = {label: [] for label in emot_model.labels}
print(isear2_emot)
for situation in isear2['text']:
  for label, prob in emot_predict(situation).items():
    isear2_emot[label].append(prob)
isear2_emot_sum = {
  label: sum(probs) / len(probs) for label, probs in isear2_emot.items()
}
isear2_emot_sum

{'__label__joy': [], '__label__fear': [], '__label__anger': [], '__label__disgust': [], '__label__sadness': [], '__label__guilt': [], '__label__shame': []}


{'__label__anger': 0.1438044635675248,
 '__label__disgust': 0.14354660314265452,
 '__label__fear': 0.14442611620229573,
 '__label__guilt': 0.1422502397176614,
 '__label__joy': 0.14350033223815142,
 '__label__sadness': 0.1399820423722812,
 '__label__shame': 0.14256021701621938}

In [None]:
# Show only the first few probabilities per label. Each list holds one value
# per ISEAR text, so displaying the whole dict floods the notebook output.
{label: probs[:5] for label, probs in isear2_emot.items()}

{'__label__anger': [9.352375491289422e-05,
  1.2330503523116931e-05,
  0.9462026357650757,
  0.09557703882455826,
  0.001513210590928793,
  1.0867278433579486e-05,
  0.0007563892286270857,
  0.008853123523294926,
  1.0028826181951445e-05,
  0.7553945779800415,
  0.06653856486082077,
  0.004761271644383669,
  0.0005077524692751467,
  1.8890632418333553e-05,
  0.007701896596699953,
  0.0001716937986202538,
  0.9767106175422668,
  0.005174505989998579,
  0.07723547518253326,
  0.05110057815909386,
  0.001147440169006586,
  0.24268291890621185,
  1.002177032205509e-05,
  0.8208044767379761,
  0.1549805849790573,
  0.00044540438102558255,
  0.0002600254083517939,
  4.880963751929812e-05,
  3.618269329308532e-05,
  0.01379186101257801,
  0.8799285292625427,
  0.021624907851219177,
  0.030635036528110504,
  0.18529069423675537,
  0.12858252227306366,
  0.00016716003301553428,
  0.003026064718142152,
  0.17957162857055664,
  0.27111175656318665,
  0.17957162857055664,
  0.17957162857055664,
  

# **Modelling NER**

In [None]:
# `spacy` is already imported in the notebook's import cell; repeating it here is harmless.
import spacy
# Load the spaCy pipeline named "en_core_web_sm"; the cells below read its entity predictions.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Run the spaCy pipeline on the first fake-news article and print every
# detected entity next to its entity type.
# NOTE(review): fake_news['text'] was lowercased and stripped of stop words and
# punctuation earlier; that may degrade entity recognition — consider running
# NER on the raw text instead.
doc = nlp(fake_news['text'][0])
for entity_text, entity_label in ((ent.text, ent.label_) for ent in doc.ents):
  print(entity_text, entity_label)

In [None]:
# Count how many entities of each type were found in the document.
labels = list(map(lambda entity: entity.label_, doc.ents))
Counter(labels)

Counter({'PERSON': 11,
         'NORP': 2,
         'EVENT': 12,
         'CARDINAL': 3,
         'GPE': 3,
         'DATE': 17,
         'ORG': 3,
         'TIME': 1,
         'ORDINAL': 1})