Import necessary libraries and functions

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import word_tokenize
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/christine/hueco_mundo/hueco_mundo/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Use custom web scraping function to extract headlines from Fox news' website as csv, compile all headlines into a single DataFrame

In [2]:
from Scrape import MakeHoney
from csv2list import csv2list

#scrape = MakeHoney(word_thresh=8,save_as='fox_scrape09292021')

files = ['scraped_pages/fox_scrape09092021.csv','scraped_pages/fox_scrape09102021.csv',
         'scraped_pages/fox_scrape09112021.csv','scraped_pages/fox_scrape09122021.csv',
         'scraped_pages/fox_scrape09122021B.csv','scraped_pages/fox_scrape09132021.csv',
         'scraped_pages/fox_scrape09142021.csv','scraped_pages/fox_scrape09152021.csv',
         'scraped_pages/fox_scrape09202021.csv','fox_scrape09212021.csv','fox_scrape09242021.csv',
         'fox_scrape09252021.csv','fox_scrape09292021.csv']

Fox = pd.concat(map(pd.read_csv,files))
Fox = Fox.drop(columns=['Unnamed: 0'])
Fox = Fox.drop_duplicates()

type(Fox)

pandas.core.frame.DataFrame

Use NLTK's VADER algorithm to produce compound sentiment/polarity scores for each headline 

In [3]:
vader = SentimentIntensityAnalyzer()

Fox['Scores'] = Fox['Headline'].apply(lambda x: vader.polarity_scores(x))
Fox['Score'] = Fox['Scores'].apply(lambda dict: dict['compound'])


#Fox.head(20)

Apply TextBlob's subjectivity metrics to the headlines, and include polarity scores for robustness

In [4]:
from textblob import TextBlob

fox = Fox.loc[:,['Headline']]

fox['Polarity'] = fox['Headline'].apply(lambda x: TextBlob(x).sentiment.polarity)
fox['Subjectivity'] = fox['Headline'].apply(lambda x: TextBlob(x).sentiment.subjectivity)
fox['VADER_compound'] = Fox['Score']
#fox.head(50)

Average the TextBlob and VADER polarity metrics and assign it to it's own column

In [5]:
import numpy as np

pd.set_option('display.max_colwidth',None)

fox['avg_polarity'] = (fox.Polarity + fox.VADER_compound)/2
fox.head(30)

Unnamed: 0,Headline,Polarity,Subjectivity,VADER_compound,avg_polarity
0,Biden admin trying to reverse all of Trump's accomplishments: Tammy Bruce,0.0,0.0,0.0,0.0
1,Chip Roy: Fentanyl overdoses skyrocketing because Biden refuses to secure the border,0.4,0.6,0.34,0.37
2,Rep. Mike Gallagher on why Dr. Anthony Fauci must resign,0.0,0.0,-0.34,-0.17
3,‘The Lost Calls of 9/11’ debuts on Fox Nation,0.0,0.0,-0.3182,-0.1591
4,Former CIA senior intel officer says moral obligation in Afghanistan is to get US allies out,0.0,0.125,0.0,0.0
5,Trump praises Gutfeld: Your ratings are great but beating untalented people,0.8,0.75,-0.0644,0.3678
6,Chris Wallace talks bringing 9/11 architect to justice,0.0,0.0,0.5267,0.26335
7,Immunologist analyzes messaging from Biden administration on vaccines,0.0,0.0,0.0,0.0
8,Kayleigh McEnany shreds 'Orwellian dictatorial tactics and edicts of the left',0.0,0.0,0.0,0.0
9,Exclusive whistleblower account: Afghan refugees leaving U.S. bases without being fully vetted,0.0,0.0,0.128,0.064


Evaluate headline metrics:
    >Average Subjectivity across all headlines
    >Average Subjectivity of all headlines with subjectivity scores above 0
    >Average Polarity across all headlines
    >Average Polarity of all headlines with polarity scores above 0

In [6]:
avg_subjectivity = np.mean(fox['Subjectivity'])
print('Average Subjectivity of all Headlines: ',avg_subjectivity)

Average Subjectivity of all Headlines:  0.26125676215229854


In [7]:
subj_not_0 = fox.loc[fox['Subjectivity'] != 0, 'Subjectivity']
avg_subj_not_0 = np.mean(subj_not_0)
print('Average Subjectivity of Subjective Headlines: ',avg_subj_not_0)

Average Subjectivity of Subjective Headlines:  0.4493616309019534


In [8]:
avg_polarity = np.mean(fox['avg_polarity'])
print('Average Sentiment of all Headlines: ',avg_polarity)

Average Sentiment of all Headlines:  -0.0318295674793518


In [9]:
polarity_not_0 = fox.loc[fox['avg_polarity'] != 0,'avg_polarity']
avg_polarity_not_0 = np.mean(polarity_not_0)
print('Average Sentiment of Headlines with Nonzero Polarity: ',avg_polarity_not_0)

Average Sentiment of Headlines with Nonzero Polarity:  -0.04017880061550753


Evaluate subjective headline distribution

In [10]:
high_subjectivity = fox.loc[fox['Subjectivity'] >= 0.5, ['Headline','Subjectivity']]
high_subjectivity.head(30)

print('Num Headlines: ' + str(len(fox)))
print('Highly Subjective Headlines: ' + str(len(high_subjectivity)))

print('Highly Subjective Proportion: ' + str(len(high_subjectivity)/len(fox)))

Num Headlines: 1333
Highly Subjective Headlines: 332
Highly Subjective Proportion: 0.2490622655663916


In [11]:
objective = fox[fox['Subjectivity'] == 0]
print('Objective Headlines: ',len(objective))

Objective Headlines:  558


Define binary column to describe if each headline is sentiment-neutral

In [12]:
def neu(x):
    if 0.05 > x > -0.05:
        return 1
    else:
        return 0
    
foxy = fox.loc[:, ['Headline','avg_polarity','Subjectivity']]
foxy['Neutral'] = foxy['avg_polarity'].apply(lambda x: neu(x))
#foxy = foxy.drop(columns=['avg_polarity'])

foxy.head()

Unnamed: 0,Headline,avg_polarity,Subjectivity,Neutral
0,Biden admin trying to reverse all of Trump's accomplishments: Tammy Bruce,0.0,0.0,1
1,Chip Roy: Fentanyl overdoses skyrocketing because Biden refuses to secure the border,0.37,0.6,0
2,Rep. Mike Gallagher on why Dr. Anthony Fauci must resign,-0.17,0.0,0
3,‘The Lost Calls of 9/11’ debuts on Fox Nation,-0.1591,0.0,0
4,Former CIA senior intel officer says moral obligation in Afghanistan is to get US allies out,0.0,0.125,1


In [13]:
foxy.Neutral.value_counts()

headlines = [headline for headline in foxy['Headline']]

from sklearn.feature_extraction.text import CountVectorizer

mix_gram = CountVectorizer(ngram_range=(1,2),max_features=5)
doc_mtx = mix_gram.fit_transform(headlines)

Create custom dataset to evaluate context surrounding a given word or words

In [14]:
df = fox.loc[:,['Headline','avg_polarity']]

biden = []
for headline in df['Headline']:
    list1 = ['biden']
    list2 = ['Biden']
    list3 = ['biden','vaccines']
    list4 = ['biden','vaccinations']
    hline = word_tokenize(str(headline))
    #list2 = ['Biden','COVID-19']
    if all(item in hline for item in list1):
        biden.append(headline)
    elif all(item in hline for item in list2):
        biden.append(headline)
    #elif all(item in hline for item in list3):
        #biden.append(headline)
    #elif all(item in hline for item in list4):
        #biden.append(headline)
    

len(biden)

172

In [15]:

#for headline in df['Headline']:
    #if 'Afghanistan' and 'Biden' in word_tokenize(str(headline)):
        #biden.append(headline)
    #elif 'America' in word_tokenize(str(headline)):
        #biden.append(headline)
    #if 'Biden' in word_tokenize(str(headline)):
        #biden.append(headline)
        

    #elif all(item in headline for item in list2):
        #biden.append(headline)
        
        
Biden = pd.DataFrame(biden)
Biden.columns = ['Headline']
Biden['Polarity_VADER'] = Biden['Headline'].apply(lambda x: vader.polarity_scores(x))
Biden['Polarity_VADER'] = Biden['Polarity_VADER'].apply(lambda dict: dict['compound'])
Biden['Polarity_TextBlob'] = Biden['Headline'].apply(lambda x: TextBlob(x).sentiment.polarity)
Biden['Polarity'] = (Biden['Polarity_VADER'] + Biden['Polarity_TextBlob'])/2
Biden = Biden.drop(columns=['Polarity_VADER','Polarity_TextBlob'])
Biden['Subjectivity'] = Biden['Headline'].apply(lambda x: TextBlob(x).sentiment.subjectivity)
Biden

Unnamed: 0,Headline,Polarity,Subjectivity
0,Biden admin trying to reverse all of Trump's accomplishments: Tammy Bruce,0.000000,0.000000
1,Chip Roy: Fentanyl overdoses skyrocketing because Biden refuses to secure the border,0.370000,0.600000
2,Immunologist analyzes messaging from Biden administration on vaccines,0.000000,0.000000
3,Biden admin throws full force of federal government at state over abortion law,0.175000,0.550000
4,Biden officials praised Bergdahl prisoner swap that released terrorists now in Taliban cabinet,-0.329850,0.000000
...,...,...,...
167,"Jason Chaffetz: If Biden, Dems' $3.5T cradle-to-grave welfare state happens here's what America will be like",0.180600,0.000000
168,Rep. Mike Turner: Biden's failed Afghanistan drone strike begs questions Gen. Milley must answer,-0.542950,0.300000
169,Greg Gutfeld: What happened to the adults we were promised in the Biden administration?,0.180600,0.000000
170,Biden team 'clearly' didn't listen to military commanders on Afghanistan: Grant,0.180600,0.241667


Evaluate average polarity and subjectivity of topic

In [16]:
avg_biden_polarity = np.mean(Biden['Polarity'])
avg_biden_subjectivity = np.mean(Biden['Subjectivity'])
print('Polarity: ',avg_biden_polarity)
print('Subjectivity: ',avg_biden_subjectivity)
len(Biden)

Polarity:  -0.09769454574817946
Subjectivity:  0.24133654862579285


172

Sort headlines by Subjectivity rating in descending order

In [17]:
foxy_sentiment = foxy.loc[:,['Headline','avg_polarity','Subjectivity']].sort_values('Subjectivity', ascending=False)
foxy_sentiment.head()

Unnamed: 0,Headline,avg_polarity,Subjectivity
154,"Justice Breyer opposes politics surrounding Supreme Court, supports one possible reform",0.42955,1.0
4,Cox: The recall election is necessary because California is 'sufferring',0.0,1.0
23,Democrats may go for broke this fall with their massive spending package. Will voters reward them?,0.11315,1.0
12,Barbara Olson fought for life until her final minutes onboard hijacked flight 77,-0.1591,1.0
2,Press should be outraged over Taliban's treatment of journalists: Concha,-0.72115,1.0
