In [2]:
# Import packages
import tweepy
import json
import pandas as pd
import requests
import numpy as np
import matplotlib.pyplot as plt

from config import consumer_key, consumer_secret, access_token, access_token_secret
from datetime import datetime
from pprint import pprint
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Setup Tweepy Authentication 
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [3]:
#Data for New York Times
ny = "@nytimes"

ny_list = []
counter = 1
oldest_tweet = None

for x in range(1, 6):
    public_tweets = api.user_timeline(ny, max_id=oldest_tweet)

    for tweets in public_tweets:
        # Converting time to integer
        raw_time = tweets["created_at"]
        convert = datetime.strptime(raw_time, "%a %b %d %H:%M:%S %z %Y")

        news_text = tweets["text"]
        news_source = tweets["user"]["name"]
        news_time = convert

        results = analyzer.polarity_scores(tweets["text"])
        compound= results["compound"]
        pos= results["pos"]
        neg= results["neg"]
        neu= results["neu"]

        oldest_tweet = tweets['id'] - 1
        
        #Storing into the empty lists
        ny_list.append({
            "Source": news_source,
            "Tweet Time": news_time,
            "Compound": compound,
            "Tweet": news_text,
            "Positive": pos,
            "Negative": neg,
            "Neutral": neu,
            "Tweets Ago": counter

    })

        counter +=1
ny_data = pd.DataFrame(ny_list)
ny_data = ny_data.sort_values("Tweets Ago", ascending=False)
ny_data.head()

Unnamed: 0,Compound,Negative,Neutral,Positive,Source,Tweet,Tweet Time,Tweets Ago
99,0.0,0.0,1.0,0.0,The New York Times,A statement issued by the White House around 2...,2018-10-04 07:06:20+00:00,100
98,0.0,0.0,1.0,0.0,The New York Times,A small lander from Japan's Hayabusa2 spacecra...,2018-10-04 07:15:04+00:00,99
97,0.5106,0.0,0.87,0.13,The New York Times,RT @peterbakernyt: White House transmits FBI i...,2018-10-04 07:16:14+00:00,98
96,0.4404,0.0,0.822,0.178,The New York Times,“Stripping her of her honorary citizenship may...,2018-10-04 07:30:05+00:00,97
95,0.0,0.0,1.0,0.0,The New York Times,"In an opinion piece in The Washington Post, wr...",2018-10-04 07:45:07+00:00,96


In [4]:
#Data for New York Times
cbs = "@CBS"

cbs_list = []
counter = 1
oldest_tweet = None

for x in range(1, 6):
    public_tweets = api.user_timeline(cbs, max_id=oldest_tweet)

    for tweets in public_tweets:
        # Converting time to integer
        raw_time = tweets["created_at"]
        convert = datetime.strptime(raw_time, "%a %b %d %H:%M:%S %z %Y")

        news_text = tweets["text"]
        news_source = tweets["user"]["name"]
        news_time = convert

        results = analyzer.polarity_scores(tweets["text"])
        compound= results["compound"]
        pos= results["pos"]
        neg= results["neg"]
        neu= results["neu"]

        oldest_tweet = tweets['id'] - 1
        
        #Storing into the empty lists
        cbs_list.append({
            "Source": news_source,
            "Tweet Time": news_time,
            "Compound": compound,
            "Tweet": news_text,
            "Positive": pos,
            "Negative": neg,
            "Neutral": neu,
            "Tweets Ago": counter

    })

        counter +=1
cbs_data = pd.DataFrame(cbs_list)
cbs_data = cbs_data.sort_values("Tweets Ago", ascending=False)
cbs_data.head()

Unnamed: 0,Compound,Negative,Neutral,Positive,Source,Tweet,Tweet Time,Tweets Ago
99,0.0,0.0,1.0,0.0,CBS,"RT @MagnumPICBS: ""What we are trying to do is ...",2018-08-05 23:41:21+00:00,100
98,0.8316,0.0,0.694,0.306,CBS,"RT @MagnumPICBS: ""There are a lot of easter eg...",2018-08-05 23:44:39+00:00,99
97,0.0,0.0,1.0,0.0,CBS,"RT @MagnumPICBS: ""If there was ever a time to ...",2018-08-05 23:58:41+00:00,98
96,0.5093,0.0,0.859,0.141,CBS,That's all for our #CBS #TCA18 panels! Tune in...,2018-08-06 00:02:02+00:00,97
95,0.3818,0.0,0.894,0.106,CBS,RT @TheNeighborhood: Find out why Cedric the E...,2018-08-06 00:02:58+00:00,96


In [5]:
#Data for New York Times
bbc = "@BBC"

bbc_list = []
counter = 1
oldest_tweet = None

for x in range(1):
    public_tweets = api.user_timeline(bbc, max_id=oldest_tweet)

    for tweets in public_tweets:
        # Converting time to integer
        raw_time = tweets["created_at"]
        convert = datetime.strptime(raw_time, "%a %b %d %H:%M:%S %z %Y")

        news_text = tweets["text"]
        news_source = tweets["user"]["name"]
        news_time = convert

        results = analyzer.polarity_scores(tweets["text"])
        compound= results["compound"]
        pos= results["pos"]
        neg= results["neg"]
        neu= results["neu"]

        oldest_tweet = tweets['id'] - 1
        
        #Storing into the empty lists
        bbc_list.append({
            "Source": news_source,
            "Tweet Time": news_time,
            "Compound": compound,
            "Tweet": news_text,
            "Positive": pos,
            "Negative": neg,
            "Neutral": neu,
            "Tweets Ago": counter

    })

        counter +=1
bbc_data = pd.DataFrame(bbc_list)
bbc_data = bbc_data.sort_values("Tweets Ago", ascending=False)
bbc_data.head()

Unnamed: 0,Compound,Negative,Neutral,Positive,Source,Tweet,Tweet Time,Tweets Ago
19,0.4404,0.0,0.868,0.132,BBC,❤️ Couples will soon be able to choose a civil...,2018-10-03 16:55:01+00:00,20
18,0.5411,0.0,0.851,0.149,BBC,RT @BBCnireland: ✍ @BBCApprentice is back toni...,2018-10-03 17:03:42+00:00,19
17,0.0,0.0,1.0,0.0,BBC,🎧❤️ Dementia playlists are making such a big d...,2018-10-03 17:59:01+00:00,18
16,0.4939,0.0,0.819,0.181,BBC,The search for Lord Sugar's next business part...,2018-10-03 19:04:04+00:00,17
15,-0.5994,0.231,0.769,0.0,BBC,3 homeless people die every week in the UK.\nM...,2018-10-04 06:03:01+00:00,16


In [6]:
#Data for New York Times
cnn = "@CNN"

cnn_list = []
counter = 1
oldest_tweet = None

for x in range(1, 6):
    public_tweets = api.user_timeline(cnn, max_id=oldest_tweet)

    for tweets in public_tweets:
        # Converting time to integer
        raw_time = tweets["created_at"]
        convert = datetime.strptime(raw_time, "%a %b %d %H:%M:%S %z %Y")

        news_text = tweets["text"]
        news_source = tweets["user"]["name"]
        news_time = convert

        results = analyzer.polarity_scores(tweets["text"])
        compound= results["compound"]
        pos= results["pos"]
        neg= results["neg"]
        neu= results["neu"]

        oldest_tweet = tweets['id'] - 1
        
        #Storing into the empty lists
        cnn_list.append({
            "Source": news_source,
            "Tweet Time": news_time,
            "Compound": compound,
            "Tweet": news_text,
            "Positive": pos,
            "Negative": neg,
            "Neutral": neu,
            "Tweets Ago": counter

    })

        counter +=1
cnn_data = pd.DataFrame(cnn_list)
cnn_data = cnn_data.sort_values("Tweets Ago", ascending=False)
cnn_data.head()

Unnamed: 0,Compound,Negative,Neutral,Positive,Source,Tweet,Tweet Time,Tweets Ago
99,0.4019,0.0,0.828,0.172,CNN,Researchers say this new font could help you r...,2018-10-04 16:15:48+00:00,100
98,0.34,0.0,0.855,0.145,CNN,Amazon giveth and Amazon taketh away.\n\nThe c...,2018-10-04 16:25:36+00:00,99
97,0.0,0.0,1.0,0.0,CNN,Progressive groups are set to take an anti-Kav...,2018-10-04 16:35:06+00:00,98
96,0.3182,0.0,0.897,0.103,CNN,It's Amazon's world. We just live in it. @CNNB...,2018-10-04 16:41:07+00:00,97
95,0.6486,0.0,0.73,0.27,CNN,CNN Business went inside Amazon HQ to see how ...,2018-10-04 16:52:00+00:00,96


In [7]:
#Data for New York Times
fox = "@FOXTV"

fox_list = []
counter = 1
oldest_tweet = None

for x in range(1, 6):
    public_tweets = api.user_timeline(fox, max_id=oldest_tweet)

    for tweets in public_tweets:
        # Converting time to integer
        raw_time = tweets["created_at"]
        convert = datetime.strptime(raw_time, "%a %b %d %H:%M:%S %z %Y")

        news_text = tweets["text"]
        news_source = tweets["user"]["name"]
        news_time = convert


#             if (tweets["user"]["followers_count"] < max_followers
#                 and tweets["user"]["statuses_count"] > min_tweets
#                 and tweets["user"]["statuses_count"] < max_tweets
#                 and tweets["user"]["friends_count"] < max_following
#                 and tweets["user"]["lang"] == lang):

        results = analyzer.polarity_scores(tweets["text"])
        compound= results["compound"]
        pos= results["pos"]
        neg= results["neg"]
        neu= results["neu"]

        oldest_tweet = tweets['id'] - 1
        
        #Storing into the empty lists
        fox_list.append({
            "Source": news_source,
            "Tweet Time": news_time,
            "Compound": compound,
            "Tweet": news_text,
            "Positive": pos,
            "Negative": neg,
            "Neutral": neu,
            "Tweets Ago": counter

    })

        counter +=1
fox_data = pd.DataFrame(fox_list)
fox_data = fox_data.sort_values("Tweets Ago", ascending=False)
fox_data.head()

Unnamed: 0,Compound,Negative,Neutral,Positive,Source,Tweet,Tweet Time,Tweets Ago
99,0.0,0.0,1.0,0.0,FOX,@LandonLarue @gofooji @911onFOX @ResidentFOX @...,2018-10-01 19:31:11+00:00,100
98,0.0,0.0,1.0,0.0,FOX,@andyhio3 @gofooji @911onFOX @ResidentFOX @and...,2018-10-01 19:31:15+00:00,99
97,0.0,0.0,1.0,0.0,FOX,@JFWmass @gofooji @911onFOX @ResidentFOX @JFWm...,2018-10-01 19:31:50+00:00,98
96,0.0,0.0,1.0,0.0,FOX,@BhuwanThakur4 @gofooji @911onFOX @ResidentFOX...,2018-10-01 19:32:02+00:00,97
95,0.0,0.0,1.0,0.0,FOX,@ValOhSooCute @gofooji @911onFOX @ResidentFOX ...,2018-10-01 19:32:52+00:00,96


In [8]:
ny_plot= ny_data.plot(kind="scatter", x="Tweets Ago", y="Compound", color="yellow", s=80, label="New York Times", edgecolor="black", alpha=0.7)
cbs_data.plot(kind="scatter", x="Tweets Ago", y="Compound", ax=ny_plot, color="green", s=80, label="CBS", edgecolor="black", alpha=0.7)
bbc_data.plot(kind="scatter", x="Tweets Ago", y="Compound", ax=ny_plot, color="lightblue", s=80, label="BBC", edgecolor="black", alpha=0.7)
fox_data.plot(kind="scatter", x="Tweets Ago", y="Compound", ax=ny_plot, color="blue", s=80, label="FOX", edgecolor="black", alpha=0.7)
cnn_data.plot(kind="scatter", x="Tweets Ago", y="Compound", ax=ny_plot, color="red", s=80, label="CNN", edgecolor="black", alpha=0.7)

# Cleaning the data
plt.grid()
plt.legend(title="Media Sources", loc='center left', bbox_to_anchor=(1.0, 0.5))
now = datetime.now()
now = now.strftime("%Y-%m-%d %H:%M")
plt.title(f"Sentiment Analysis on Media Tweets ({now})")
plt.ylabel("Tweet Polarity")
plt.savefig("Homework Images/sentiment_analysis.png")
plt.show()

In [9]:
average_bbc = bbc_data["Compound"].mean()
average_cbs = cbs_data["Compound"].mean()
average_cnn = cnn_data["Compound"].mean()
average_ny = ny_data["Compound"].mean()
average_fox = fox_data["Compound"].mean()

average_data = {"BBC": average_bbc,
                "CBS": average_cbs,
                "CNN": average_cnn,
                "NYT": average_ny,
                "Fox": average_fox}

df = pd.DataFrame({"Sources":["BBC","CBS","CNN","NYT","Fox"],
                   "Tweet Polarity":[average_bbc, average_cbs ,average_cnn ,average_ny, average_fox]})
df

Unnamed: 0,Sources,Tweet Polarity
0,BBC,0.02462
1,CBS,0.165103
2,CNN,0.030279
3,NYT,0.037937
4,Fox,0.01497


In [10]:
df.plot(kind="bar", x="Sources", y="Tweet Polarity", rot=0, figsize=(8,7), legend=False)
plt.xlabel("")
plt.ylabel("Tweet Polarity")
plt.grid()
plt.savefig("Homework Images/averages.png")
plt.show()

In [11]:
ny_data.to_csv("Resources/ny_data.csv")
bbc_data.to_csv("Resources/bbc_data.csv")
cnn_data.to_csv("Resources/cnn_data.csv")
cbs_data.to_csv("Resources/cbs_data.csv")
fox_data.to_csv("Resources/fox_data.csv")