In [None]:
#!pip install vaderSentiment 

In [None]:
# Dependencies
import tweepy
import os
import numpy as np
import pandas as pd
import datetime

# Import the VADER sentiment analyzer and create the single `analyzer`
# instance that the tweet-scoring loop further down reuses for every tweet.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Remember the working directory in effect when this cell runs (before any
# os.chdir call below) and print it for reference.
orig_working_directory = os.getcwd()
print(orig_working_directory)

In [None]:
# Switch to the 'gwu-arl-data-pt-03-2020-u-c' directory, two levels above the
# starting directory, which is expected to contain config.py for the import in
# the next cell.
# The path is anchored on orig_working_directory instead of the current
# directory so that re-running this cell lands in the same place every time
# (a purely relative '../..' would climb further up on each re-run).
os.chdir(os.path.join(orig_working_directory, '..', '..', 'gwu-arl-data-pt-03-2020-u-c'))

# Record and display the new working directory
curr_working_directory = os.getcwd()
curr_working_directory

In [None]:
# Twitter API credentials are read from the local config module so that no
# secret is hard-coded in the notebook itself.
from config import (consumer_key, 
                    consumer_secret, 
                    access_token, 
                    access_token_secret)

# Set up Tweepy OAuth authentication: consumer credentials first, then the
# user access token, and finally the API client used for all searches below.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True) # wait_on_rate_limit=True asks Tweepy to pause when the rate limit is reached instead of failing the request

In [None]:
# Search terms, one per news organisation: BBC, CBS, CNN, Fox and the New York Times.
# NOTE(review): "@New York times" contains spaces, so it is presumably matched
# as free text rather than as an account mention -- confirm whether a real
# handle was intended. It is kept unchanged because a later cell filters the
# results on this exact string.
target_terms = ("@BBC", "@CBS", "@CNN", "@Fox", "@New York times")

# Tweepy 4 renamed API.search to API.search_tweets; use whichever method the
# installed version provides.
search_method = getattr(api, "search_tweets", None) or api.search

# --- Step 1: collect the tweets -------------------------------------------
# tweets_list and search_term_list are kept in lock-step: entry i of
# search_term_list is the term whose search returned tweets_list[i].
tweets_list = []
search_term_list = []

for target in target_terms:
    # Up to 100 results per search term; tweet_mode='extended' is what makes
    # the `full_text` attribute read below available.
    for tweet in tweepy.Cursor(search_method, target, tweet_mode='extended').items(100):
        tweets_list.append(tweet)
        search_term_list.append(target)

# --- Step 2: score every collected tweet exactly once ----------------------
# This loop used to be nested inside the collection loop, which re-scored all
# previously collected tweets for each newly fetched one and left these lists
# undefined whenever the searches returned nothing.
user_list = []
text_list = []
createdOn_list = []

compound_list = []
positive_list = []
negative_list = []
neutral_list = []

for tweet in tweets_list:
    tweet_text = tweet.full_text

    # Run VADER sentiment analysis on the full tweet text
    results = analyzer.polarity_scores(tweet_text)

    # Store each value in the matching list; all lists stay the same length
    # as tweets_list so they can be combined column-wise afterwards.
    user_list.append(tweet.user.screen_name)
    text_list.append(tweet_text)
    createdOn_list.append(tweet.created_at)

    compound_list.append(results["compound"])
    positive_list.append(results["pos"])
    negative_list.append(results["neg"])
    neutral_list.append(results["neu"])

In [None]:
# Combine the per-tweet lists built in the previous cell into one table,
# one row per collected tweet.
result_df = pd.DataFrame({
    'User': user_list,
    'Search Term': search_term_list,
    'Tweet Text': text_list,
    'Created on': createdOn_list,
    'Compound': compound_list,
    'Positive': positive_list,
    'Negative': negative_list,
    'Neutral': neutral_list,
})

# Display tweet text without truncation. None is the supported "no limit"
# value; the former -1 sentinel has been deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)

In [None]:
#result_sorted = result_df.sort_values(by=['Created on'])

In [None]:
# Convert each tweet's creation time to whole seconds since the Unix epoch.
# The previous cast through `np.int` fails on current NumPy (the alias was
# removed) and assumed nanosecond storage; subtracting the epoch and dividing
# by one second works for naive and timezone-aware values alike.
created_utc = pd.to_datetime(result_df['Created on'], utc=True)
unix_epoch = pd.Timestamp('1970-01-01', tz='UTC')
result_df['Timestamps'] = (created_utc - unix_epoch) // pd.Timedelta(seconds=1)
result_df.head()

In [None]:
# Persist the scored tweets as CSV one level above the current working
# directory, leaving out the DataFrame's positional index.
result_df.to_csv(path_or_buf='../sentiment_analysis.csv', index=False)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
#ts_max = result_df['Timestamps'].max()
#ts_min = result_df['Timestamps'].min()
#print(ts_max, ts_min)

In [None]:
# Split the results into one DataFrame per search term
# (@BBC, @CBS, @CNN, @Fox, @New York times) for the plots below.
search_term = result_df['Search Term']
bbc = result_df.loc[search_term.eq('@BBC')]
cbs = result_df.loc[search_term.eq('@CBS')]
cnn = result_df.loc[search_term.eq('@CNN')]
fox = result_df.loc[search_term.eq('@Fox')]
nyt = result_df.loc[search_term.eq('@New York times')]

In [None]:
# convert DATE&TIME to Timestamps
#bbc['bbc_ts'] = bbc['Created on'].values.astype(np.int) // 10 ** 9
#cbs['cbs_ts'] = cbs['Created on'].values.astype(np.int) // 10 ** 9
#cnn['cnn_ts'] = cnn['Created on'].values.astype(np.int) // 10 ** 9
#fox['fox_ts'] = fox['Created on'].values.astype(np.int) // 10 ** 9
#nyt['nyt_ts'] = nyt['Created on'].values.astype(np.int) // 10 ** 9

In [None]:
# Scatter plot of the compound sentiment score of every tweet against the
# time it was posted, one colour per news organisation.
fig, ax = plt.subplots(figsize=(10, 5))

# (frame, legend label, colour) for each organisation; the label is what lets
# a reader tell the five colours apart in the legend.
channel_series = (
    (bbc, 'BBC', 'hotpink'),
    (cbs, 'CBS', 'gold'),
    (cnn, 'CNN', 'seagreen'),
    (fox, 'Fox', 'royalblue'),
    (nyt, 'New York Times', 'mediumvioletred'),
)
for channel_frame, channel_label, channel_color in channel_series:
    ax.scatter(channel_frame['Timestamps'], channel_frame['Compound'],
               color=channel_color, alpha=0.8, label=channel_label)

# Compound scores are plotted on a fixed -1..1 axis
ax.set_ylim(-1, 1)
# The x-axis carries the 'Timestamps' column (seconds since the Unix epoch),
# not the channel names, so it is labelled accordingly.
ax.set_xlabel('Tweet Timestamp (Unix seconds)', fontsize=13)
ax.set_ylabel('Scores', fontsize=13)
ax.set_title('Sentiment Intensity Score for each News Organizations', fontsize=15)
ax.legend(title='News Channels')

ax.grid(alpha=.2)
fig.tight_layout()

plt.show()

In [None]:
# Average compound score per news organisation
bbc_av = bbc['Compound'].mean()
cbs_av = cbs['Compound'].mean()
cnn_av = cnn['Compound'].mean()
fox_av = fox['Compound'].mean()
nyt_av = nyt['Compound'].mean()

# NOTE(review): `index` is not used anywhere in this cell; it is kept in case
# a later cell reads it -- confirm and remove if unused.
index = ["@BBC", "@CBS", "@CNN", "@Fox", "@New York times"]

# One row per organisation, ready for a bar chart
df = pd.DataFrame({'Channels': ['BBC', 'CBS', 'CNN', 'FOX', 'NYT'], 
                   'Tweet Polarity': [bbc_av, cbs_av, cnn_av, fox_av, nyt_av]})

ax = df.plot.bar(x='Channels', y='Tweet Polarity', rot=0)

ax.set_xlabel('News Channels', fontsize=13)
ax.set_ylabel('Scores', fontsize=13)
ax.set_title('Overall Media Sentiment based on Twitter (May 22nd, 2020)', fontsize=14)
# Tighten the layout of the bar chart's own figure. A bare `fig` here would be
# whatever figure an earlier cell happened to create, not this chart.
ax.figure.tight_layout()

ax.set_ylim(-0.1, 0.3)