# News Mood with Tweepy and vaderSentiment

## Dependencies and Setup

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import json
import os
import time
import tweepy

import warnings
warnings.filterwarnings("ignore")

# Twitter API 
from myconfig import consumer_key, consumer_secret, access_token, access_token_secret

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [4]:
# Sentiment scorer shared by every tweet processed in this notebook
analyzer = SentimentIntensityAnalyzer()

# Twitter client: authenticate with the app credentials and user access token
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# The JSON parser makes API calls return parsed JSON (dict/list) payloads,
# which is why tweets are read with tweet["..."] key lookups later on
json_parser = tweepy.parsers.JSONParser()
api = tweepy.API(auth, parser=json_parser)

## Get data

In [14]:
# Get data for these news sources
sources = ["FoxNews", "CNN", "BBCWorld", "CBSNews", "nytimes"]

# How many timeline pages to request per source
# (presumably 20 tweets per page with the API default count -- verify)
pages_per_source = 5

# Store data using dictionary to create dataframe easily later.
# Re-created on every run of this cell, so re-running never accumulates rows.
tweet_data = {
    "tweet_source": [],
    "tweet_text": [],
    "tweet_date": [],
    "tweet_vader_score": [],
    "tweet_pos_score": [],
    "tweet_neu_score": [],
    "tweet_neg_score": [],
}

# Pagination.
# Twitter's `page` parameter is 1-based: asking for page 0 returns the same
# tweets as page 1, so iterating from 0 stored the newest page twice and
# fetched only four distinct pages. Start at 1 instead.
for page in range(1, pages_per_source + 1):
    for source in sources:
        # one page of the source's timeline, as a list of tweet dicts
        public_tweets = api.user_timeline(source, page=page)
        for tweet in public_tweets:
            # get data for tweeter, the tweet itself, and the tweet's creation date
            tweet_data["tweet_source"].append(tweet["user"]["name"])
            tweet_data["tweet_text"].append(tweet["text"])
            tweet_data["tweet_date"].append(tweet["created_at"])

            # vader's polarity_scores: compound plus pos / neu / neg components
            results = analyzer.polarity_scores(tweet["text"])
            tweet_data["tweet_vader_score"].append(results["compound"])
            tweet_data["tweet_pos_score"].append(results["pos"])
            tweet_data["tweet_neu_score"].append(results["neu"])
            tweet_data["tweet_neg_score"].append(results["neg"])

## DataFrame

In [16]:
# Build the frame from the column-oriented dict of lists collected above
tweet_df = pd.DataFrame.from_dict(tweet_data)

# Save the file with date
output_dir = "analysis"
# to_csv does not create missing parent directories, so on a fresh checkout
# the save would fail; make sure the folder exists first
os.makedirs(output_dir, exist_ok=True)
file_name = time.strftime("%m-%d-%y") + "_tweets.csv"
tweet_df.to_csv(os.path.join(output_dir, file_name), encoding="utf-8")

# Preview the first rows
tweet_df.head()

Unnamed: 0,tweet_source,tweet_text,tweet_date,tweet_vader_score,tweet_pos_score,tweet_neu_score,tweet_neg_score
0,Fox News,BREAKING: Federal appeals court rules against ...,Thu Nov 08 19:18:44 +0000 2018,0.0,0.0,1.0,0.0
1,Fox News,Breaking News: Grand jury indicts captain of M...,Thu Nov 08 17:02:25 +0000 2018,-0.6124,0.115,0.615,0.269
2,Fox News,JUST IN: Police identify gunman who opened fir...,Thu Nov 08 15:14:45 +0000 2018,-0.7783,0.0,0.638,0.362
3,Fox News,JUST IN: Supreme Court Justice Ruth Bader Gins...,Thu Nov 08 14:22:51 +0000 2018,0.7906,0.389,0.611,0.0
4,Fox News,Sgt. Ron Helus was fatally shot when he respon...,Thu Nov 08 13:43:25 +0000 2018,-0.6369,0.0,0.833,0.167


In [19]:
# Inspect the column dtypes: tweet_date is still stored as plain strings
# (object dtype), so it has to be converted before any datetime operations
tweet_df.dtypes

tweet_source          object
tweet_text            object
tweet_date            object
tweet_vader_score    float64
tweet_pos_score      float64
tweet_neu_score      float64
tweet_neg_score      float64
dtype: object

In [None]:
# Parse the string timestamps into datetime values, then swap them into the
# existing column
parsed_dates = pd.to_datetime(tweet_df["tweet_date"])
tweet_df["tweet_date"] = parsed_dates
