In [1]:
# Initial imports -- standard library
import os
import re
import datetime
# NOTE: the next line rebinds the name `datetime` from the module to the class,
# so after this cell `datetime.now()` works but `datetime.datetime` does not.
from datetime import datetime, timedelta, date
from pathlib import Path

# Third-party data, plotting and HTTP helpers
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from dateutil.parser import parse
from dotenv import load_dotenv

# Twitter API imports
import tweepy as tw

# NLP & Sentiment imports
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download/Update the VADER Lexicon
nltk.download('vader_lexicon')

# Load environment variables (the API keys read in the next cell) from .env.
# Kept as the last expression so the cell still displays its return value.
load_dotenv()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/Kris/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:
############################################################
# Authenticates against the Twitter API using the credentials
# found in the environment and prints a pass/fail statement.
# Leaves `twitter_api` (tweepy client) defined for later cells.
############################################################

# Setting twitter access and api keys
bearer_token = os.getenv("TWITTER_BEARER_TOKEN")
consumer_key = os.getenv("TWITTER_API_KEY")
consumer_secret = os.getenv("TWITTER_SECRET_KEY")
access_token = os.getenv("TWITTER_ACCESS_TOKEN")
access_token_secret = os.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# authentication for twitter
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
twitter_api = tw.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# test authentication
# The status lives in its own variable so `auth` keeps referring to the
# OAuth handler instead of being overwritten with a message string.
try:
    twitter_api.verify_credentials()
    auth_status = "Twitter Authentication Verified"
except Exception as auth_error:
    # `Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit
    # propagate, and the cause is reported instead of being discarded.
    auth_status = f"Error During Twitter Authentication: {auth_error}"

print(auth_status)

Twitter Authentication Verified


In [3]:

############################################################
# S&P 500 constituents: builds `sp500_tickers`, a DataFrame
# holding the "Symbol" and "Security" columns scraped from
# Wikipedia when this cell runs.
############################################################

# read_html returns a list of DataFrame objects parsed from the page
table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')

# Only the current list of S&P 500 stocks is needed: the DataFrame at index 0
sp500_list_df = table[0]
sp500_tickers = pd.DataFrame(sp500_list_df.loc[:, ["Symbol", "Security"]])

############################################################
# Twitter: Scrape Tweets and Analyze Sentiment
############################################################

def twitter_sentiment(search_word, date_since, items):
    """Search tweets for a query and score each one with VADER.

    Parameters
    ----------
    search_word : str
        Twitter search query; ``" -filter:retweets"`` is appended to it.
    date_since : str
        Forwarded to the search call as ``since`` (e.g. ``"2021-01-01"``).
    items : int
        Maximum number of tweets to pull from the cursor.

    Returns
    -------
    pandas.DataFrame
        One row per scored tweet with columns ``date`` (ISO-8601 string in
        America/New_York time), ``tweet`` (the tweet text) and ``compound``
        (VADER compound score). The three columns exist even when no tweet
        matched, so callers can index them safely.
    """
    # Initialize the VADER sentiment analyzer
    analyzer = SentimentIntensityAnalyzer()

    # adding retweet filter to search words
    query = search_word + " -filter:retweets"

    # Fetch tweets matching the query (uses the module-level `twitter_api`)
    tweets = tw.Cursor(
        twitter_api.search,
        q=query,
        lang="en",
        since=date_since,
    ).items(items)

    records = []
    for tweet in tweets:
        tweet_fetched = tweet.text

        # Twitter reports created_at in UTC and tweepy 3.x exposes it as a
        # naive datetime. Passing tz= to pd.Timestamp would merely *label*
        # that UTC wall time as New York time (shifting every tweet by the
        # UTC offset), so localize to UTC first and then convert.
        created = pd.Timestamp(tweet.created_at)
        if created.tzinfo is None:
            created = created.tz_localize("UTC")
        tweet_date = created.tz_convert("America/New_York").isoformat()

        try:
            compound = analyzer.polarity_scores(tweet_fetched)["compound"]
        except AttributeError:
            # Best-effort: skip a tweet whose text the analyzer cannot process
            continue

        records.append({
            "date": tweet_date,
            "tweet": tweet_fetched,
            "compound": compound,
        })

    # Explicit columns keep the schema stable when `records` is empty
    return pd.DataFrame(records, columns=["date", "tweet", "compound"])

############################################################
# Average hourly Twitter sentiment for a ticker / search word
############################################################

def get_avg_twitter_sentiment(ticker, search_word, date_since="2021-01-01", items=500):
    """Return the average VADER compound score per hour for a ticker.

    Tweets matching ``"<search_word> OR <ticker>"`` are fetched and scored by
    ``twitter_sentiment``; their compound scores are then averaged in
    one-hour bins and hours without tweets are dropped.

    Parameters
    ----------
    ticker : str
        Ticker symbol, used as the second alternative of the search phrase.
    search_word : str
        Word(s) to search tweets for (e.g. the company name).
    date_since : str, optional
        Forwarded to ``twitter_sentiment``; defaults to ``"2021-01-01"``.
    items : int, optional
        Maximum number of tweets to fetch; defaults to ``500``.

    Returns
    -------
    pandas.DataFrame
        Indexed by hour (``date``, America/New_York) with one column,
        ``compound``. Empty, with the same layout, when no tweets were found.
    """
    twitter_search_phrase = search_word + " OR " + ticker

    # call the twitter sentiment function and return a dataframe
    tweets_df = twitter_sentiment(twitter_search_phrase, date_since, items)

    if tweets_df.empty:
        # Nothing to aggregate: return an empty frame with the usual layout
        # rather than failing on the missing "date" column.
        return pd.DataFrame(
            {"compound": pd.Series(dtype="float64")},
            index=pd.DatetimeIndex([], tz="America/New_York", name="date"),
        )

    # Parse through UTC so ISO strings with different offsets (EST vs EDT)
    # still produce a single tz-aware datetime column for the Grouper.
    tweet_times = pd.to_datetime(tweets_df["date"], utc=True).dt.tz_convert("America/New_York")
    scored = tweets_df.assign(date=tweet_times)

    # Average only the numeric score: including the text column in mean()
    # raises TypeError on pandas >= 2.0.
    avg_sentiment = (
        scored
        .groupby(pd.Grouper(key="date", freq=pd.offsets.Hour()))[["compound"]]
        .mean()
        .dropna()
    )

    return avg_sentiment

############################################################
# Latest hourly Twitter sentiment for a ticker symbol
############################################################

def compound_twitter_sentiment(ticker, search_word):
    """Return the last row of the hourly average VADER sentiment.

    Calls ``get_avg_twitter_sentiment(ticker, search_word)`` and returns the
    final row of its result via ``.iloc[-1]``: a Series holding the
    ``compound`` average, named after that row's hourly timestamp.
    """
    hourly_scores = get_avg_twitter_sentiment(ticker, search_word)
    return hourly_scores.iloc[-1]

In [100]:
# for testing


#ticker = "MSFT"
#search_word = "Microsoft"

#score = compound_twitter_sentiment(ticker, search_word)
#score

compound    0.13209
Name: 2021-02-28 10:00:00-05:00, dtype: float64