In [1]:
#!pip install tweepy
#!pip install fbprophet

In [24]:
import datetime
import os

import matplotlib.pyplot as plt
import nltk
import pandas as pd
import tweepy

# Sentiment analyzer; the data-collection cell calls sia.polarity_scores(...) on each
# tweet's text and keeps the "compound" value.
# NOTE(review): presumably needs the NLTK "vader_lexicon" resource to be downloaded
# beforehand -- confirm on a fresh environment.
from nltk.sentiment import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()
# Tweet tokenizer instance; it is not referenced by any of the cells visible here.
from nltk.tokenize import TweetTokenizer
tweet_tokenizer = TweetTokenizer()


In [25]:
# Variables that contain the credentials to access the Twitter API.
# Each value is taken from an environment variable when that variable is set, so real
# secrets never have to be saved inside the notebook; the placeholder text is only the
# fallback used when the variable is missing.
ACCESS_TOKEN = os.environ.get('TWITTER_ACCESS_TOKEN', 'Your twitter access token')
ACCESS_SECRET = os.environ.get('TWITTER_ACCESS_SECRET', 'Your twitter access secret')
CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY', 'Your twitter consumer key')
CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET', 'Your twitter consumer secret')


# Set up access to the API
def connect_to_twitter_OAuth():
    """Build a tweepy API client from the module-level credentials.

    The consumer key/secret are handed to ``tweepy.OAuthHandler`` and the
    access token/secret are attached with ``set_access_token``.

    Returns:
        The ``tweepy.API`` object wrapping that handler.
    """
    handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    handler.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
    return tweepy.API(handler)


# Create the API object once; the data-collection cell further down calls api.search on it
api = connect_to_twitter_OAuth()

#verify that you're able to access tweets from twitter 
#public_tweets = api.home_timeline()
#for tweet in public_tweets:
#    print(tweet.text)

In [26]:
# function to extract data from tweet objects
def extract_tweet_attributes(tweet_object):
    """Flatten an iterable of tweet objects into a pandas DataFrame.

    Args:
        tweet_object: iterable of objects exposing the attributes ``id``,
            ``text``, ``favorite_count``, ``retweet_count``, ``created_at``,
            ``source``, ``in_reply_to_status_id`` and
            ``in_reply_to_screen_name``.

    Returns:
        A DataFrame with one row per tweet and the columns listed in
        ``column_order``. ``retweets`` and ``favorites`` repeat the values of
        ``retweet_count`` and ``favorite_count``. An empty iterable gives an
        empty DataFrame that still has all ten columns.
    """
    column_order = [
        'tweet_id',
        'text',
        'favorite_count',
        'retweet_count',
        'created_at',
        'source',
        'reply_to_status',
        'reply_to_user',
        'retweets',
        'favorites',
    ]
    # one record (dict) per tweet, keyed by the output column names
    records = [
        {
            'tweet_id': status.id,
            'text': status.text,
            'favorite_count': status.favorite_count,
            'retweet_count': status.retweet_count,
            'created_at': status.created_at,
            'source': status.source,
            'reply_to_status': status.in_reply_to_status_id,
            'reply_to_user': status.in_reply_to_screen_name,
            'retweets': status.retweet_count,
            'favorites': status.favorite_count,
        }
        for status in tweet_object
    ]
    # passing columns= fixes the column order and keeps the schema for empty input
    return pd.DataFrame(records, columns=column_order)


In [27]:
# Build one retweet-weighted sentiment score per day for the past week of 'bitcoin'
# tweets. Each day's tweets are saved to their own Excel file and the daily scores are
# written to sentiment_weights.csv.
data = []
#number of days in each step
delta = datetime.timedelta(days=1)
#end day is today; the clock is read once so start and end always agree
end = datetime.date.today()
#start day is 7 days prior to the end day
start = end - 7*delta

for day_offset in range((end-start).days):
    query = start + day_offset*delta
    #creates a filename based on the cryptocurrency and the query date
    filename = 'bitcoin' + str((query)) + '.xls'
    #requests up to 100 tweets containing 'bitcoin' from twitter
    # NOTE(review): Twitter documents `until` as "tweets created before the given date",
    # so these are presumably tweets from before `query`, not from that day -- confirm
    # that the file name and the later merge by date mean what is intended.
    bitcoin_tweets = api.search(q='bitcoin', until = str(query), count = 100)
    #uses extract_tweet_attributes to gather the data into a dataframe
    df = extract_tweet_attributes(bitcoin_tweets)
    #keeps the column order and appends the (still empty) sentiment_score and
    #weighted_score columns that are filled in below
    df = df.reindex(columns=['tweet_id','text', 'favorite_count','retweet_count',
                            'created_at','source','reply_to_status',
                            'reply_to_user','retweets',
                            'favorites', 'sentiment_score', 'weighted_score'])
    #compound sentiment score of every tweet text; assigned as a whole column instead
    #of writing through `.values`, which breaks when pandas returns read-only arrays
    df['sentiment_score'] = df['text'].map(
        lambda tweet_text: sia.polarity_scores(tweet_text)["compound"]
    ).astype(float)
    denom = df['retweets'].sum()
    if denom:
        #weights each tweet's sentiment by its share of the day's total retweets:
        #the more popular the tweet, the more it counts in the daily score
        df['weighted_score'] = df['sentiment_score'] * df['retweets'] / denom
    else:
        #no tweets, or no retweets at all: avoid dividing by zero, the day scores 0.0
        df['weighted_score'] = 0.0
    #adds the day's weighted sentiment score to the list (one entry per day)
    data.append(df['weighted_score'].sum())
    #writes the day's tweets and scores to an excel file
    df.to_excel(filename, index=False, header=True)
#converts the list of daily weighted sentiment scores into a dataframe
weighted = pd.DataFrame(data = data, columns=['weighted_sentiment'])
#writes it to a csv file
weighted.to_csv('sentiment_weights.csv', index=False, header=True)

In [28]:
#import cryptocurrency data from yahoo finance
btc_data = pd.read_csv("BTC-USD.csv")
#import the sentiment data written by the collection cell
weight_data = pd.read_csv("sentiment_weights.csv")
#put the two data sets side by side, keeping only the rows present in both
# NOTE(review): concat(axis=1) pairs rows by their position in each file, not by date --
# confirm that BTC-USD.csv covers exactly the same days, in the same order, as the
# sentiment file.
btc_data = pd.concat([btc_data, weight_data], axis=1, join='inner')
#keep only the columns needed downstream (copy so new columns can be added safely)
btc_data = btc_data[["Date", "Close", "weighted_sentiment"]].copy()

#adds the day-over-day change in closing price and in sentiment score;
#the first row has no previous day, so its change is set to 0.0
for source_col, diff_col in (("Close", "Close_Diff"),
                             ("weighted_sentiment", "Sentiment_Diff")):
    day_change = btc_data[source_col].diff()
    if len(day_change):
        day_change.iloc[0] = 0.0
    btc_data[diff_col] = day_change

#saves all data into a new csv ready to be imported into cryptocurrency models
btc_data.to_csv('BTC-USD-Modified.csv', index=False, header=True)
#last expression of the cell, so the notebook displays the first rows
btc_data.head(7)