In [1]:
import numpy as np
import pandas as pd
import json
import tweepy
import seaborn as sns
import os
import requests as req
import calendar
import pytz
import time
import got3

from datetime import datetime, date, timedelta

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Shared VADER sentiment analyzer instance; AnalyzeTweets calls
# analyzer.polarity_scores() on every tweet text.
analyzer = SentimentIntensityAnalyzer()



In [2]:
# Load API credentials from <parent of working directory>/keys/api_keys.json
# so that no secret is hard-coded in the notebook.
# realpath('keys') resolves to <cwd>/keys; the two dirname() calls climb to the
# parent of the working directory, where the "keys" folder is expected to live.
api_dir = os.path.dirname(os.path.dirname(os.path.realpath('keys')))
file_name = os.path.join(api_dir, "keys", "api_keys.json")

# Use a context manager so the file handle is closed even if JSON parsing fails.
with open(file_name) as key_file:
    data = json.load(key_file)

# Individual credentials used by the cells below.
gkey = data['google_api_key']
consumer_key = data['twitter_consumer_key']
consumer_secret = data['twitter_consumer_secret']
access_token = data['twitter_access_token']
access_token_secret = data['twitter_access_token_secret']


In [3]:
# Build an OAuth handler from the consumer key/secret, attach the user access
# token, and create a tweepy API client whose responses are parsed by JSONParser.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())



In [4]:
# Coins to analyze, identified by ticker symbol, in descending order of market cap:
# bitcoin - ethereum - ripple - bitcoin cash - cardano - litecoin - stellar - neo - eos - xem
coin_list = ['BTC', 'ETH', 'XRP', 'BCH', 'ADA', 'LTC', 'XLM', 'NEO', 'EOS', 'XEM']



In [5]:
# CryptoCompare endpoint URLs.
# Social stats endpoint; GetSocialStats appends the coin id to this prefix.
social_stats_url = "https://www.cryptocompare.com/api/data/socialstats/?id="

# Full coin list endpoint; fetched by GetCoinList.
coin_list_url = "https://www.cryptocompare.com/api/data/coinlist/"

# Historical price endpoint, split in two parts: GetHistoricalPrice builds
# <first part> + symbol + <second part> + timestamp.
coin_historical_url = "https://min-api.cryptocompare.com/data/pricehistorical?fsym="
coin_historical_url_second = "&tsyms=USD&ts="



In [6]:
# Shared state.
# Placeholder for the coin list payload; it is replaced by the result of
# GetCoinList() before GetCoinId / GetCoinName read coin_list_response["Data"].
coin_list_response = []

# Result table: one row per (coin, date) holding the price and the averaged
# VADER scores of the sampled tweets.
coin_data_df = pd.DataFrame(columns=["symbol", "date", "coin_name", "coin_price", 
                                     "compound_average",
                                     "positive_average", "neutral_average", "negative_average"
                                   ])



In [7]:
# First date to process; the data-collection loop starts here and advances
# one day at a time.
year = 2018
month = 1
day = 1

# Maximum number of tweets sampled per day per coin (passed to setMaxTweets
# in AnalyzeTweets).
max_tweets = 25

In [None]:
def GetCoinList():
    """Fetch the coin list from coin_list_url.

    Prints the number of entries found under the payload's "Data" key and
    returns the decoded JSON payload unchanged.
    """
    payload = req.get(coin_list_url).json()

    # Report how many coins the API returned.
    print(len(payload["Data"]))
    return payload


def GetCoinId(symbol, coin_data=None):
    """Return the numeric coin id for a ticker symbol.

    Args:
        symbol: Ticker symbol; it is used as the key into the coin mapping.
        coin_data: Optional mapping of symbol -> coin record (each record
            needs "Name" and "Id"). When omitted, the module-level
            coin_list_response["Data"] is used, as before.

    Returns:
        The record's "Id" converted to int, or None when the symbol is not in
        the mapping or the record's "Name" does not match the symbol.
    """
    if coin_data is None:
        coin_data = coin_list_response["Data"]

    # The mapping is keyed by symbol, so a direct lookup replaces the former
    # loop that repeated the same lookup once per entry. Using .get() makes an
    # unknown symbol return None instead of raising KeyError.
    row = coin_data.get(symbol)
    if row is not None and row["Name"] == symbol:
        return int(row["Id"])
    return None


def GetCoinName(symbol, coin_data=None):
    """Return the lower-cased coin name for a ticker symbol.

    Args:
        symbol: Ticker symbol; it is used as the key into the coin mapping.
        coin_data: Optional mapping of symbol -> coin record (each record
            needs "Name" and "CoinName"). When omitted, the module-level
            coin_list_response["Data"] is used, as before.

    Returns:
        The record's "CoinName" in lower case, or None when the symbol is not
        in the mapping or the record's "Name" does not match the symbol.
    """
    if coin_data is None:
        coin_data = coin_list_response["Data"]

    # The mapping is keyed by symbol, so a direct lookup replaces the former
    # loop that repeated the same lookup once per entry. Using .get() makes an
    # unknown symbol return None instead of raising KeyError.
    row = coin_data.get(symbol)
    if row is not None and row["Name"] == symbol:
        return row["CoinName"].lower()
    return None

               
def GetHistoricalPrice(symbol, utctime):
    """Look up the USD price of a coin at a given timestamp.

    The request URL is coin_historical_url + symbol +
    coin_historical_url_second + str(utctime). Returns the "USD" value found
    under the symbol key of the JSON response, converted to float.
    """
    request_url = "".join(
        [coin_historical_url, symbol, coin_historical_url_second, str(utctime)]
    )
    payload = req.get(request_url).json()
    usd_price = payload[symbol]["USD"]
    return float(usd_price)


def GetSocialStats(coin_id):
    """Fetch the social stats JSON payload for a coin.

    Args:
        coin_id: Coin id as an int (as returned by GetCoinId) or a string.

    Returns:
        The decoded JSON response of the social stats endpoint.
    """
    # GetCoinId returns an int; convert explicitly because str + int raises
    # TypeError. String ids are unaffected by the conversion.
    url = social_stats_url + str(coin_id)
    return req.get(url).json()


def GetUtcTime(year,month,day):
    """Return the epoch timestamp (whole seconds) for midnight of the given date.

    The date is turned into a naive datetime at 00:00:00, whose UTC time tuple
    is converted with calendar.timegm.
    """
    midnight = datetime(year, month, day)
    time_tuple = midnight.utctimetuple()
    return calendar.timegm(time_tuple)

    
def AnalyzeTweets(coin_name, until_date):
    """Score tweets matching a coin name with VADER and collect the scores.

    Args:
        coin_name: Search query passed to setQuerySearch.
        until_date: Date string passed to setUntil.

    Side effects:
        Appends one value per analyzed tweet to the module-level lists
        compound_list, positive_list, negative_list and neutral_list, which
        the caller must have created beforehand. Nothing is returned.
    """
    tweetCriteria = (
        got3.manager.TweetCriteria()
        .setUntil(until_date)
        .setQuerySearch(coin_name)
        .setMaxTweets(max_tweets)
    )

    # Fetch the tweets once. The previous version called getTweets() inside the
    # loop, repeating the whole search for every tweet it read.
    tweets = got3.manager.TweetManager.getTweets(tweetCriteria)

    # Iterate over what was actually returned: indexing range(max_tweets)
    # raised IndexError whenever fewer than max_tweets tweets were found.
    for tweet in tweets[:max_tweets]:
        # Run the VADER analysis once per tweet and reuse the result.
        scores = analyzer.polarity_scores(tweet.text)
        # Add each score to its list.
        compound_list.append(scores["compound"])
        positive_list.append(scores["pos"])
        negative_list.append(scores["neg"])
        neutral_list.append(scores["neu"])





In [None]:
#get coin list
coin_list_response = GetCoinList()

# Number of consecutive days to process, starting at (year, month, day).
num_days = 31

# Derive every date from a fixed start instead of overwriting the global
# year/month/day, so re-running this cell processes the same date range.
start_date = date(year, month, day)

# Collect one dict per (day, coin) and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
rows = []

#build coin data
for day_offset in range(num_days):
    #get UTC time, one day at a time
    current_date = start_date + timedelta(days=day_offset)
    utc_time = GetUtcTime(current_date.year, current_date.month, current_date.day)
    until_date = current_date.strftime("%Y-%m-%d")

    #process one coin at a time
    for symbol in coin_list:
        coin_name = GetCoinName(symbol)
        print(until_date," ",symbol," ",coin_name)

        #calculate Vader scores
        # AnalyzeTweets appends to these module-level lists, so they are
        # reset for every coin before it is called.
        compound_list = []
        positive_list = []
        negative_list = []
        neutral_list = []
        AnalyzeTweets(coin_name,until_date)

        rows.append({"symbol": symbol,
                     "date": until_date,
                     "coin_name": coin_name,
                     "coin_price": GetHistoricalPrice(symbol,utc_time),
                     "compound_average": float(np.mean(compound_list)),
                     "positive_average": float(np.mean(positive_list)),
                     "neutral_average": float(np.mean(neutral_list)),
                     "negative_average": float(np.mean(negative_list))})

# Rebuild the result table with the column order declared for coin_data_df.
coin_data_df = pd.DataFrame(rows, columns=coin_data_df.columns)

coin_data_df

2112
2018-01-01   BTC   bitcoin
2018-01-01   ETH   ethereum
2018-01-01   XRP   ripple
2018-01-01   BCH   bitcoin cash / bcc
2018-01-01   ADA   cardano
2018-01-01   LTC   litecoin
2018-01-01   XLM   stellar


In [None]:
# Save the collected results as resources/coin_tweet_analysis_2018_01.csv.
# to_csv does not create missing directories, so make sure the folder exists
# first; exist_ok=True makes this a no-op when it is already there.
output_dir = "resources"
os.makedirs(output_dir, exist_ok=True)
file_name = os.path.join(output_dir, "coin_tweet_analysis_2018_01.csv")
coin_data_df.to_csv(file_name)
