In [120]:
import numpy as np
import pandas as pd
import json
import tweepy
import seaborn as sns
import os
import requests as req
import calendar
import pytz
import time
import got3

from datetime import datetime, date, timedelta

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
#single Vader analyzer instance; AnalyzeTweets() uses it to score each tweet's text
analyzer = SentimentIntensityAnalyzer()



In [121]:
#the key file is resolved as <parent of the working directory>/keys/api_keys.json
api_dir = os.path.dirname(os.path.dirname(os.path.realpath('keys')))
file_name = os.path.join(api_dir, "keys", "api_keys.json")

#read through a context manager so the file handle is closed after parsing
with open(file_name) as key_file:
    data = json.load(key_file)

#credentials are looked up by key name; a missing key raises KeyError here
gkey = data['google_api_key']
consumer_key = data['twitter_consumer_key']
consumer_secret = data['twitter_consumer_secret']
access_token = data['twitter_access_token']
access_token_secret = data['twitter_access_token_secret']


In [122]:
#build an OAuth handler from the consumer key pair, then attach the access token pair
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
#API client configured with tweepy's JSON parser
#NOTE(review): `api` is not referenced in the visible cells (tweets are fetched via got3) - confirm it is still needed
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())



In [123]:
#coin list by symbol in descending order of market cap
#bitcoin - ethereum - ripple - bitcoin cash - cardano - litecoin - stellar - neo - eos - nem
coin_list = ['BTC', 'ETH', 'XRP', 'BCH', 'ADA', 'LTC', 'XLM', 'NEO', 'EOS', 'XEM']



In [124]:
#URLs
#social stats endpoint; GetSocialStats() appends the coin id to it
social_stats_url = "https://www.cryptocompare.com/api/data/socialstats/?id="

#coin list endpoint; queried by GetCoinList()
coin_list_url = "https://www.cryptocompare.com/api/data/coinlist/"

#historical price endpoint, split in two parts: GetHistoricalPrice() builds
#first part + symbol + second part + timestamp
coin_historical_url = "https://min-api.cryptocompare.com/data/pricehistorical?fsym="
coin_historical_url_second = "&tsyms=USD&ts="



In [125]:
#variables
#placeholder only; replaced by the response of GetCoinList() before the lookup helpers are used
coin_list_response = []

#result table: one row per coin per day, holding the price and the averaged Vader scores
coin_data_df = pd.DataFrame(columns=["symbol", "date", "coin_name", "coin_price", 
                                     "compound_average",
                                     "positive_average", "neutral_average", "negative_average"
                                   ])



In [126]:
def GetCoinList():
    """Fetch the coin list and report how many entries it contains.

    Requests coin_list_url, parses the body as JSON, prints the number of
    entries found under the "Data" key and returns the whole parsed response.

    Returns:
        The parsed JSON response (the lookup helpers read its "Data" entry).

    Raises:
        KeyError: if the response has no "Data" key.
    """
    response = req.get(coin_list_url).json()

    #len() gives the entry count directly; no manual counting loop needed
    print(len(response["Data"]))
    return response


def GetCoinId(symbol):
    """Return the integer id of a coin, looked up by its symbol.

    Reads the entry stored under `symbol` in coin_list_response["Data"] and
    returns its "Id" converted to int.

    Args:
        symbol: coin symbol used as the key into coin_list_response["Data"].

    Returns:
        The coin id as an int, or None when the symbol is not present or the
        entry's "Name" does not match the symbol.
    """
    #a single keyed lookup is enough; looping over every entry only repeated it
    row = coin_list_response["Data"].get(symbol)
    if row is None or row["Name"] != symbol:
        return None
    return int(row["Id"])


def GetCoinName(symbol):
    """Return the lower-cased coin name for a symbol.

    Reads the entry stored under `symbol` in coin_list_response["Data"],
    lower-cases its "CoinName" and keeps only the part before the first
    " / " separator.

    Args:
        symbol: coin symbol used as the key into coin_list_response["Data"].

    Returns:
        The coin name as a lower-case string, or None when the symbol is not
        present or the entry's "Name" does not match the symbol.
    """
    #a single keyed lookup is enough; looping over every entry only repeated it
    row = coin_list_response["Data"].get(symbol)
    if row is None or row["Name"] != symbol:
        return None
    #names containing " / " keep only their first part
    return row["CoinName"].lower().split(" / ")[0]

               
def GetHistoricalPrice(symbol, utctime):
    """Look up the USD price of a coin at a given timestamp.

    Args:
        symbol: coin symbol inserted into the request URL and used as the key
            into the JSON response.
        utctime: timestamp appended to the request URL (converted with str()).

    Returns:
        The value found at response[symbol]["USD"], converted to float.
    """
    #request URL = first part + symbol + second part + timestamp
    url_parts = (coin_historical_url, symbol)
    request_url = url_parts[0] + url_parts[1] + coin_historical_url_second + str(utctime)

    payload = req.get(request_url).json()
    usd_quote = payload[symbol]["USD"]
    return float(usd_quote)


def GetSocialStats(coin_id):
    """Fetch the social stats for a coin id.

    Args:
        coin_id: coin id appended to social_stats_url; may be an int (as
            returned by GetCoinId) or a string.

    Returns:
        The parsed JSON response.
    """
    #GetCoinId() returns an int, so convert before concatenating into the URL
    url = social_stats_url + str(coin_id)
    return req.get(url).json()


def GetUtcTime(year,month,day):
    """Return the epoch timestamp (whole seconds) for midnight of the given date.

    The date is built as a naive datetime and its time tuple is passed to
    calendar.timegm, so the fields are interpreted as UTC.
    """
    midnight = datetime(year, month, day)
    return calendar.timegm(midnight.utctimetuple())

    
def GetLastDayOfMonth(year,month,day):
    """Return the number of the last day of the month containing the given date.

    Args:
        year, month, day: components of a calendar date.

    Returns:
        The last day of that month as an int (28-31).

    Raises:
        ValueError: if year/month/day do not form a valid date.
    """
    #building the date keeps the validation of all three components
    d = date(year,month,day)
    #monthrange() returns (weekday of first day, number of days in month);
    #unlike "first of next month minus one day" it also works for December 9999
    return calendar.monthrange(d.year, d.month)[1]


def AnalyzeTweets(coin_name, until_date):
    """Score up to max_tweets tweets about a coin with Vader.

    Searches for tweets matching `coin_name` up to `until_date` and appends
    each tweet's compound / pos / neg / neu score to the module-level lists
    compound_list, positive_list, negative_list and neutral_list. The caller
    is expected to reset those lists before each call.

    Args:
        coin_name: query string for the tweet search.
        until_date: upper date bound passed to the search criteria.

    Returns:
        None. Results are delivered through the module-level score lists.
        Prints "No more tweets available" when the search returned fewer than
        max_tweets tweets. Errors raised while scoring are not swallowed.
    """
    tweetCriteria = (got3.manager.TweetCriteria()
                     .setUntil(until_date)
                     .setQuerySearch(coin_name)
                     .setMaxTweets(max_tweets))

    #run the search once; its result does not depend on which tweet is being scored
    tweets = got3.manager.TweetManager.getTweets(tweetCriteria)
    if tweets is None:
        return

    for tweet in tweets[:max_tweets]:
        #run Vader analysis once per tweet and reuse the resulting score dict
        scores = analyzer.polarity_scores(tweet.text)
        #add each score to an appropriate array
        compound_list.append(scores["compound"])
        positive_list.append(scores["pos"])
        negative_list.append(scores["neg"])
        neutral_list.append(scores["neu"])

    #only report a shortfall when there really were fewer tweets than requested
    if len(tweets) < max_tweets:
        print("No more tweets available")




In [127]:
#PLEASE CHANGE THIS BEFORE EXECUTING THE CODE
#start date of the run; the build loop processes one day at a time starting here
year = 2017
month = 1
day = 1

#last day number of the start month; the build loop uses it as the number of days to process
last_day_of_month = GetLastDayOfMonth(year,month,day)

#sampling max number of tweets per day per coin
max_tweets = 2

In [110]:
#time marker: remember when the run started so it can be printed next to the end time
begin = str(datetime.now())


In [111]:
#get coin list
coin_list_response = GetCoinList()

#coin names do not change from day to day, so resolve them once up front
coin_names = {symbol: GetCoinName(symbol) for symbol in coin_list}


def _mean_or_nan(scores):
    """Return the mean of scores as a float, or NaN when no tweet was scored."""
    #np.mean([]) also yields NaN but emits a RuntimeWarning; skip the call instead
    return float(np.mean(scores)) if scores else float("nan")


#collect one dict per (day, coin) and build the DataFrame once at the end;
#DataFrame.append was removed in pandas 2.0 and is quadratic when used in a loop
coin_rows = []

#walk forward from the configured start date without modifying year/month/day,
#so re-running this cell processes the same dates again
start_date = date(year,month,day)

#build coin data
#NOTE(review): the loop runs for last_day_of_month days counted from the start date,
#so a start day other than 1 runs into the following month - confirm this is intended
for day_offset in range(last_day_of_month):
    #get UTC time, one day at a time
    current_date = start_date + timedelta(day_offset)
    utc_time = GetUtcTime(current_date.year,current_date.month,current_date.day)
    until_date = current_date.strftime("%Y-%m-%d")

    #process one coin at a time
    for symbol in coin_list:
        coin_name = coin_names[symbol]

        #calculate Vader scores
        #AnalyzeTweets() appends into these module-level lists, so reset them per coin
        compound_list = []
        positive_list = []
        negative_list = []
        neutral_list = []
        AnalyzeTweets(coin_name,until_date)

        row = {"symbol": symbol,
               "date": until_date,
               "coin_name": coin_name,
               "coin_price": GetHistoricalPrice(symbol,utc_time),
               "compound_average": _mean_or_nan(compound_list),
               "positive_average": _mean_or_nan(positive_list),
               "neutral_average": _mean_or_nan(neutral_list),
               "negative_average": _mean_or_nan(negative_list) }

        coin_rows.append(row)

        #progress line: date, symbol, name and the day's average compound score
        print(until_date," ",symbol," ",coin_name, row["compound_average"])

#build the result table in one step, keeping the column order defined for coin_data_df
coin_data_df = pd.DataFrame(coin_rows, columns=coin_data_df.columns)

coin_data_df

2112
2017-01-01   BTC   bitcoin 0.0
2017-01-01   ETH   ethereum -0.08805
2017-01-01   XRP   ripple 0.0
2017-01-01   BCH   bitcoin cash 0.4404
2017-01-01   ADA   cardano 0.0
2017-01-01   LTC   litecoin 0.125
2017-01-01   XLM   stellar 0.3506
2017-01-01   NEO   neo 0.0
2017-01-01   EOS   eos 0.51315
2017-01-01   XEM   nem 0.0
2017-01-02   BTC   bitcoin -0.1626
2017-01-02   ETH   ethereum 0.2682
2017-01-02   XRP   ripple 0.0
2017-01-02   BCH   bitcoin cash -0.11315
2017-01-02   ADA   cardano 0.0
2017-01-02   LTC   litecoin 0.0
2017-01-02   XLM   stellar 0.0129
2017-01-02   NEO   neo 0.0
2017-01-02   EOS   eos -0.1122
2017-01-02   XEM   nem 0.0


Unnamed: 0,symbol,date,coin_name,coin_price,compound_average,positive_average,neutral_average,negative_average
0,BTC,2017-01-01,bitcoin,995.44,0.0,0.0,1.0,0.0
1,ETH,2017-01-01,ethereum,8.14,-0.08805,0.0,0.921,0.079
2,XRP,2017-01-01,ripple,0.0063,0.0,0.0,1.0,0.0
3,BCH,2017-01-01,bitcoin cash,0.0,0.4404,0.172,0.828,0.0
4,ADA,2017-01-01,cardano,0.0,0.0,0.0,1.0,0.0
5,LTC,2017-01-01,litecoin,4.42,0.125,0.059,0.941,0.0
6,XLM,2017-01-01,stellar,0.002447,0.3506,0.157,0.7865,0.0565
7,NEO,2017-01-01,neo,0.1381,0.0,0.0,1.0,0.0
8,EOS,2017-01-01,eos,0.0,0.51315,0.246,0.754,0.0
9,XEM,2017-01-01,nem,0.003363,0.0,0.0,1.0,0.0


In [112]:
#make sure the output directory exists so to_csv does not fail on a fresh checkout
os.makedirs("resources", exist_ok=True)

#NOTE(review): the file name is hard-coded and does not follow the configured year/month - confirm
file_name = os.path.join("resources", "coin_tweet_analysis_2018_02.csv")
coin_data_df.to_csv(file_name)


In [113]:
#time marker: print the recorded start time next to the current time to show how long the run took
print("begin...",begin)
print("end.....",str(datetime.now()))

begin... 2018-02-04 21:17:26.224563
end..... 2018-02-04 21:18:10.170386
