## Ranking Cryptocurrencies

In [None]:
import requests
from retrying import retry
import logging
import pandas as pd
import numpy as np
import urllib2
import dill
import time
import json
from datetime import date, timedelta
from bs4 import BeautifulSoup
import unicodedata
import csv
import sys
import get_stack_overflow_data as gso
sys.path.append('../code')
sys.path.append('../data')
import re


#function that adds a delay before running a function, to be used as a decorator.
def sleeper(secs):
    """Build a decorator that sleeps for ``secs`` seconds before each call.

    secs: delay in seconds, handed to ``time.sleep`` ahead of every invocation.

    Returns a decorator. The decorated callable forwards all positional and
    keyword arguments to the wrapped function and returns its result.
    """
    #local import keeps this block self-contained
    import functools

    def realsleeper(func):
        #functools.wraps copies __name__/__doc__ onto the wrapper so decorated
        #functions stay identifiable in tracebacks and logs
        @functools.wraps(func)
        def wrapper(*args,**kwargs):
            time.sleep(secs)
            return func(*args,**kwargs)
        return wrapper
    return realsleeper

### Create list of currencies

### https://api.coinmarketcap.com

In [None]:
#use coinmarketcap api to obtain list of top 100 cryptocurrencies by market cap value
#timeout: do not hang forever on an unresponsive server.
#raise_for_status: report an HTTP error here instead of as a confusing failure
#when the payload is indexed in later cells.
r = requests.get('https://api.coinmarketcap.com/v1/ticker/', timeout=30)
r.raise_for_status()
financial_data = r.json()

In [None]:
#the 'id' field of every entry in the ticker response, in response order
currency_list = [d['id'] for d in financial_data]

In [None]:
#preview the first 10 ids; the single-argument call form prints the same on Python 2 and 3
print(currency_list[:10])

### Obtain market cap values

In [None]:
#obtain market cap value data
#float(None) raises TypeError, so a missing or null 'market_cap_usd' becomes NaN,
#which keeps this list the same length and order as the ticker response.
#NOTE(review): confirm whether the API can actually return null for this field.
market_cap_list = [float(d['market_cap_usd']) if d.get('market_cap_usd') is not None else float('nan')
                   for d in financial_data]

In [None]:
#preview the first 10 market caps; the single-argument call form prints the same on Python 2 and 3
print(market_cap_list[:10])

### Get 30 day trading volume and net market cap growth data

In [None]:
#obtain list of symbols corresponding to cryptocurrencies
#(the 'symbol' field of every ticker entry, in response order)
symbol_list = [d['symbol'] for d in financial_data]

In [None]:
#preview the first 10 symbols; the single-argument call form prints the same on Python 2 and 3
print(symbol_list[:10])

In [None]:
#obtain 30 day trading volume data
#timeout + raise_for_status: fail fast on an unresponsive server or an HTTP error
#rather than hanging, or parsing an error page and failing later in the loop
r1 = requests.get('https://coinmarketcap.com/currencies/volume/monthly/', timeout=30)
r1.raise_for_status()
soup1 = BeautifulSoup(r1.text, 'lxml')
raw_list = []
monthly_volume_list = []

#extract monthly volume data from tr tags
for currency in currency_list:
    #each row is looked up by its id attribute, 'id-<currency id>'
    trtag = soup1.findAll('tr', attrs={'id':'id-' + currency})
    if not trtag:
        #name the currency that is missing instead of raising a bare IndexError
        raise ValueError('no monthly volume row found for currency %r' % currency)
    contents1 = trtag[0].contents
    raw_list.append(contents1)
    #drop the '$' sign and thousands separators before converting to int
    monthly_volume = int(re.sub('[$,]', '',str(contents1[-2].contents[-2].contents[-1])))
    monthly_volume_list.append(monthly_volume)


In [None]:
#examine format of contents of tr tags by printing one of them
#(trtag still holds the lookup result of the last iteration of the loop that assigns it)
trtag[0].contents

### Save list of currencies to file

In [None]:
#'toolkits' is a second name for the same list object as currency_list
toolkits = currency_list

In [None]:
#persist the ids as a single space-separated line
with open("../data/blockchain_final.txt", "w") as f:
    joined_names = " ".join(toolkits)
    f.write(joined_names)

### Get metrics

In [None]:
#create dataframe of financial metrics data
#one column per metric, keyed by the currency id in 'toolkit'
financial = pd.DataFrame({
    'toolkit': currency_list,
    'market_cap_value': market_cap_list,
    'thirty_day_volume': monthly_volume_list,
})

In [None]:
#configure the root logger at INFO level for the cells below
logging.basicConfig(level=logging.INFO)

### GitHub stats

In [None]:
#read the github api token. strip() removes surrounding whitespace such as the
#trailing newline most editors add, which would otherwise become part of the
#Authorization header value. Text mode keeps the token a native string.
with open("../code/secrets/github-token.nogit", "r") as f:
    token = f.read().strip()

headers = {'Authorization': 'token %s' % token}

#github will temporarily block requests from a user that makes more than 30 requests within a 60 second period.
#To be safe, use a 3 second pause between requests to limit rate to 20 requests per minute.
@sleeper(3)
#in case the request raises, retry with exponential backoff (multiplier 3000 ms, capped at 12000 ms), 3 attempts at most.
#NOTE(review): as configured, retry reacts to exceptions only; a response with an HTTP error status is returned as-is.
@retry(wait_exponential_multiplier=3000,wait_exponential_max=12000,stop_max_attempt_number=3)
def get_data_from_search_helper(query):
    """Run a github repository search for ``query`` and return the raw response.

    query: text appended verbatim as the ``q`` parameter of the search URL.

    Returns the ``requests`` response object. The module-level ``headers``
    dict is sent with the request.
    """
    #timeout: a stalled connection raises (and is retried) instead of blocking forever
    r = requests.get('https://api.github.com/search/repositories?q='+\
                             query,
                     headers=headers,
                     timeout=30)
    return r

def get_data_from_search(query):
    """Use github search to return stars, forks for top query result

    query: search text; it is also stored under the 'toolkit' key of the result.

    Returns a dict with keys 'toolkit', 'full_name', 'stars' and 'forks' built
    from the first item of the search response, or None when the response does
    not contain a usable first item.
    """

    r = get_data_from_search_helper(query)
    #HTTP error statuses are deliberately not raised here: an error payload
    #simply has no usable 'items' and falls through to the None result below.
    try:
        res = r.json()['items'][0]
        return {'toolkit': query, 'full_name': res['full_name'],
                'stars': int(res['stargazers_count']), 'forks': int(res['forks_count'])}
    except (ValueError, KeyError, IndexError, TypeError):
        #ValueError: body is not JSON or a count is not numeric; KeyError: missing field;
        #IndexError: search returned no items; TypeError: payload is not shaped as expected.
        #Narrowed from a bare except so KeyboardInterrupt/SystemExit still propagate.
        return None

In [None]:
#query github once per currency and keep only the searches that produced a result
data = []
for q in toolkits:
    res = get_data_from_search(q)
    if res is not None:
        data.append(res)

In [None]:
#tabulate the search results and fix the column order
github = pd.DataFrame(data)[['toolkit', 'full_name', 'forks', 'stars']]

In [None]:
#print the whole table: the display options are widened only inside this with-block
with pd.option_context('display.width', 160, 'display.max_rows', None,'display.max_columns', None):
    print(github)

In [None]:
#github search returned wrong repo for some results, so they are manually changed below

#each entry is (currency id, github path to query instead); None means no
#path is listed for that currency
corrections = [
    ('ethereum', 'ethereum'),
    ('ripple', 'ripple'),
    ('bitcoin-cash', 'Bitcoin-ABC/bitcoin-abc'),
    ('litecoin', 'litecoin-project'),
    ('stellar', 'stellar'),
    ('tron', 'tronprotocol'),
    ('dash', 'dashpay/dash'),
    ('tether', 'hardlydifficult/Crypto/blob/master'),
    ('vechain', 'vechain'),
    ('ethereum-classic', 'ethereumproject'),
    ('binance-coin', 'binance-exchange'),
    ('icon', None),
    ('monaco', None),
    ('salt', None),
    ('mithril', None),
    ('status', 'status-im'),
    ('storm', None),
    ('centrality', None),
    ('kin', 'kinecosystem/kin-token'),
    ('waves', 'wavesplatform/WavesDevKit'),
    ('lisk', 'LiskHQ'),
    ('wax', None),
    ('augur', 'AugurProject/augur-core'),
    ('mixin', None),
    ('nano', 'nanocurrency/raiblocks'),
    ('ark', 'ArkEcosystem'),
    ('cortex', None),
    ('komodo', 'KomodoPlatform'),
    ('0x', '0xProject'),
    ('bytom', 'Bytom/bytom/tree/master/crypto'),
    ('gas', None),
    ('aeternity', 'aeternity/epoch'),
    ('bancor', None),
    ('ardor', None),
    ('bytecoin-bcn', 'bcndev/bytecoin'),
    ('cryptonex', 'Cryptonex'),
    ('decred', 'decred'),
    ('dentacoin', 'Dentacoin'),
    ('electroneum', 'electroneum/electroneum'),
    ('enigma-project', 'enigmapc'),
    ('funfair', 'funfair-tech'),
    ('gifto', 'GIFTO-io'),
    ('golem-network-tokens', 'golemfactory/golem'),
    ('gxchain', 'gxchain'),
    ('iota', 'iotaledger'),
    ('kyber-network', 'kybernetwork'),
    ('loopring', 'loopring'),
    ('maker', 'makerdao'),
    ('nebulas-token', None),
    ('nem', 'NemProject'),
    ('nucleus-vision', 'NucleusVision'),
    ('nxt', None),
    ('ontology', 'ontio/ontology'),
    ('pivx', 'PIVX-Project/PIVX'),
    ('populous', 'bitpopulous'),
    ('power-ledger', None),
    ('qash', None),
    ('reddcoin', 'reddcoin-project/reddcoin'),
    ('revain', 'Revain'),
    ('siacoin', 'NebulousLabs/Sia'),
    ('stratis', 'stratisproject'),
    ('syscoin', 'syscoin/syscoin'),
    ('veritaseum', 'veritaseum'),
    ('wanchain', 'wanchain'),
]

#obtain correct stats
#github will temporarily block requests from a user that makes more than 30 requests within a 60 second period.
#To be safe, use a 3 second pause between requests to limit rate to 20 requests per minute.
@sleeper(3)
#in case the request raises, retry with exponential backoff (multiplier 3000 ms, capped at 12000 ms), 3 attempts at most.
@retry(wait_exponential_multiplier=3000,wait_exponential_max=12000,stop_max_attempt_number=3)
def corrections_helper(full_name, headers):
    """Fetch the github ``/repos/<full_name>`` resource and return the raw response.

    full_name: path appended verbatim after ``https://api.github.com/repos/``;
        must be a string (concatenating None raises TypeError).
    headers: dict of HTTP headers sent with the request.

    Returns the ``requests`` response object.
    """
    #timeout: a stalled connection raises (and is retried) instead of blocking forever
    r = requests.get('https://api.github.com/repos/' + full_name, headers=headers, timeout=30)
    return r

#overwrite the github rows of every corrected currency
for toolkit,full_name in corrections:
    #rows of the github table that belong to this currency (possibly none)
    row_mask = github['toolkit'] == toolkit
    #fallback values, used when no repository is listed or the lookup fails
    repo_name, forks, stars = None, 0, 0

    if full_name is None:
        #nothing to look up: skip the request entirely instead of sending None
        #through the sleep/retry helper only for it to fail with a TypeError
        print('no repository listed for %s' % toolkit)
    else:
        try:
            r = corrections_helper(full_name,headers)
            res = r.json()
            print('TOOLKIT:')
            print(toolkit)
            print('FULL_NAME:')
            print(full_name)

            print(res)

            forks = res['forks_count']
            stars = res['stargazers_count']
            repo_name = full_name
        except Exception:
            #request failure, non-JSON body, or a payload without the count
            #fields: fall back to the empty values.
            #Exception (not a bare except) lets KeyboardInterrupt through.
            print('exception')
            repo_name, forks, stars = None, 0, 0

    github.loc[row_mask, 'full_name'] = repo_name
    github.loc[row_mask, 'forks'] = forks
    github.loc[row_mask, 'stars'] = stars

In [None]:
#display the table ordered by star count, highest first (github itself is not modified)
github.sort_values(['stars'], ascending=False)

In [None]:
#save the github table without the index column
github.to_csv("../data/blockchain_results_github.csv", index=False)

### Stack Overflow stats

In [None]:
#some items are overcounted because they are common words. List them here, and pair with the word cryptocurrency in searches in order to compensate for overcounting.
overcounted_list = [
    'icon', 'mixin', 'storm', '0x', 'dash', 'apache storm',
    'ontology', 'status', 'salt', 'gas', 'ripple', 'dash',
    'nano', 'tether', 'iota', 'eos', 'tron', 'waves',
    'centrality', 'maker', 'cortex', 'verge', 'ark', 'iota',
    'wax', 'kin', 'fusion',
]

#tag form of every currency id: spaces become dashes
tag_list = [toolkit.replace(' ','-') for toolkit in toolkits]
#earlier single-call variant, kept for reference:
#tag_counts = gso.get_tag_counts(tag_list)

#sleep to avoid throttling
@sleeper(3)
#in case the call raises, retry with exponential backoff (multiplier 60000 ms, capped at 240000 ms), 3 attempts at most.
@retry(wait_exponential_multiplier=60000,wait_exponential_max=240000,stop_max_attempt_number=3)
def tag_counts_builder_helper(list_to_build,list_entry):
    """Look up the tag count for one entry and add the result to ``list_to_build``.

    list_to_build: list extended in place with whatever gso.get_tag_counts returns.
    list_entry: tag to look up; entries found in overcounted_list are queried
        with '-cryptocurrency' appended to compensate for overcounting.

    Returns None.
    """
    needs_qualifier = list_entry in overcounted_list
    tag = list_entry + '-cryptocurrency' if needs_qualifier else list_entry
    list_to_build += gso.get_tag_counts([tag])
    
tag_counts = []

#one helper call per tag; every call adds its results to tag_counts
for x in tag_list:
    tag_counts_builder_helper(tag_counts,x)

#keep the 'name' and 'count' fields and rename them for the later merge on 'toolkit'
df_tags = pd.DataFrame(tag_counts)[['name', 'count']]
df_tags.columns = ['toolkit', 'so_tag_counts']
df_tags

In [None]:
#earlier single-expression variant, kept for reference:
#body_counts = {toolkit: gso.get_body_count(toolkit) for toolkit in tag_list}

#attempt to avoid throttling
@sleeper(3)
#in case the call raises, retry with exponential backoff (multiplier 60000 ms, capped at 240000 ms), 3 attempts at most.
@retry(wait_exponential_multiplier=60000,wait_exponential_max=240000,stop_max_attempt_number=3)
#function used in building a dict each of whose values is the body count of the corresponding key
def body_counts_builder_helper(dict_to_build,key):
    """Store the body count for ``key`` in ``dict_to_build``.

    dict_to_build: dict updated in place; the value is stored under the
        unmodified ``key``.
    key: term to look up; keys found in overcounted_list are queried with
        ' cryptocurrency' appended to compensate for overcounting.

    Returns None.
    """
    query_term = key + ' cryptocurrency' if key in overcounted_list else key
    #NOTE(review): the term is passed wrapped in a list, whereas the commented-out
    #variant above passed a bare string -- confirm which form gso.get_body_count expects.
    dict_to_build[key] = gso.get_body_count([query_term])
    
#maps each tag to the value stored for it by body_counts_builder_helper
body_counts = {}

for x in tag_list:
    body_counts_builder_helper(body_counts,x)

In [None]:
#dict keys become the index; reset_index then turns them into a regular column
df_questions = pd.DataFrame.from_dict(body_counts, orient='index')
df_questions.reset_index(inplace=True)
df_questions.columns = ['toolkit', 'so_question_count']

In [None]:
#outer merge keeps rows that appear in only one of the two tables
so = df_tags.merge(df_questions, on='toolkit', how='outer')

In [None]:
#turn the dashes of the tag form back into spaces, then show up to 100 rows ordered by question count, highest first
so['toolkit'] = so['toolkit'].apply(lambda x: str(x).replace('-',' '))
so.sort_values(['so_question_count'], ascending=False).head(100)

In [None]:
#save the stackoverflow table without the index column
so.to_csv("../data/blockchain_results_stackoverflow.csv", index=False)

### Google search results stats

In [None]:
from googleapiclient.discovery import build

In [None]:
#read the google api key and custom search engine id. Both values are spliced
#into request URLs, so strip() removes surrounding whitespace such as a trailing
#newline; text mode keeps them native strings.
with open("../code/secrets/google_token.nogit", "r") as f:
    my_api_key = f.read().strip()

with open("../code/secrets/cse_token.nogit", "r") as f:
    my_cse_id = f.read().strip()

In [None]:
def search_term_modifier(search_term):
    """Return ``search_term`` rewritten for use in a search URL.

    Every space becomes '+', and a term that is then exactly 'gas' is
    replaced by its alias 'neogas'.
    """
    #a '+' between words indicates to search for both words
    plus_joined = '+'.join(search_term.split(' '))
    #since gas is a common word, use its alias, neogas
    return 'neogas' if plus_joined == 'gas' else plus_joined

In [None]:
def google_search_results_count(search_term, api_key, cse_id):
    """Return the Google custom-search result count for ``search_term``.

    search_term: term to count; it is passed through search_term_modifier and
        sent as the ``exactTerms`` parameter.
    api_key: sent as the ``key`` URL parameter.
    cse_id: sent as the ``cx`` URL parameter.

    Returns a dict {'toolkit': <original search_term>, 'search_results': <int>}.
    The request uses dateRestrict=y5.
    """
    toolkit = search_term
    search_term = search_term_modifier(search_term)
    #use the api_key/cse_id arguments (previously ignored in favour of the
    #module-level my_api_key/my_cse_id); timeout avoids hanging on a stalled connection
    r= requests.get('https://www.googleapis.com/customsearch/v1?q="cryptocurrency"+OR+"blockchain"&alt=json&cx='+
                    cse_id+'&c2coff=1&dateRestrict=y5&exactTerms='+search_term+'&rc=1&key='+api_key,
                    timeout=30)
    res = r.json()['queries']['request'][0]
    return {'toolkit': toolkit, 'search_results': int(res['totalResults'])}

def google_quarterly_growth_rate(search_term, api_key, cse_id):
    """Return the quarter-over-quarter growth of Google result counts for a term.

    Two counts are fetched, for dateRestrict=m6 and dateRestrict=m3. The
    previous quarter is taken as the difference between them, and the growth
    rate is (current - previous) / previous.

    search_term: term to measure; it is passed through search_term_modifier and
        sent as the ``exactTerms`` parameter.
    api_key: sent as the ``key`` URL parameter.
    cse_id: sent as the ``cx`` URL parameter.

    Returns a dict {'toolkit': <original search_term>, 'growth_rate': <float>};
    the rate is NaN when the previous-quarter count is 0.
    """
    toolkit = search_term
    search_term = search_term_modifier(search_term)

    def _total_results(date_restrict):
        #result count reported for one dateRestrict window (e.g. 'm6'). Uses the
        #api_key/cse_id arguments (previously ignored in favour of module-level
        #globals); timeout avoids hanging on a stalled connection.
        r= requests.get('https://www.googleapis.com/customsearch/v1?q="cryptocurrency"+OR+"blockchain"&alt=json&cx='+
                        cse_id+'&c2coff=1&dateRestrict='+date_restrict+'&exactTerms='+search_term+'&rc=1&key='+api_key,
                        timeout=30)
        res = r.json()['queries']['request'][0]
        return int(res['totalResults'])

    ##get count for last 6 months--- dateRestrict=m6
    six_months = _total_results('m6')
    ##get count for last 3 months--- dateRestrict=m3
    current_quarter = _total_results('m3')

    last_quarter = six_months - current_quarter
    if (last_quarter == 0):#for handling the divide by 0 case
        growth_rate = float('NaN')
    else:
        growth_rate = (float(current_quarter)-float(last_quarter))/float(last_quarter)
    return {'toolkit': toolkit, 'growth_rate': growth_rate}

In [None]:
#attempt to avoid throttling
@sleeper(2)
#in case the call raises, retry with exponential backoff (multiplier 2000 ms, capped at 8000 ms), 3 attempts at most.
@retry(wait_exponential_multiplier=2000,wait_exponential_max=8000,stop_max_attempt_number=3)
#function used in building a list each of whose values is the google search results count
def google_results_builder_helper(list_to_build,query):
    """Append the search-results record for ``query`` to ``list_to_build``.

    The record comes from google_search_results_count, called with the
    module-level my_api_key and my_cse_id; a None record is not appended.
    """
    count_record = google_search_results_count(query, my_api_key, my_cse_id)
    if count_record is None:
        return
    list_to_build.append(count_record)
        
#collect one search-results record per currency, then tabulate them
results = []
for q in toolkits:
    google_results_builder_helper(results,q)
    
resultsDF = pd.DataFrame(results)[['toolkit', 'search_results']]

In [None]:
#show the five rows with the highest search result counts
resultsDF.sort_values(['search_results'], ascending=False).head()

In [None]:
#same display as the previous cell: the five rows with the highest search result counts
resultsDF.sort_values(['search_results'], ascending=False).head()

In [None]:
#sleep to avoid throttling
@sleeper(8)
#in case the call raises, retry with exponential backoff (multiplier 8000 ms, capped at 32000 ms), 3 attempts at most.
@retry(wait_exponential_multiplier=8000,wait_exponential_max=32000,stop_max_attempt_number=3)
#function used in building a list each of whose values is the google quarterly growth rate
def growth_rate_builder_helper(list_to_build,query):
    """Append the growth-rate record for ``query`` to ``list_to_build``.

    The record comes from google_quarterly_growth_rate, called with the
    module-level my_api_key and my_cse_id; a None record is not appended.
    """
    rate_record = google_quarterly_growth_rate(query, my_api_key, my_cse_id)
    if rate_record is None:
        return
    list_to_build.append(rate_record)
        
#collect one growth-rate record per currency, then tabulate them
growth_rate = []

for q in toolkits:
    growth_rate_builder_helper(growth_rate,q)
    
growthDF =  pd.DataFrame(growth_rate)[['toolkit', 'growth_rate']]

In [None]:
#show the first five rows when ordered by growth rate, descending
growthDF.sort_values(['growth_rate'], ascending=False).head()

In [None]:
#join growth rates and result counts on the currency name
googleDF = growthDF.merge(resultsDF, on='toolkit', copy = False)

In [None]:
#save the google table without the index column
googleDF.to_csv("../data/blockchain_results_google.csv", index=False)

## Combine all data

In [None]:
#in order to get consistent cryptocurrency names across tables, replace spaces with dashes in so table
so['toolkit'] = so['toolkit'].apply(lambda x: x.replace(' ','-'))

#chain the github, stackoverflow, google and financial tables on 'toolkit'.
#No 'how' argument is given, so pandas' default join type applies to each merge.
dltkDF = github.merge(so, on='toolkit', copy = False)
dltkDF = dltkDF.merge(googleDF, on='toolkit', copy = False)
dltkDF = dltkDF.merge(financial, on='toolkit', copy = False)

In [None]:
#preview the first rows of the combined table
dltkDF.head()

In [None]:
#save the combined table without the index column
dltkDF.to_csv("../output/blockchain_data.csv", index=False)