In [2]:
import pandas as pd

# Load the precomputed daily metrics table and discard its 'Unnamed: 0' column.
result = pd.read_csv("result.csv").drop(columns='Unnamed: 0')
result.head()

Unnamed: 0,tx_date,tx_count,Tx_volume_Ether,Tx_volume($),tx_count_ERC20,Tx_volume_ERC20,Tx_volume_ERC20($),Volume($),Market Cap($),DAU,CUM
0,2016-01-01,8233,344547.8,326639.542431,0.0,0.0,0.0,206062,71980386,8006,8006
1,2016-01-02,9164,259240.0,242940.035122,0.0,0.0,0.0,255504,71176658,8814,16820
2,2016-01-03,9258,370820.0,360401.805781,0.0,0.0,0.0,407632,73843292,9019,25839
3,2016-01-04,9474,154125.5,147109.68045,0.0,0.0,0.0,346245,72543707,9059,34898
4,2016-01-05,16430,1036291.0,984659.012911,0.0,0.0,0.0,219833,72240974,11164,46062


In [4]:
# Load the per-feature table (x_lst, Coef, Score); presumably the single-feature
# regression results against Market Cap($) -- TODO confirm against the producer.
final = pd.read_csv("final.csv")
# Display-only: set_index returns a new frame that is not assigned, so `final`
# itself still carries x_lst as an ordinary column after this cell.
final.set_index("x_lst")

Unnamed: 0_level_0,Coef,Score
x_lst,Unnamed: 1_level_1,Unnamed: 2_level_1
tx_count,78539.91,0.815805
Tx_volume_Ether,787.4096,0.033819
Tx_volume($),5.828936,0.433478
tx_count_ERC20,87184.15,0.361827
Tx_volume_ERC20,-3.055525e-53,0.001385
Tx_volume_ERC20($),-7.225363e-56,0.000188
Volume($),17.84916,0.700871
DAU,132794.6,0.84806
CUM,197.6944,0.271924


In [5]:
# Keep the three highest-Score features from the table above (DAU, tx_count,
# Volume($)), indexed by transaction date.
top3 = result.set_index('tx_date')[['DAU', 'tx_count', 'Volume($)']]
top3.head()

Unnamed: 0_level_0,DAU,tx_count,Volume($)
tx_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-01-01,8006,8233,206062
2016-01-02,8814,9164,255504
2016-01-03,9019,9258,407632
2016-01-04,9059,9474,346245
2016-01-05,11164,16430,219833


## DAU와 Market Cap간의 관계

### Etherscan label로 exchange account 구별

In [51]:
import requests
from bs4 import BeautifulSoup

def label_lst(url):
    """Download a page and return its `div.table-responsive` table as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the page to fetch (used below with Etherscan account lists).

    Returns
    -------
    pandas.DataFrame
        One column per `<th>` in the table head and one row per `<tr>` in the
        table body. Every cell holds the raw text of its `<td>` (no stripping,
        no numeric conversion).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Without this check an
        error or rate-limit page would be parsed and silently yield an empty
        or malformed table.
    requests.Timeout
        If the server does not respond within 30 seconds.
    """
    # requests has no default timeout; without one a stalled connection blocks forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Header cells give the column names.
    header_cells = soup.select('div.table-responsive > table > thead > tr > th')
    columnlist = [th.text for th in header_cells]

    # Each body row becomes a list holding the text of its cells.
    body_rows = soup.select('div.table-responsive > table > tbody > tr')
    alldfcontents = [[td.text for td in tr.find_all("td")] for tr in body_rows]

    return pd.DataFrame(columns=columnlist, data=alldfcontents)

In [52]:
# Page 1 of Etherscan's account list filtered by the "Exchange" label
# (presumably ps=100 means 100 rows per page -- confirm against the site).
url = "https://etherscan.io/accounts/1?ps=100&l=Exchange"

In [53]:
# Page 2 of the same Exchange-labelled account list.
url2 = "https://etherscan.io/accounts/2?ps=100&l=Exchange"

In [54]:
# Scrape both pages of exchange-labelled accounts and stack them into one table.
label1 = label_lst(url)
label2 = label_lst(url2)
frames = [label1, label2]
# ignore_index=True renumbers the rows 0..n-1. Without it each page keeps its
# own 0-based index, so every row label appears twice in `label` and in the
# CSV exported from it.
label = pd.concat(frames, ignore_index=True)

In [60]:
# Give the scraped columns fixed names, then discard the positional 'Index' column.
# NOTE: not re-runnable on its own -- after the drop only four columns remain,
# so assigning five names a second time raises.
label.columns = ['Index', 'Address', 'Label', 'Balance', 'TxCount']
label = label.drop(columns='Index')

In [65]:
# Persist the exchange labels. to_csv writes the DataFrame index as an unnamed
# first column by default.
# NOTE(review): presumably this file is what backs the
# Exchange_labels.exchange_labels BigQuery table queried below; the upload step
# is not shown in this notebook -- confirm.
label.to_csv("Exchange_labels.csv")

In [69]:
import google
from google.cloud import bigquery
import pandas as pd

# Client is built with no arguments, so project and credentials are taken from
# the environment.
client = bigquery.Client()

# Per-day transaction count and summed value / 10^18 (aliased Tx_volume_Ether)
# for 2016-01-01..2018-12-31, keeping only transactions whose to_address is not
# listed in Exchange_labels.exchange_labels.
# NOTE(review): Exchange_labels.exchange_labels is not created in this notebook;
# it must already exist in the client's default project.
# NOTE(review): `x NOT IN (subquery)` evaluates to NULL when x is NULL, so rows
# with a NULL to_address are dropped as well -- confirm that is intended.
sql = """
SELECT 
  DATE_TRUNC(DATE(transactions.block_timestamp), DAY) AS tx_date, 
  COUNT(*) AS tx_count, 
  SUM(transactions.value/POWER(10,18)) AS Tx_volume_Ether
FROM `bigquery-public-data.ethereum_blockchain.transactions` AS transactions
WHERE transactions.to_address NOT IN (SELECT Address FROM Exchange_labels.exchange_labels) and
      DATE(transactions.block_timestamp) >= DATE('2016-01-01') and DATE(transactions.block_timestamp) <= DATE('2018-12-31')
GROUP BY tx_date
ORDER by tx_date ASC
"""
df = client.query(sql).to_dataframe()
df.head()

Unnamed: 0,tx_date,tx_count,Tx_volume_Ether
0,2016-01-01,5802,228824.645462
1,2016-01-02,6624,186976.122885
2,2016-01-03,6719,271070.356268
3,2016-01-04,6969,116243.753852
4,2016-01-05,13886,383397.969954


In [70]:
# Per-day token-transfer count and summed value for 2016-01-01..2018-12-31,
# excluding transfers whose to_address is in Exchange_labels.exchange_labels.
# value is cast to FLOAT64 before being divided by 10^18.
# NOTE(review): every transfer is scaled by the same 10^18 and summed across
# all tokens; confirm that a single decimals value and a cross-token sum are
# what the analysis wants.
# Reuses `client` from the previous BigQuery cell and overwrites `sql`.
sql = """
SELECT 
  DATE_TRUNC(DATE(transactions.block_timestamp), DAY) AS tx_date, 
  COUNT(*) AS tx_count_ERC20, 
  SUM(CAST(transactions.value AS FLOAT64)/POWER(10,18)) AS Tx_volume_ERC20
FROM `bigquery-public-data.ethereum_blockchain.token_transfers` AS transactions
WHERE transactions.to_address NOT IN (SELECT Address FROM Exchange_labels.exchange_labels) and
      DATE(transactions.block_timestamp) >= DATE('2016-01-01') and DATE(transactions.block_timestamp) <= DATE('2018-12-31')
GROUP BY tx_date
ORDER by tx_date ASC
"""
df_ERC20 = client.query(sql).to_dataframe()
df_ERC20.head()

Unnamed: 0,tx_date,tx_count_ERC20,Tx_volume_ERC20
0,2016-01-06,1,1e-15
1,2016-01-07,5,1.4012e-12
2,2016-01-09,7,5.01063e-11
3,2016-01-12,1,0.0
4,2016-01-13,1,5e-06


In [71]:
# Per-day "DAU" for 2016-01-01..2018-12-31 over transactions that have a
# non-null, non-exchange to_address and a value greater than 0.
# NOTE(review): COUNT(from_address) counts every qualifying transaction with a
# non-null sender, not the number of distinct senders. If DAU is meant to be
# unique active addresses this needs COUNT(DISTINCT ...); the DAU column in
# result.csv would then have to be defined the same way for the comparison
# below to stay meaningful -- confirm before changing.
sql = """
SELECT 
  DATE_TRUNC(DATE(transactions.block_timestamp), DAY) AS tx_date,
  COUNT(transactions.from_address) AS DAU
  
FROM `bigquery-public-data.ethereum_blockchain.transactions` AS transactions
WHERE
  transactions.to_address NOT IN (SELECT Address FROM Exchange_labels.exchange_labels)
  AND DATE(transactions.block_timestamp) >= DATE('2016-01-01') and DATE(transactions.block_timestamp) <= DATE('2018-12-31')
  AND transactions.to_address is not null
  AND transactions.value > 0
  
GROUP BY tx_date
ORDER by tx_date ASC
"""
df_DAU = client.query(sql).to_dataframe()
df_DAU.head()

Unnamed: 0,tx_date,DAU
0,2016-01-01,5602
1,2016-01-02,6298
2,2016-01-03,6490
3,2016-01-04,6603
4,2016-01-05,8670


In [72]:
# Reload the precomputed daily metrics (same file as at the top of the notebook)
# and discard its 'Unnamed: 0' column.
result = pd.read_csv("result.csv").drop(columns='Unnamed: 0')

In [75]:
# Preview the first rows of the metrics table that still include exchange traffic.
result.head()

Unnamed: 0,tx_date,tx_count,Tx_volume_Ether,Tx_volume($),tx_count_ERC20,Tx_volume_ERC20,Tx_volume_ERC20($),Volume($),Market Cap($),DAU,CUM
0,2016-01-01,8233,344547.8,326639.542431,0.0,0.0,0.0,206062,71980386,8006,8006
1,2016-01-02,9164,259240.0,242940.035122,0.0,0.0,0.0,255504,71176658,8814,16820
2,2016-01-03,9258,370820.0,360401.805781,0.0,0.0,0.0,407632,73843292,9019,25839
3,2016-01-04,9474,154125.5,147109.68045,0.0,0.0,0.0,346245,72543707,9059,34898
4,2016-01-05,16430,1036291.0,984659.012911,0.0,0.0,0.0,219833,72240974,11164,46062


In [89]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Regress Market Cap($) on the exchange-filtered DAU.
# The two series come from different sources (a BigQuery result and result.csv),
# so pair them on tx_date rather than by row position: a missing or reordered
# day in either frame would otherwise misalign X and y, or make the fit fail on
# a length mismatch. Both date columns are normalised to datetime first because
# one is read from CSV and the other is returned by BigQuery.
dau_by_date = df_DAU[['tx_date', 'DAU']].assign(
    tx_date=lambda frame: pd.to_datetime(frame['tx_date'])
)
cap_by_date = result[['tx_date', 'Market Cap($)']].assign(
    tx_date=lambda frame: pd.to_datetime(frame['tx_date'])
)
reg_data = pd.merge(dau_by_date, cap_by_date, on='tx_date', how='inner')

# Double brackets keep the 2-D (n_samples, 1) shape the original reshape produced.
X = reg_data[['DAU']].values
y = reg_data[['Market Cap($)']].values
regr = linear_model.LinearRegression()
regr.fit(X, y)
# regr.score is the coefficient of determination (R^2) on the training data.
print("coef : ", regr.coef_[0][0],"\nR score : ", regr.score(X, y))

coef :  143080.31524232033 
R score :  0.8441994120999677


In [103]:
# Gap between the Score recorded for DAU in final.csv (the DAU series that still
# includes exchange traffic) and the R^2 of the exchange-filtered regression
# fitted in the previous cell. Both values are read from the live objects
# instead of being retyped from printed output, so the figure cannot go stale.
dau_score_with_exchange = final.loc[final['x_lst'] == 'DAU', 'Score'].iloc[0]
print("Difference between two DAUs R score: " , dau_score_with_exchange - regr.score(X, y))

Difference between two DAUs R score:  0.0038605879000322973


In [105]:
import plotly.plotly as py
import plotly.graph_objs as go

# DAU series that still includes exchange-bound transactions (from result.csv).
high = go.Scatter(
    x=result['tx_date'],
    y=result['DAU'],
    name='DAU with exchange',
    line={'color': '#17BECF'},
    opacity=0.8,
)

# DAU series from the exchange-filtered BigQuery query.
low = go.Scatter(
    x=df_DAU['tx_date'],
    y=df_DAU['DAU'],
    name='DAU',
    line={'color': '#fef01b'},
    opacity=0.8,
)

plot_data = [high, low]

# iplot is called twice: first with the bare trace list, then (below) with the
# titled layout attached.
py.iplot(plot_data, filename='time-series-simple')

layout = {
    'title': "DAUs",
    'xaxis': {'range': ['2016-01-01', '2018-12-31']},
}

fig = {'data': plot_data, 'layout': layout}

py.iplot(fig, filename="DAUs")

> 2018년 5월 4일, exchange transaction이 포함된 DAU의 경우 710.919k, 조정된 값의 경우 402.397k

In [119]:
# Pull sender, recipient and value of every transaction dated 2018-05-04, the
# day singled out in the note above. No exchange filter is applied here.
# Note the mixed-case alias: the sender column comes back as `from_Address`,
# the recipient as `to_address`.
sql = """
SELECT 
  transactions.from_address AS from_Address,
  transactions.to_address AS to_address,
  transactions.value AS value

FROM `bigquery-public-data.ethereum_blockchain.transactions` AS transactions
WHERE DATE(transactions.block_timestamp) = DATE('2018-05-04')
"""
date_data = client.query(sql).to_dataframe()
date_data.head()

Unnamed: 0,from_Address,to_address,value
0,0x004075e4d4b1ce6c48c81cc940e2bad24b489e64,0x14fbca95be7e99c15cc2996c6c9d841e54b79425,0
1,0x25c6bf07f3848d146ab24f815cf9a3b1c4a1f27d,0x97e46522e754da786487ec752a1f1c495b9665c1,0
2,0xf008e2c7a7f16ac706c2e0ebd3f015d442016420,0x09678741bd50c3e74301f38fbd0136307099ae5d,0
3,0x03747f06215b44e498831da019b27f53e483599f,0xa74476443119a942de498590fe1f2454d7d4ac0d,0
4,0x0a3d5c8894bbe1e9113e4ed6f0c3b0d4fa6b131e,0xdfc85c08d5e5924ab49750e006cf8a826ffb7b13,0


In [146]:
# Number of transactions sent to a labelled exchange address with a value of 0
sent_to_exchange = date_data['to_address'].isin(label['Address'])
d = date_data[sent_to_exchange]
len(d[d['value'] == 0])

32

In [125]:
# Distinct labelled exchange addresses that received at least one transaction that day
lst = (
    date_data
    .loc[date_data['to_address'].isin(label['Address']), 'to_address']
    .unique()
    .tolist()
)

In [140]:
# Exchanges that received transactions on that day.
# The result is stored under its own name: the original reused `label_lst`,
# which rebinds the scraper function defined earlier to a list and breaks any
# later re-run of the scraping cell.
# drop_duplicates keeps the first label per address, which is the entry the
# original per-address lookup (`.values[0]`) selected.
unique_labels = label.drop_duplicates('Address')
exchange_names = unique_labels[unique_labels['Address'].isin(lst)]['Label'].tolist()
print(sorted(exchange_names))

['Bibox', 'BigONE', 'Bilaxy', 'Binance_1', 'Binance_2', 'Binance_3', 'Binance_4', 'Bitfinex_4', 'Bitfinex_5', 'Bittrex_1', 'Bittrex_2', 'Bity.com', 'Changelly', 'Cobinhood_1', 'Cobinhood_2', 'CoinExchange.io', 'Coinbene', 'Coindelta', 'Coinex', 'Gate.io_1', 'Gate.io_3', 'Gemini_2', 'HitBTC_2', 'HitBTC_3', 'Hotbit', 'Huobi_5', 'Huobi_9', 'Kraken_5', 'Kucoin', 'Liqui.io_1', 'Okex_1', 'Poloniex_2', 'Remitano', 'ShapeShift_3', 'ShapeShift_4', 'ShapeShift_5', 'ShapeShift_6', 'ShapeShift_7', 'Upbit']


In [154]:
# Transactions received per address on that day, keeping the six busiest recipients.
exchange = (
    date_data
    .groupby(by='to_address')
    .size()
    .reset_index(name='tx_count')
    .sort_values(by='tx_count', ascending=False)
    .head(6)
)

In [152]:
# Attach Etherscan label data; the inner join keeps only recipients that are
# labelled exchanges.
# NOTE: this overwrites `exchange`, so re-running this cell alone merges the
# already-merged frame again.
exchange = exchange.merge(label, how='inner', left_on='to_address', right_on='Address')

In [153]:
# Show which of the busiest recipients are labelled exchanges.
exchange

Unnamed: 0,to_address,tx_count,Address,Label,Balance,TxCount
0,0x3f5ce5fbfe3e9af3971dd833d26ba9b5c936f0be,281580,0x3f5ce5fbfe3e9af3971dd833d26ba9b5c936f0be,Binance_1,"182,881.99829304 Ether",8130845


In [168]:
# Share of the exchange's cumulative Etherscan TxCount that arrived on 2018-05-04.
# Both numbers are read from the first row of the merged `exchange` frame
# (Binance_1 in the output above) instead of being retyped by hand.
# TxCount was scraped as text, so drop thousands separators and surrounding
# whitespace before converting it to an integer.
top_exchange = exchange.iloc[0]
lifetime_tx_count = int(str(top_exchange['TxCount']).replace(',', '').strip())
print("2018 May 4th,", "%.2f" % (top_exchange['tx_count'] / lifetime_tx_count * 100), "% of total tx_count were recorded")

2018 May 4th, 3.46 % of total tx_count were recorded


> 해당 날짜에 두 DAU 값의 차이가 크게 났던 이유는 Binance_1 때문이며, 이 거래소의 누적 transaction count(Etherscan 기준 8,130,845건) 중 약 3.5%(281,580건)가 이 하루에 발생함

In [180]:
# Transactions sent that day to the address labelled Binance_1 in the table above.
value_Data = date_data[date_data['to_address'] == "0x3f5ce5fbfe3e9af3971dd833d26ba9b5c936f0be"]

import plotly.plotly as py
import plotly.graph_objs as go

# Four box traces over the same `value` column; they differ only in which
# points are drawn next to the box (the boxpoints setting) and in colour.
trace0 = go.Box(
    x=value_Data['value'],
    name="All Points",
    jitter=0.3,
    pointpos=-1.8,
    boxpoints='all',
    marker={'color': 'rgb(7,40,89)'},
    line={'color': 'rgb(7,40,89)'},
)

trace1 = go.Box(
    x=value_Data['value'],
    name="Only Whiskers",
    boxpoints=False,
    marker={'color': 'rgb(9,56,125)'},
    line={'color': 'rgb(9,56,125)'},
)

trace2 = go.Box(
    x=value_Data['value'],
    name="Suspected Outliers",
    boxpoints='suspectedoutliers',
    marker={
        'color': 'rgb(8,81,156)',
        'outliercolor': 'rgba(219, 64, 82, 0.6)',
        'line': {
            'outliercolor': 'rgba(219, 64, 82, 0.6)',
            'outlierwidth': 2,
        },
    },
    line={'color': 'rgb(8,81,156)'},
)

trace3 = go.Box(
    x=value_Data['value'],
    name="Whiskers and Outliers",
    boxpoints='outliers',
    marker={'color': 'rgb(107,174,214)'},
    line={'color': 'rgb(107,174,214)'},
)

data = [trace0, trace1, trace2, trace3]

layout = go.Layout(title="Binance transactions on May 4th")

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename="Binance transactions on May 4th")