In [1109]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from google.cloud import bigquery
import time
%env GOOGLE_APPLICATION_CREDENTIALS=../secrets/bigquery-service-account.json 
client = bigquery.Client()
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

env: GOOGLE_APPLICATION_CREDENTIALS=../secrets/bigquery-service-account.json


In [1110]:
%load_ext google.cloud.bigquery

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery


In [1111]:
%%bigquery metadata --project masterarbeit-245718 --verbose 
SELECT * FROM `bigquery-public-data`.crypto_ethereum.INFORMATION_SCHEMA.COLUMNS where table_name = "traces"

Executing query with job ID: f57bd6f4-04b1-4013-a67e-9315e1d7f58f
Query executing: 0.88s
Query complete after 1.81s


In [1112]:
metadata

Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,is_nullable,data_type,is_generated,generation_expression,is_stored,is_hidden,is_updatable,is_system_defined,is_partitioning_column,clustering_ordinal_position
0,bigquery-public-data,crypto_ethereum,traces,transaction_hash,1,YES,STRING,NEVER,,,NO,,NO,NO,
1,bigquery-public-data,crypto_ethereum,traces,transaction_index,2,YES,INT64,NEVER,,,NO,,NO,NO,
2,bigquery-public-data,crypto_ethereum,traces,from_address,3,YES,STRING,NEVER,,,NO,,NO,NO,
3,bigquery-public-data,crypto_ethereum,traces,to_address,4,YES,STRING,NEVER,,,NO,,NO,NO,
4,bigquery-public-data,crypto_ethereum,traces,value,5,YES,NUMERIC,NEVER,,,NO,,NO,NO,
5,bigquery-public-data,crypto_ethereum,traces,input,6,YES,STRING,NEVER,,,NO,,NO,NO,
6,bigquery-public-data,crypto_ethereum,traces,output,7,YES,STRING,NEVER,,,NO,,NO,NO,
7,bigquery-public-data,crypto_ethereum,traces,trace_type,8,NO,STRING,NEVER,,,NO,,NO,NO,
8,bigquery-public-data,crypto_ethereum,traces,call_type,9,YES,STRING,NEVER,,,NO,,NO,NO,
9,bigquery-public-data,crypto_ethereum,traces,reward_type,10,YES,STRING,NEVER,,,NO,,NO,NO,


In [1113]:
size_sample_data = 100

### get sample data for "call_type"

In [1114]:
%%bigquery call_types_sql_result --project masterarbeit-245718 --verbose 
select DISTINCT call_type from `bigquery-public-data.crypto_ethereum.traces`
where DATE(block_timestamp) >= '2019-07-6' AND DATE(block_timestamp) <= '2019-7-7'

Executing query with job ID: bf96c2e2-ae5f-45b0-bb63-0c4a3e4273ae
Query executing: 1.89s
Query complete after 4.72s


In [1115]:
call_types = list(call_types_sql_result["call_type"])
call_types

[None, 'call', 'delegatecall', 'staticcall', 'callcode']

In [1116]:
# Sampling weight per call type, keyed by value instead of by list position:
# the DISTINCT query that fills `call_types` has no ORDER BY, so a positional
# list could silently attach the 0.9 weight to a type other than 'call'.
call_type_weights = {
    None: 0.025,
    "call": 0.9,
    "delegatecall": 0.025,
    "staticcall": 0.025,
    "callcode": 0.025,
}
# Probabilities aligned element-by-element with `call_types`.
prob_call_types = [call_type_weights[call_type] for call_type in call_types]

In [1117]:
# Draw one call type per sample row. The candidate list is `call_types`
# (built from the DISTINCT query result); `call_types_list` was never defined.
call_type_sample = np.random.choice(call_types, size=size_sample_data, p=prob_call_types)

### get sample data for status

In [1118]:
%%bigquery status_values --project masterarbeit-245718 --verbose 
select DISTINCT status from `bigquery-public-data.crypto_ethereum.traces`
where DATE(block_timestamp) >= '2019-07-6' AND DATE(block_timestamp) <= '2019-7-7'

Executing query with job ID: 5445e998-fbdd-4141-b1cb-e1963f44861d
Query executing: 11.14s
Query complete after 12.68s


In [1119]:
status_values_list = list(status_values["status"])
status_values_list

[1, 0]

In [1120]:
# Sampling weight per status value, keyed by value instead of by list position:
# the DISTINCT query that fills `status_values_list` has no ORDER BY, so its
# row order is not guaranteed to stay [1, 0].
status_weights = {1: 0.95, 0: 0.05}
# Probabilities aligned element-by-element with `status_values_list`.
probs_status_values_list = [status_weights[status] for status in status_values_list]

In [1121]:
status_sample = np.random.choice(status_values_list, size_sample_data, p=probs_status_values_list)
status_sample

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1])

### get sample accounts 

In [1122]:
# Five synthetic exchange account names: exchange_1 .. exchange_5.
exchanges = list(map("exchange_{}".format, range(1, 6)))
exchanges

['exchange_1', 'exchange_2', 'exchange_3', 'exchange_4', 'exchange_5']

In [1123]:
# One hundred synthetic speculator account names: speculator_1 .. speculator_100.
speculators = list(map("speculator_{}".format, range(1, 101)))

### get sample transactions (speculators to exchanges)

In [1124]:
# Half of the sample rows are speculator -> exchange transfers.
n_spec_to_ex = size_sample_data // 2
# One vectorised draw per column instead of a Python loop of single draws.
from_spec_addresses = list(np.random.choice(speculators, size=n_spec_to_ex))
to_ex_addresses = list(np.random.choice(exchanges, size=n_spec_to_ex))
# Integer transfer values in 1..19 (the upper bound of randint is exclusive).
values_spec_to_ex = np.random.randint(1, 20, n_spec_to_ex)

### get sample transactions (exchanges to speculators)

In [1125]:
# Half of the sample rows are exchange -> speculator transfers.
n_ex_to_spec = size_sample_data // 2
# One vectorised draw per column instead of a Python loop of single draws.
to_spec_addresses = list(np.random.choice(speculators, size=n_ex_to_spec))
from_ex_addresses = list(np.random.choice(exchanges, size=n_ex_to_spec))
# Integer transfer values in 1..4 (the upper bound of randint is exclusive).
values_ex_to_spec = np.random.randint(1, 5, n_ex_to_spec)

In [1126]:
data1 = pd.DataFrame(zip(from_ex_addresses, to_spec_addresses, values_ex_to_spec), columns=["from_address", "to_address", "value"])

In [1127]:
data2 = pd.DataFrame(zip(from_spec_addresses, to_ex_addresses, values_spec_to_ex), columns=["from_address", "to_address", "value"])

In [1128]:
# Stack both transfer directions into one frame with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was deprecated and later removed from pandas.
txdata = pd.concat([data1, data2], ignore_index=True)

### block_timestamps

In [1129]:
import datetime

# One timestamp per sample row, starting at the current UTC time and stepping
# back 12 seconds per row (newest first).
base = datetime.datetime.utcnow()
block_timestamps = [base - datetime.timedelta(seconds=12 * k) for k in range(size_sample_data)]

### Merge Data

In [1130]:
traces_sampleData = txdata.copy()

In [1131]:
traces_sampleData["status"] = status_sample 

In [1132]:
traces_sampleData["call_type"] = call_type_sample 

In [1133]:
traces_sampleData["block_timestamp"] = block_timestamps 

In [1134]:
traces_sampleData.head()

Unnamed: 0,from_address,to_address,value,status,call_type,block_timestamp
0,exchange_1,speculator_78,2,1,call,2020-01-19 19:21:53.732281
1,exchange_5,speculator_66,4,1,call,2020-01-19 19:21:41.732281
2,exchange_1,speculator_80,4,1,call,2020-01-19 19:21:29.732281
3,exchange_3,speculator_9,3,1,call,2020-01-19 19:21:17.732281
4,exchange_2,speculator_81,1,1,call,2020-01-19 19:21:05.732281


### Upload data to bigquery

In [1135]:
traces_sampleData.to_gbq('ethereum_us.traces_sampleData', if_exists="replace")

1it [00:05,  5.18s/it]


### Test: weiReceived, weiSent

In [1136]:
%%bigquery res1 --project masterarbeit-245718 --verbose 
  -- Wei received and wei sent per address, over successful plain calls only.
WITH weiReceivedView AS (
    -- debits
    SELECT to_address, SUM(IFNULL(value, 0)) AS weiReceived
    FROM ethereum_us.traces_sampleData
    WHERE to_address IS NOT NULL
      AND status = 1
      AND call_type = "call"
    GROUP BY to_address
), weiSentView AS (
    -- credits
    SELECT from_address, SUM(IFNULL(value, 0)) AS weiSent
    FROM ethereum_us.traces_sampleData
    WHERE from_address IS NOT NULL
      AND status = 1
      AND call_type = "call"
    GROUP BY from_address
)
SELECT
  -- an address may appear on only one side of the full outer join
  COALESCE(to_address, from_address) AS address,
  IFNULL(weiReceived, 0) AS weiReceived,
  IFNULL(weiSent, 0) AS weiSent
FROM weiReceivedView
FULL OUTER JOIN weiSentView ON from_address = to_address

Executing query with job ID: 42a71c83-d0d6-406a-801e-40da696c49ee
Query executing: 2.01s
Query complete after 2.73s


In [1137]:
# Post-process the SQL result: add the net balance, index by address
# (descending) and cast every column to float.
res2 = (
    res1.assign(balance=res1.weiReceived - res1.weiSent)
    .set_index("address")
    .sort_values(by="address", ascending=False)
    .astype("float")
)
balance_result_sql = res2

In [1138]:
%%bigquery traces_sampleData --project masterarbeit-245718 --verbose 
select * from `ethereum_us.traces_sampleData`

Executing query with job ID: b716dc01-0fa1-41e4-b243-2f8599653acd
Query executing: 0.62s
Query complete after 1.08s


In [1139]:
traces_sampleData

Unnamed: 0,from_address,to_address,value,status,call_type,block_timestamp
0,exchange_3,speculator_33,1,0,call,2020-01-19 19:15:41.732281+00:00
1,exchange_4,speculator_85,2,0,call,2020-01-19 19:18:41.732281+00:00
2,exchange_5,speculator_17,2,0,call,2020-01-19 19:13:41.732281+00:00
3,exchange_5,speculator_99,2,0,call,2020-01-19 19:12:05.732281+00:00
4,speculator_65,exchange_2,4,0,call,2020-01-19 19:06:53.732281+00:00
5,speculator_99,exchange_4,6,0,delegatecall,2020-01-19 19:02:53.732281+00:00
6,speculator_7,exchange_4,7,0,call,2020-01-19 19:03:05.732281+00:00
7,exchange_2,speculator_81,1,1,call,2020-01-19 19:21:05.732281+00:00
8,exchange_2,speculator_50,1,1,call,2020-01-19 19:19:53.732281+00:00
9,exchange_1,speculator_84,1,1,call,2020-01-19 19:19:41.732281+00:00


In [1140]:
# Keep only the rows the SQL feature queries aggregate over:
# plain calls (call_type == "call") that succeeded (status == 1).
data1 = [row for (index, row) in traces_sampleData.iterrows() if row.call_type == "call" and row.status == 1]
# Rebuild a DataFrame from the selected row Series.
data1 = pd.DataFrame(data1)

In [1141]:
# Python re-implementation of the weiReceived / weiSent query; serves as the
# reference the SQL result is compared against.
# The index is passed as a list: a set is unordered and newer pandas versions
# reject a set as index.
data2 = pd.DataFrame(index=list(set(data1["from_address"]) | set(data1["to_address"])))
# Select `value` before aggregating so only that column is summed; summing the
# whole frame also tries to add up the timestamp and string columns.
data2["weiReceived"] = data1.groupby("to_address")["value"].sum()
data2["weiSent"] = data1.groupby("from_address")["value"].sum()
# Addresses that appear on one side only have no total on the other side.
data2["weiSent"] = data2["weiSent"].fillna(0.)
data2["weiReceived"] = data2["weiReceived"].fillna(0.)
data2["balance"] = data2["weiReceived"] - data2["weiSent"]
data2.index = data2.index.rename("address")
data2 = data2.sort_values(by="address", ascending=False)
balance_result_py = data2

In [1142]:
pd.testing.assert_frame_equal(balance_result_py, balance_result_sql)
print("weiSent, weiReceived Test succeeded!!")

weiSent, weiReceived Test succeeded!!


### Test: txSent, txReceived

In [1143]:
%%bigquery res2 --project masterarbeit-245718 --verbose 
-- Number of transactions sent and received per address, over successful plain calls only.
WITH txSent AS (
  SELECT from_address, COUNT(*) AS numberOfTranscationsSent
  FROM `masterarbeit-245718.ethereum_us.traces_sampleData`
  WHERE to_address IS NOT NULL
    AND status = 1
    AND call_type = "call"
  GROUP BY from_address
), txReceived AS (
  SELECT to_address, COUNT(*) AS numberOfTranscationsReceived
  FROM `masterarbeit-245718.ethereum_us.traces_sampleData`
  WHERE to_address IS NOT NULL
    AND status = 1
    AND call_type = "call"
  GROUP BY to_address
)
SELECT
  -- an address may appear on only one side of the full outer join
  COALESCE(to_address, from_address) AS address,
  IFNULL(numberOfTranscationsReceived, 0) AS numberOfTranscationsReceived,
  IFNULL(numberOfTranscationsSent, 0) AS numberOfTranscationsSent
FROM txReceived
FULL OUTER JOIN txSent ON to_address = from_address

Executing query with job ID: 152411c8-b0ce-4adb-b7c8-8da79185cbeb
Query executing: 0.44s
Query complete after 3.29s


In [1144]:
res3 = res2.copy()
res3 = res3.sort_values(by="address", ascending=False)
tx_sent_received_result_sql = res3.set_index("address", drop=True)

In [1145]:
# Python reference for the txSent / txReceived query.
# The index is passed as a list: a set is unordered and newer pandas versions
# reject a set as index.
data3 = pd.DataFrame(index=list(set(data1["from_address"]) | set(data1["to_address"])))
# Count on the `value` column only, instead of counting every column and then
# picking one of the results.
data3["numberOfTranscationsReceived"] = data1.groupby("to_address")["value"].count()
data3["numberOfTranscationsSent"] = data1.groupby("from_address")["value"].count()
# No transactions on one side means a count of 0.
data3 = data3.fillna(0)
data3 = data3.astype("int")
data3.index = data3.index.rename("address")
data3 = data3.sort_values(by="address", ascending=False)
tx_sent_received_result_py = data3

In [1146]:
pd.testing.assert_frame_equal(tx_sent_received_result_py, tx_sent_received_result_sql)
print("txSent, txReceived Test succeeded !!")

txSent, txReceived Test succeeded !!


In [1147]:
tx_sent_received_result_py

Unnamed: 0_level_0,numberOfTranscationsReceived,numberOfTranscationsSent
address,Unnamed: 1_level_1,Unnamed: 2_level_1
speculator_99,1,1
speculator_98,1,0
speculator_97,1,1
speculator_96,0,1
speculator_95,0,2
speculator_94,0,1
speculator_93,0,2
speculator_92,1,1
speculator_90,0,1
speculator_9,1,1


In [1148]:
tx_sent_received_result_sql

Unnamed: 0_level_0,numberOfTranscationsReceived,numberOfTranscationsSent
address,Unnamed: 1_level_1,Unnamed: 2_level_1
speculator_99,1,1
speculator_98,1,0
speculator_97,1,1
speculator_96,0,1
speculator_95,0,2
speculator_94,0,1
speculator_93,0,2
speculator_92,1,1
speculator_90,0,1
speculator_9,1,1


### Test avg time between tx

In [1149]:
%%bigquery res4 --project masterarbeit-245718 --verbose 
-- Average number of seconds between the transactions each address received:
-- (latest - earliest timestamp) / (count - 1), or 0 for a single transaction.
with receivedTx as (
  -- One scan per receiver yields both the count and the time span; the count
  -- alias is "...Received" because the rows are grouped by to_address.
  SELECT to_address,
    count(*) as numberOfTranscationsReceived,
    TIMESTAMP_DIFF(MAX(block_timestamp), MIN(block_timestamp), second) as timestampDiff
  FROM `masterarbeit-245718.ethereum_us.traces_sampleData`
  where to_address is not null
    and status = 1
    and call_type = "call"
  group by to_address
) select to_address as address,
CASE
  when (numberOfTranscationsReceived - 1) > 0 then timestampDiff / (numberOfTranscationsReceived - 1)
  else 0
end as avgTimeDiffBetweenReceivedTransactions
from receivedTx

Executing query with job ID: 90fd3173-cf27-4b55-867f-f705c4a2d27d
Query executing: 0.56s
Query complete after 2.87s


In [1150]:
res6 = res4.set_index("address", drop=True)
res6 = res6.sort_values(by="address")
avg_time_diff_receivedtx_result_sql = res6
avg_time_diff_receivedtx_result_sql

Unnamed: 0_level_0,avgTimeDiffBetweenReceivedTransactions
address,Unnamed: 1_level_1
exchange_1,61.5
exchange_2,72.0
exchange_3,72.0
exchange_4,62.666667
exchange_5,70.285714
speculator_15,0.0
speculator_17,0.0
speculator_19,0.0
speculator_2,444.0
speculator_23,0.0


In [1151]:
%%bigquery res5 --project masterarbeit-245718 --verbose 
-- Average number of seconds between the transactions each address sent:
-- (latest - earliest timestamp) / (count - 1), or 0 for a single transaction.
with sentTx as (
  -- number of successful plain calls per sender
  SELECT from_address, count(*) as numberOfTranscationsSent FROM `masterarbeit-245718.ethereum_us.traces_sampleData` 
  where to_address is not null and status = 1 and call_type = "call"
  group by from_address),
timeStampDiffs as (
  -- seconds between the earliest and the latest such call per sender
  SELECT from_address, TIMESTAMP_DIFF(MAX(block_timestamp), MIN( block_timestamp ), second ) as timestampDiff
  FROM `masterarbeit-245718.ethereum_us.traces_sampleData`
  where to_address is not null and status = 1 and call_type = "call"
  group by from_address
) select from_address as address, 
CASE 
  -- n transactions span n - 1 gaps; guard against dividing by zero when n = 1
  when (numberOfTranscationsSent - 1)  > 0 then timestampDiff / (numberOfTranscationsSent - 1) 
  else 0
end as avgTimeDiffBetweenSentTransactions
   from sentTx inner join  timeStampDiffs using(from_address)

Executing query with job ID: 9417090f-b624-4de2-a619-1091cf5c702f
Query executing: 0.51s
Query complete after 2.17s


In [1152]:
res6 = res5.set_index("address", drop=True)
res6 = res6.sort_values(by="address")
avg_time_diff_senttx_result_sql = res6
avg_time_diff_senttx_result_sql

Unnamed: 0_level_0,avgTimeDiffBetweenSentTransactions
address,Unnamed: 1_level_1
exchange_1,61.333333
exchange_2,88.0
exchange_3,72.0
exchange_4,78.0
exchange_5,46.909091
speculator_10,0.0
speculator_12,0.0
speculator_17,24.0
speculator_2,0.0
speculator_23,0.0


In [1153]:
# Python reference for avgTimeDiffBetweenSentTransactions, using the same row
# filter as the SQL query (successful plain calls).
res7 = [row for (index, row) in traces_sampleData.iterrows() if row.call_type == "call" and row.status == 1]
res7 = pd.DataFrame(res7)
# Aggregate the timestamp column only; max()/min() over the whole frame would
# also have to order the unrelated address and call-type columns.
sent_timestamps = res7.groupby("from_address")["block_timestamp"]
res8 = sent_timestamps.max()
res9 = sent_timestamps.min()
# Time span between the first and the last transaction sent per address.
res10 = res8 - res9
res10 = res10.rename("seconds_diff")
# Right join keeps exactly the addresses that sent something and brings in their sent count.
res10 = tx_sent_received_result_py.join(res10, how="right").drop("numberOfTranscationsReceived", axis=1)
res10 = res10.fillna(0.)
# n transactions span n - 1 gaps.
res10["avgTimeDiffBetweenSentTransactions"] = res10["seconds_diff"] / (res10["numberOfTranscationsSent"] - 1)
# A single transaction gives a zero span divided by zero gaps; treat the missing result as 0.
res10["avgTimeDiffBetweenSentTransactions"] = res10["avgTimeDiffBetweenSentTransactions"].fillna(datetime.timedelta(0))
res10.index =res10.index.rename("address")
res10 = res10.sort_values(by="address")
res10 = res10.drop(["numberOfTranscationsSent", "seconds_diff"],axis=1)
# Convert the time deltas to plain seconds so they compare against the SQL floats.
res10["avgTimeDiffBetweenSentTransactions"] = [ts.total_seconds() for ts in res10["avgTimeDiffBetweenSentTransactions"]]
avg_time_diff_senttx_result_py = res10
avg_time_diff_senttx_result_py

Unnamed: 0_level_0,avgTimeDiffBetweenSentTransactions
address,Unnamed: 1_level_1
exchange_1,61.333333
exchange_2,88.0
exchange_3,72.0
exchange_4,78.0
exchange_5,46.909091
speculator_10,0.0
speculator_12,0.0
speculator_17,24.0
speculator_2,0.0
speculator_23,0.0


In [1154]:
pd.testing.assert_frame_equal(avg_time_diff_senttx_result_sql, avg_time_diff_senttx_result_py)
print("avgTimeDiffBetweenSentTransactions Test succeeded !!")

avgTimeDiffBetweenSentTransactions Test succeeded !!


In [1155]:
%%bigquery --project masterarbeit-245718 --verbose 
select * from `ethereum_us.traces_sampleData`

Executing query with job ID: 4044523b-33c0-488a-9b1b-3bea31354769
Query executing: 0.41s
Query complete after 0.89s


Unnamed: 0,from_address,to_address,value,status,call_type,block_timestamp
0,exchange_3,speculator_33,1,0,call,2020-01-19 19:15:41.732281+00:00
1,exchange_4,speculator_85,2,0,call,2020-01-19 19:18:41.732281+00:00
2,exchange_5,speculator_17,2,0,call,2020-01-19 19:13:41.732281+00:00
3,exchange_5,speculator_99,2,0,call,2020-01-19 19:12:05.732281+00:00
4,speculator_65,exchange_2,4,0,call,2020-01-19 19:06:53.732281+00:00
5,speculator_99,exchange_4,6,0,delegatecall,2020-01-19 19:02:53.732281+00:00
6,speculator_7,exchange_4,7,0,call,2020-01-19 19:03:05.732281+00:00
7,exchange_2,speculator_81,1,1,call,2020-01-19 19:21:05.732281+00:00
8,exchange_2,speculator_50,1,1,call,2020-01-19 19:19:53.732281+00:00
9,exchange_1,speculator_84,1,1,call,2020-01-19 19:19:41.732281+00:00


In [1156]:
avg_time_diff_senttx_result_sql

Unnamed: 0_level_0,avgTimeDiffBetweenSentTransactions
address,Unnamed: 1_level_1
exchange_1,61.333333
exchange_2,88.0
exchange_3,72.0
exchange_4,78.0
exchange_5,46.909091
speculator_10,0.0
speculator_12,0.0
speculator_17,24.0
speculator_2,0.0
speculator_23,0.0


In [1157]:
tx_sent_received_result_sql

Unnamed: 0_level_0,numberOfTranscationsReceived,numberOfTranscationsSent
address,Unnamed: 1_level_1,Unnamed: 2_level_1
speculator_99,1,1
speculator_98,1,0
speculator_97,1,1
speculator_96,0,1
speculator_95,0,2
speculator_94,0,1
speculator_93,0,2
speculator_92,1,1
speculator_90,0,1
speculator_9,1,1


In [1158]:
balance_result_sql

Unnamed: 0_level_0,weiReceived,weiSent,balance
address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
speculator_99,1.0,9.0,-8.0
speculator_98,3.0,0.0,3.0
speculator_97,3.0,15.0,-12.0
speculator_96,0.0,7.0,-7.0
speculator_95,0.0,23.0,-23.0
speculator_94,0.0,3.0,-3.0
speculator_93,0.0,21.0,-21.0
speculator_92,3.0,3.0,0.0
speculator_90,0.0,15.0,-15.0
speculator_9,3.0,6.0,-3.0


In [1159]:
import sys

# One feature row per address: balance columns, transaction counts and both
# average-time columns, ordered by balance (largest first), gaps filled with 0.0.
features = (
    balance_result_sql
    .join(tx_sent_received_result_sql, how="left")
    .join(avg_time_diff_senttx_result_sql)
    .join(avg_time_diff_receivedtx_result_sql)
    .sort_values(by="balance", ascending=False)
    .fillna(0.0)
)
# Addresses that have sent/received only one transaction carry 0.0 here; they
# get twice the column maximum instead.
features["avgTimeDiffBetweenSentTransactions"] = features["avgTimeDiffBetweenSentTransactions"].replace(
    to_replace=0.0,
    value=2 * features["avgTimeDiffBetweenSentTransactions"].max(),
)
features["avgTimeDiffBetweenReceivedTransactions"] = features["avgTimeDiffBetweenReceivedTransactions"].replace(
    to_replace=0.0,
    value=2 * features["avgTimeDiffBetweenReceivedTransactions"].max(),
)

In [1160]:
features = features.reset_index()

In [1161]:
features.to_gbq('ethereum_us.features_sampleData', if_exists="replace")

1it [00:02,  2.94s/it]


# Deprecated

In [1162]:
# (Re)create the scratch table `test_tempTable`; the column definitions are
# spliced in from `s1`.
# NOTE(review): `s1` is not defined before this cell in notebook order (the
# recorded run raised NameError). The only visible assignment to `s1` further
# down is a set of addresses, not a column list -- confirm which schema text
# was intended here.
query = """
create or replace table `masterarbeit-245718.ethereum_us.test_tempTable` (
{}
);
""".format(s1);

# Show the final statement before running it.
print(query);

# Run the statement and block until the job has finished.
client.query(query).result();

NameError: name 's1' is not defined

In [None]:
def send(_from ,_to, _value):
    """Start a BigQuery job that inserts one row into `test_tempTable`.

    Only three slots of the inserted row vary (sender, receiver, value);
    every other slot is a fixed placeholder literal.

    NOTE(review): the arguments are spliced into the SQL text with
    `str.format`, unescaped -- only pass trusted values.

    Args:
        _from: text placed in the first quoted slot (the sender address).
        _to: text placed in the second quoted slot (the receiver address).
        _value: literal placed in the unquoted slot (the transferred value).

    Returns:
        Whatever `client.query` returns for the statement; it is not awaited here.
    """
    insert_template = """
    INSERT INTO `masterarbeit-245718.ethereum_us.test_tempTable` VALUES (
    "transaction_hash",
    1,
    "{}",
    "{}",
    {},
    "input",
    "output",
    "trace_type",
    "call",
    "reward_type",
    1,
    1,
    1,
    "trace_address",
    "error",
    1,
    "0001-01-01 00:00:00",
    1,
    "block_hash");
    """
    statement = insert_template.format(_from, _to, _value)
    return client.query(statement)

In [None]:
# Insert two transfers 0x0 -> 0x1 and wait for every insert job. Waiting only
# on the last job would let a failure of an earlier job go unnoticed.
for i in range(0,2,1):
    queryJob = send("0x0","0x1",2)
    queryJob.result();

In [None]:
# Insert three transfers 0x1 -> 0x0 and wait for every insert job. Waiting only
# on the last job would let a failure of an earlier job go unnoticed.
for i in range(0,3,1):
    queryJob = send("0x1","0x0",1)
    queryJob.result();

In [None]:
query = "select * from `masterarbeit-245718.ethereum_us.test_tempTable`;"
data = client.query(query).result().to_dataframe(); 
data

In [None]:
query = """
  with weiReceivedView as (
    -- debits
    select to_address, sum(ifnull(value, 0)) as weiReceived
    from `masterarbeit-245718.ethereum_us.test_tempTable`
    where to_address is not null
    and status = 1
    and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
    group by to_address
), weiSentView as (
    -- credits
    select from_address, sum(ifnull(value, 0)) as weiSent
    from `masterarbeit-245718.ethereum_us.test_tempTable`
    where from_address is not null
    and status = 1
    and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
    group by from_address
) select 
CASE 
  when to_address is not null then to_address
  when from_address is not null then from_address
end as address, ifnull(weiReceived,0) as weiReceived, ifnull(weiSent,0) as weiSent
from weiReceivedView full outer join weiSentView on from_address = to_address
"""

data = client.query(query).result().to_dataframe();
data

In [None]:
query = """
with txSent as (
  SELECT from_address, count(*) as numberOfTranscationsSent FROM `masterarbeit-245718.ethereum_us.test_tempTable` 
  where to_address is not null and status = 1 and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null) 
  group by from_address
), txReceived as (
  SELECT to_address, count(*) as numberOfTranscationsReceived FROM `masterarbeit-245718.ethereum_us.test_tempTable` 
  where to_address is not null and status = 1 and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
  group by to_address
) SELECT 
CASE  
  WHEN to_address IS NOT NULL THEN to_address
  WHEN from_address IS NOT NULL THEN from_address
END AS address,
IFNULL(numberOfTranscationsReceived, 0) as numberOfTranscationsReceived, 
IFNULL(numberOfTranscationsSent, 0) as numberOfTranscationsSent
from txReceived FULL OUTER JOIN txSent on to_address = from_address
"""

data = client.query(query).result().to_dataframe();
data

In [None]:
query = """
  with weiReceivedView as (
    -- debits
    select to_address, sum(ifnull(value, 0)) as weiReceived
    from `masterarbeit-245718.ethereum_us.top40k_week1777_traces`
    where to_address is not null
    and status = 1
    and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
    group by to_address
), weiSentView as (
    -- credits
    select from_address, sum(ifnull(value, 0)) as weiSent
    from `masterarbeit-245718.ethereum_us.top40k_week1777_traces`
    where from_address is not null
    and status = 1
    and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
    group by from_address
) select 
CASE 
  when to_address is not null then to_address
  when from_address is not null then from_address
end as address, ifnull(weiReceived,0) as weiReceived, ifnull(weiSent,0) as weiSent
from weiReceivedView full outer join weiSentView on from_address = to_address
"""

data1 = client.query(query).result().to_dataframe();
data1

In [None]:
data1["balance"] = data1["weiReceived"] - data1["weiSent"]
data1.sort_values(by="balance", ascending=False)

In [None]:
query = """
with double_entry_book as (
    -- debits
    select to_address as address, value as value
    from `masterarbeit-245718.ethereum_us.top40k_week1777_traces`
    where to_address is not null
    and status = 1
    and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
    union all
    -- credits
    select from_address as address, -value as value
    from `masterarbeit-245718.ethereum_us.top40k_week1777_traces`
    where from_address is not null
    and status = 1
    and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
)
select address, sum(value) as balance
from double_entry_book
group by address
order by balance desc
"""

data2 = client.query(query).result().to_dataframe();
data2

# SQL-Queries via Python für einen kleineren Datensatz verifizieren

Den folgenden SQL-Befehl gilt es zu verifizieren:

``` sql
with double_entry_book as (
    -- debits
    select to_address as address, value as value
    from `masterarbeit-245718.ethereum_us.top40k_week1777_traces`
    where to_address is not null
    and status = 1
    and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
    union all
    -- credits
    select from_address as address, -value as value
    from `masterarbeit-245718.ethereum_us.top40k_week1777_traces`
    where from_address is not null
    and status = 1
    and (call_type not in ('delegatecall', 'callcode', 'staticcall') or call_type is null)
)
select address, sum(value) as balance
from double_entry_book
group by address
order by balance desc
```

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from google.cloud import bigquery
import time
%env GOOGLE_APPLICATION_CREDENTIALS=../secrets/bigquery-service-account.json 
client = bigquery.Client()

In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
%load_ext google.cloud.bigquery

```sql
select * from `bigquery-public-data.crypto_ethereum.traces` where DATE(block_timestamp) >= '2019-07-1' AND DATE(block_timestamp) <= '2019-7-7'
```

In [None]:
%%bigquery data --project masterarbeit-245718 --verbose 
select * from `ethereum_us.traces_week` LIMIT 15000

In [None]:
%%bigquery data --project masterarbeit-245718 --verbose 
select * from `bigquery-public-data.crypto_ethereum.traces`
where DATE(block_timestamp) >= '2019-07-6' AND DATE(block_timestamp) <= '2019-7-7' LIMIT 100000

In [None]:
%%bigquery data --project masterarbeit-245718 --verbose 
select * from `ethereum_us.traces_testdata`

In [None]:
s1 = set(data["to_address"].unique())
s2 = set(data["from_address"].unique())
print("Therea are {} unique addresses".format(len(s1 | s2)))

In [None]:
res3 = [row for (index, row) in data.iterrows() if row.call_type == "call"]
res3 = pd.DataFrame(res3)
res3.value = res3.value.astype(float)
print("There are {} addresses left.".format(len(set(res3["from_address"].unique())  |set(res3["to_address"].unique()))))

In [None]:
# Per-address totals of wei sent and received over the filtered traces.
# The index is passed as a list: a set is unordered and newer pandas versions
# reject a set as index.
res4 = pd.DataFrame(index=list(set(res3["from_address"]) | set(res3["to_address"])))
# Select `value` before aggregating so only that column is summed; summing the
# whole frame also tries to add up every other column of the traces.
res4["weiSent"] = res3.groupby("from_address")["value"].sum()
res4["weiReceived"] = res3.groupby("to_address")["value"].sum()
# Addresses that appear on one side only have no total on the other side.
res4["weiSent"] = res4["weiSent"].fillna(0.)
res4["weiReceived"] = res4["weiReceived"].fillna(0.)
# res4[(res4.weiSent > 0) & (res4.weiReceived > 0)]

In [None]:
res4["balance"] = res4["weiReceived"] - res4["weiSent"]
res4 = res4.sort_values(by="balance", ascending=False)

In [None]:
res4

In [None]:
%%bigquery sqldata --project masterarbeit-245718 --verbose 
with double_entry_book as (
    (select to_address as address, value as value
    from `ethereum_us.traces_testdata`
    where status = 1
    and (call_type = 'call')
    and DATE(block_timestamp) >= '2019-07-6' 
    and DATE(block_timestamp) <= '2019-7-7')
    union all
    (select from_address as address, -value as value
    from `ethereum_us.traces_testdata`
    where status = 1
    and (call_type = 'call')
    and DATE(block_timestamp) >= '2019-07-6' 
    and DATE(block_timestamp) <= '2019-7-7')
    )
select address, sum(value) as sqlbalance
from double_entry_book
group by address
order by sqlbalance desc

In [None]:
sqldata.sqlbalance = sqldata.sqlbalance.astype(float)
sqldata = sqldata.set_index("address")

In [None]:
len(sqldata)
len(res4)
pd.concat([sqldata,res4], join="inner",  axis=1)

In [None]:
%%bigquery sqldata1 --project masterarbeit-245718 --verbose 
with weiReceivedView as (
    -- debits
    select to_address, sum(ifnull(value, 0)) as weiReceived
    from `masterarbeit-245718.ethereum_us.traces_testdata`
    where to_address is not null
    and status = 1
    and (call_type = "call")
    group by to_address
), weiSentView as (
    -- credits
    select from_address, sum(ifnull(value, 0)) as weiSent
    from `masterarbeit-245718.ethereum_us.traces_testdata`
    where from_address is not null
    and status = 1
    and (call_type = "call")
    group by from_address
) select 
CASE 
  when to_address is not null then to_address
  when from_address is not null then from_address
end as address, ifnull(weiSent,0) as weiSent, ifnull(weiReceived,0) as weiReceived
from weiReceivedView full outer join weiSentView on from_address = to_address


In [None]:
sqldata1

In [None]:
sqldata1["weiReceived"] = sqldata1["weiReceived"].astype(float)
sqldata1["weiSent"] = sqldata1["weiSent"].astype(float)

In [None]:
sqldata1 = sqldata1.set_index("address")

In [None]:
pd.concat([sqldata1,res4], join="inner",  axis=1)

In [None]:
len(sqldata1)
len(res4)

for i,r in sqldata1.iterrows():
    print(r.weiSent == res4.loc[i,"weiSent"])

In [None]:
# Look up the `value` entry of one specific address in two intermediate frames.
# NOTE(review): neither `res41` nor `res42` is assigned anywhere in the visible
# notebook -- this cell depends on leftover kernel state; confirm or remove.
res41.loc["0x0000000000085d4780b73119b644ae5ecd22b376", :].value
res42.loc["0x0000000000085d4780b73119b644ae5ecd22b376", :].value

In [None]:
print("Traces with call_type 'delegate_call', 'staticcall': {}".format(len(res2) - len(res3)))

In [None]:
for index, row in res2.iterrows():
    print(row.call_type)

In [None]:
x = [ e for e in data["from_address"] if not str(e).startswith("0x")]
x

In [None]:
data["from_address"] = data["from_address"].astype(str)
data[data["from_address"] == "None"]

In [None]:
query = """
select * from `masterarbeit-245718.ethereum_us.traces_week`
"""

data = client.query(query)
data

In [None]:
ri = data.result()

In [None]:
ri.total_rows

In [None]:
ri.to_dataframe(progress_bar_type='tqdm_notebook')

In [None]:
import json

# Dump `data` as JSON into a file named after the interval bounds.
# The open mode must be the string "w" (the bare name `w` is undefined) and the
# `with` header needs its trailing colon.
# NOTE(review): `interval` is not assigned in the visible notebook, and the
# cells above bind `data` to the object returned by `client.query`, which is
# presumably not JSON-serialisable as-is -- confirm the intended inputs.
with open("./traces_{start}_{end}".format(start=interval[0], end=interval[1]), "w") as json_file:
    json.dump(data, json_file)

In [None]:
from sqlalchemy import create_engine, text
import pandas as pd

# In-memory SQLite database, used only for this round-trip demo.
engine = create_engine('sqlite://', echo=False)
df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']})
df.to_sql('users', con=engine)
# Engine.execute() with a raw SQL string was removed in SQLAlchemy 2.0; run the
# statement through an explicit connection and text(), which also works on 1.x.
with engine.connect() as connection:
    users = connection.execute(text("SELECT * FROM users")).fetchall()
# Bare expression so the fetched rows are still displayed as the cell output.
users