In [2]:
# Opens a client to Google BigQuery to pull the data
from google.cloud import bigquery
from google.oauth2 import service_account

# Path to the service-account key file, resolved relative to the notebook's working directory.
key_path = 'service_key_google_cloud.json'

# Build credentials from the key file, requesting the cloud-platform scope.
credentials = service_account.Credentials.from_service_account_file(
    key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

# Shared BigQuery client used by the query helpers below; the project id is taken
# from the credentials themselves.
client = bigquery.Client(credentials=credentials, project=credentials.project_id,)
def find_starting_date():
    """Fetch ``block_timestamp`` for the rows of the lowest-numbered block.

    Runs a query against the public Ethereum transactions table through the
    module-level ``client`` and returns the result as a DataFrame.
    """
    earliest_block_sql = """
    
    SELECT block_timestamp from `bigquery-public-data.crypto_ethereum.transactions`
    where block_number = 
        (select min(block_number) from `bigquery-public-data.crypto_ethereum.transactions`)
    """
    query_job = client.query(earliest_block_sql)
    result_rows = query_job.result()
    return result_rows.to_dataframe()

def find_all_txns_on_date(start, end):
    """Fetch every transaction column for rows between two dates.

    ``start`` and ``end`` are formatted as ``YYYY-MM-DD`` via ``strftime`` and
    used as the two bounds of a SQL ``BETWEEN`` on ``block_timestamp`` (both
    bounds are inclusive). The query runs through the module-level ``client``
    and the result is returned as a DataFrame.
    """
    day_format = '%Y-%m-%d'
    lower_bound = start.strftime(day_format)
    upper_bound = end.strftime(day_format)
    window_sql = f"""

    SELECT *
    FROM `bigquery-public-data.crypto_ethereum.transactions`
    WHERE block_timestamp BETWEEN '{lower_bound}' AND '{upper_bound}'
    """
    query_job = client.query(window_sql)
    return query_job.result().to_dataframe()

def curved_edges2(edges, pos, dist_ratio=0.2, bezier_precision=20, polarity='random'):
    """Build one four-control-point Bezier curve per edge from 3-D node positions.

    Parameters
    ----------
    edges : numpy array, one row per edge
        Columns 0 and 1 hold the two endpoint identifiers. They are used as
        keys into ``pos`` and joined with "-" to form dictionary keys, so they
        are presumably strings (TODO confirm against callers).
    pos : mapping
        Node identifier -> three coordinates (the reshape below hard-codes 3).
    dist_ratio : float
        Fraction of the edge length used both to move the control points inwards
        along the edge and to offset them sideways.
    bezier_precision : int
        Number of parameter values (evenly spaced on [0, 1]) each curve is
        evaluated at.
    polarity : str
        'random' draws the sideways direction of each control point at random;
        any other value derives it from the parity of the endpoints' hashes.

    Returns
    -------
    (curves, curve_dict)
        ``curves`` is ``np.array`` of the per-edge evaluated curves, in edge
        order; ``curve_dict`` maps ``"-".join(edges[i])`` to the same curve.
    """
    l = edges.shape[0] # number of edges

    if polarity == 'random':
        # Random polarity of curve: an independent +/-1 sign per edge for each
        # of the two control points (drawn from numpy's global RNG).
        rnd_1 = np.where(np.random.randint(2, size=l)==0, -1, 1)
        rnd_2 = np.where(np.random.randint(2, size=l)==0, -1, 1)
    else:
        # Create a fixed (hashed) polarity column in the case we use fixed polarity
        # This is useful, e.g., for animations
        # Both signs come from the same expression, so rnd_1 == rnd_2 here.
        # NOTE(review): built-in hash() of str is salted per interpreter process
        # unless PYTHONHASHSEED is set, so this is presumably only stable within
        # one session — confirm that is sufficient.
        rnd_1 = np.where(np.mod(np.vectorize(hash)(edges[:,0])+np.vectorize(hash)(edges[:,1]),2)==0,-1,1)
        rnd_2 = np.where(np.mod(np.vectorize(hash)(edges[:,0])+np.vectorize(hash)(edges[:,1]),2)==0,-1,1)
    
    # Coordinates (x,y,z) of both nodes for each edge
    # e.g., https://stackoverflow.com/questions/16992713/translate-every-element-in-numpy-array-according-to-key
    # Note the np.vectorize method doesn't work for all node position dictionaries for some reason
    # np.unique yields the distinct node ids plus, for every entry of `edges`,
    # the index of its id; looking each id up once and indexing with `inv`
    # translates the whole edge array to coordinates.
    u, inv = np.unique(edges, return_inverse = True)
    #print(np.array([pos[x] for x in u])[inv])
    coords = np.array([pos[x] for x in u])[inv].reshape([edges.shape[0], edges.shape[1], 3])
    coords_node1 = coords[:,0,:]
    coords_node2 = coords[:,1,:]
    # Swap node1/node2 allocations to make sure the directionality works correctly
    # (after the swap node1 is the endpoint with the smaller first coordinate).
    # The right-hand side uses boolean-mask indexing, which copies, so the
    # simultaneous assignment is a true swap even though both names are views of `coords`.
    should_swap = coords_node1[:,0] > coords_node2[:,0]
    coords_node1[should_swap], coords_node2[should_swap] = coords_node2[should_swap], coords_node1[should_swap]
    
    # Distance for control points: a fixed fraction of each edge's length
    norm = np.sqrt(np.sum((coords_node1-coords_node2)**2, axis=1))
    dist = dist_ratio * norm
    # Unit direction of the line connecting the nodes & a vector perpendicular to it
    # NOTE(review): norm is 0 when both endpoints share a position (e.g. an edge
    # from a node to itself), which makes this division produce NaN — verify
    # whether such edges can reach this function.
    unit_vector = (coords_node2-coords_node1)/norm[:,None]
    # (ux*uz, uy*uz, -(ux^2+uy^2)) has zero dot product with (ux, uy, uz).
    # It is not re-normalised; its length is sqrt(ux^2+uy^2).
    perp_vec = np.array([unit_vector[:,0]*unit_vector[:,2], unit_vector[:,1]*unit_vector[:,2], -(unit_vector[:,0]**2+unit_vector[:,1]**2)]).T

    # Move `dist` inwards from each endpoint along the edge ...
    coords_node1_displace = coords_node1 + (unit_vector * dist[:,None])
    coords_node2_displace = coords_node2 - (unit_vector * dist[:,None])

    # ... then sideways by `dist` along the perpendicular, with the chosen sign.
    coords_node1_ctrl = coords_node1_displace + rnd_1[:,None]*(perp_vec*dist[:,None])
    coords_node2_ctrl = coords_node2_displace + rnd_2[:,None]*(perp_vec*dist[:,None])

    # Combine the four point sets (endpoint, control, control, endpoint) into a
    # 'node matrix' indexed as [control point, edge, coordinate]
    node_matrix = np.array([coords_node1, coords_node1_ctrl, coords_node2_ctrl, coords_node2])
    
    # Create the Bezier curves and store them in a list
    curveplots = []
    curve_dict = {}
    for i in tqdm(range(l)):
        # Transposed so that each column is one control point of edge i.
        nodes = node_matrix[:,i,:].T
        # Evaluate at `bezier_precision` evenly spaced parameters; transposed
        # back so that callers can index coordinates as curve[:, 0..2].
        curve = bezier.Curve.from_nodes(nodes).evaluate_multi(np.linspace(0,1,bezier_precision)).T
        curveplots.append(curve)
        curve_dict["-".join(edges[i])] = curve
        
    # Return an array of these curves
    curves = np.array(curveplots)
    return curves, curve_dict

Collecting google-cloud-bigquery
  Downloading google_cloud_bigquery-2.28.1-py2.py3-none-any.whl (202 kB)
     |████████████████████████████████| 202 kB 1.8 MB/s            
[?25hCollecting grpcio<2.0dev,>=1.38.1
  Downloading grpcio-1.41.0-cp39-cp39-macosx_10_10_x86_64.whl (3.9 MB)
     |████████████████████████████████| 3.9 MB 3.2 MB/s            
Collecting google-api-core[grpc]<3.0.0dev,>=1.29.0
  Downloading google_api_core-2.1.1-py2.py3-none-any.whl (95 kB)
     |████████████████████████████████| 95 kB 3.2 MB/s            
[?25hCollecting proto-plus>=1.10.0
  Downloading proto_plus-1.19.5-py3-none-any.whl (44 kB)
     |████████████████████████████████| 44 kB 3.1 MB/s            
[?25hCollecting google-resumable-media<3.0dev,>=0.6.0
  Using cached google_resumable_media-2.0.3-py2.py3-none-any.whl (75 kB)
Collecting google-cloud-core<3.0.0dev,>=1.4.1
  Downloading google_cloud_core-2.1.0-py2.py3-none-any.whl (27 kB)
Collecting protobuf>=3.12.0
  Downloading protobuf-3.19.0-cp39-

In [14]:
from datetime import date, timedelta
import pandas as pd
# Timestamp(s) of the lowest-numbered block in the table.
c = find_starting_date()
# Analysis day: 500 days after the date of the first returned timestamp.
day = c.iloc[0]['block_timestamp'].date() + timedelta(days=500)
next_day = day + timedelta(days=1)
# Raw transactions between `day` and `next_day` (both used as BETWEEN bounds).
zf = find_all_txns_on_date(day, next_day)
# Start of the analysis day as a numeric timestamp; later subtracted from each
# transaction's `.timestamp()` when bucketing.
day_as_timestamp = pd.to_datetime(day).timestamp()
# Width of one time bucket; it divides a difference of `.timestamp()` values,
# i.e. 8 * 60 = 480 seconds.
timeframe = 8*60


In [15]:
from collections import defaultdict
import numpy as np
import networkx as nx
import pandas as pd
import math

# Work on a copy so the raw query result in `zf` stays intact. Nulls become 0,
# which makes a missing address falsy in the checks below.
df = zf.copy()
df = df.fillna(0)

has_contract_address = df['receipt_contract_address'].astype(bool)
has_recipient = df['to_address'].astype(bool)

# A row is flagged when the receipt carries a contract address and the
# transaction has no recipient. Built from boolean masks so the column is a
# proper bool column rather than a mix of 0 / True / False.
df['is_contract'] = has_contract_address & ~has_recipient

# These columns may arrive as non-float objects; convert them to plain floats.
for numeric_col in ('value', 'receipt_effective_gas_price', 'receipt_gas_used'):
    df[numeric_col] = df[numeric_col].apply(float)

# Rows without a recipient use the receipt's contract address as the edge target.
df['to_address'] = df['to_address'].where(has_recipient, df['receipt_contract_address'])

# Transferred value plus the fee paid for the transaction itself:
# fee = effective gas price * gas used by THIS transaction (`receipt_gas_used`).
# `receipt_cumulative_gas_used` is the running total of gas within the block up
# to and including the transaction, so using it would overstate the fee for
# every transaction that is not first in its block.
df['true_transfer_volume'] = df['value'] + df['receipt_effective_gas_price'] * df['receipt_gas_used']

# Index of the `timeframe`-second bucket (counted from the start of the day)
# that each transaction falls into.
df['grouping'] = df['block_timestamp'].apply(lambda x: math.floor((x.timestamp() - day_as_timestamp) / timeframe))

# Edge keys in both orientations. Note the column names read the other way
# round from their contents: 'to-from' holds "<from>-<to>" and 'from-to' holds
# "<to>-<from>". Downstream code relies on these exact names.
df['to-from'] = df['from_address'] + '-' + df['to_address']
df['from-to'] = df['to_address']  + '-' + df['from_address']

df = df[['grouping', 'to-from', 'from-to', 'from_address', 'to_address', 'block_number', 'block_timestamp', 'true_transfer_volume', 'is_contract', 'transaction_type', 'receipt_status']]

# Largest single-transaction volume of the day; used to normalise opacities.
max_txn_volume = float(df['true_transfer_volume'].max())

def specialized_round(x, base=0.02):
    """Snap ``x`` onto the grid of integer multiples of ``base``.

    The number of steps is chosen with Python's built-in ``round``.
    """
    steps = round(x / base)
    return base * steps


# Opacity per transaction: volume relative to the day's largest transaction,
# compressed with a fifth root so small transfers stay visible, clamped at 0
# and snapped to the grid used by `specialized_round`.
df['inverse_exponential_vol_alpha'] = df['true_transfer_volume'].apply(
    lambda volume: specialized_round(max((float(volume) / max_txn_volume) ** (1/5), 0))
)

# Total volume per ordered (from, to) address pair. `reset_index` turns the
# group keys back into ordinary columns in memory (no temporary CSV file).
pivot = (
    df.groupby(['from_address', 'to_address'])
    .agg({'true_transfer_volume': 'sum'})
    .reset_index()
)

# Fold the two directions of a pair into one undirected total, keyed by
# "<a>-<b>" in whichever orientation received volume first.
daily_transfers = defaultdict(float)
pair_columns = pivot[['from_address', 'to_address', 'true_transfer_volume']]
for from_add, to_add, volume in pair_columns.itertuples(index=False, name=None):
    normal = f"{from_add}-{to_add}"
    inverse = f"{to_add}-{from_add}"
    # Reading daily_transfers[inverse] inserts a 0.0 placeholder when the key
    # is new; placeholders are dropped by the `value > 0` filter below. The
    # lookup is kept as-is so the resulting edge order does not change.
    if daily_transfers[inverse] > 0:
        daily_transfers[inverse] = daily_transfers[inverse] + volume
    else:
        daily_transfers[normal] = daily_transfers[normal] + volume

# Edge list for the graph libraries; pairs without volume are skipped.
pds = []
for key, value in daily_transfers.items():
    source, target = key.split('-')
    if value > 0:
        # Scaled down by 10**18 (presumably wei -> ether; verify units).
        pds.append({"source": source, "target": target, "weight": value/10**18})
cf = pd.DataFrame(pds)

In [10]:
import networkx as nx
import forceatlas2
import pandas as pd

# Undirected graph from the edge list, carrying the 'weight' edge attribute.
G = nx.from_pandas_edgelist(cf, source='source', target='target', edge_attr=['weight'])
# Written to disk because the layout step below takes a file path.
nx.write_gexf(G, 'test.gexf')
# Runs the layout on the GEXF file.
# NOTE(review): presumably this is what produces 'test.gexf.coords.txt' read
# below — confirm against the forceatlas2 package in use.
forceatlas2.forceatlas2('test.gexf', target_change_per_node=0)

# Tab-separated layout result; later cells read its 'x', 'y' and 'z' columns.
coords = pd.read_csv('test.gexf.coords.txt', sep='\t')

Oct 14, 2021 8:48:37 PM org.netbeans.modules.masterfs.watcher.Watcher getNotifierForPlatform
INFO: Native file watcher is disabled
Oct 14, 2021 8:48:37 PM org.gephi.utils.CharsetToolkit guessEncoding
INFO: Detected encoding UTF-8 in XML file
Oct 14, 2021 8:48:38 PM org.gephi.io.processor.plugin.DefaultProcessor process
INFO: # Nodes loaded: 16,938
Oct 14, 2021 8:48:38 PM org.gephi.io.processor.plugin.DefaultProcessor process
INFO: # Edges loaded: 23,770


*************************25%
*************************50%
*************************75%
*************************100%
Time = 13.915s


In [1]:
import plotly.graph_objs as go
from plotly.offline import iplot
import numpy as np
import bezier
import matplotlib.pyplot as plt
import networkx as nx
import igraph as ig
from tqdm import tqdm
import pandas as pd
from itertools import chain
from collections import defaultdict 

# Layout coordinates as a list of [x, y, z] rows, in the row order of `coords`.
layt_coords = coords[['x', 'y', 'z']].values.tolist()
# `pds` is the list of edge dicts built earlier; from here on it is a DataFrame.
pds = pd.DataFrame(pds)
g = ig.Graph.DataFrame(pds, directed=False)

#layt = g.layout_fruchterman_reingold(dim=3)
# Vertex name -> [x, y, z].
# NOTE(review): this pairs the i-th igraph vertex with the i-th row of the
# coords file, i.e. it assumes both list the nodes in the same order — confirm.
positions = dict(zip([v.attributes()["name"] for v in np.array(g.vs)], layt_coords))
subset = pds[['source', 'target']]
# One (source, target) row per edge, as expected by curved_edges2.
edges = np.array([tuple(x) for x in subset.to_numpy()])
splines, splines_dict = curved_edges2(edges, positions)

# Vertex name -> vertex index, and the edge list expressed with those indices.
node_dict = {v['name']: idx for idx,v in enumerate(list(g.vs))}
Edges=[(node_dict[pds.iloc[k]['source']], node_dict[pds.iloc[k]['target']]) for k in range(len(pds))]
# Node coordinates split into one list per axis for the marker traces.
Xn=[]
Yn=[]
Zn=[]

for k in tqdm(range(len(g.vs))):
  [x, y, z] = layt_coords[k]
  Xn.append(x)
  Yn.append(y)
  Zn.append(z)

# Per-edge coordinate lists, filled further down in this cell.
Xe=[]
Ye=[]
Ze=[]

opacities = defaultdict(float)

def insert_nones_per_line_segment(arr):
    """Return the items of ``arr`` as a list with ``None`` after every pair.

    Items are taken two at a time; a trailing single item (odd length) is
    also followed by ``None``. An empty input gives an empty list.
    """
    separated = []
    for start in range(0, len(arr), 2):
        separated.extend(arr[start:start + 2])
        separated.append(None)
    return separated
    
# Per-edge coordinate lists: (edge key, [coordinates..., None]) for each axis.
# The loop variable is deliberately NOT called `splines`, so the array of all
# curves returned by curved_edges2 is not overwritten by the last single curve.
for node_tup, spline in tqdm(splines_dict.items()):
    Xe.append((node_tup, [*spline[:, 0], None]))
    Ye.append((node_tup, [*spline[:, 1], None]))
    Ze.append((node_tup, [*spline[:, 2], None]))

# One thin white line trace per edge, each terminated by None.
line_traces = [
    go.Scatter3d(
        x=[*sp[:, 0], None], y=[*sp[:, 1], None], z=[*sp[:, 2], None],
        mode='lines', opacity=0.8,
        line=dict(dash='solid', color='rgb(255,255,255)', width=1),
        hoverinfo='none',
    )
    for node_tup, sp in tqdm(splines_dict.items())
]

# Candidate node colours as [R, G, B] triples; each node is assigned one of
# them and the triple is rendered into an "rgba(...)" string by create_node_colors.
STAR_COLOR_PALETTE = [
[175, 201, 255],
[199, 216, 255],
[255, 244, 243],
[255, 229, 207],
[255, 217, 178],
[255, 199, 142],
[255, 166, 81],
]


def create_node_colors(color_arr, opacity):
    """Format colour components plus an opacity as an ``rgba(...)`` string.

    The components of ``color_arr`` are joined with ", " and ``opacity`` is
    appended as the last field.
    """
    channels = ", ".join(map(str, color_arr))
    return f"rgba({channels}, {opacity!s})"

import random

# Marker size per vertex: degree + 1, capped at 25.
d = [min(v.degree()+1, 25) for v in list(g.vs)]

# (vertex name, palette colour) per vertex, chosen once so a node keeps its
# colour in every frame. No seed is set in the visible code, so the
# assignment presumably differs between runs.
node_fixed_colors = [(node.attributes()["name"], random.choice(STAR_COLOR_PALETTE)) for node in list(g.vs)]

# Node markers with every node drawn at full opacity.
trace2=go.Scatter3d(x=Xn, y=Yn, z=Zn, mode='markers', name='actors', 
                   marker=dict(symbol='circle', size=d, color=[create_node_colors(color,1) for node, color in node_fixed_colors] , 
                      line=dict(color='rgb(50,50,50)', width=0)), hoverinfo='text')

# Shared axis settings: background, axis line, zero line, grid, tick labels and title all off.
axis=dict(showbackground=False, showline=False, zeroline=False, showgrid=False, showticklabels=False, title='')

# Figure layout: 4000x4000, no legend, bare axes, and a "Play" button that
# triggers the animation.
layout = go.Layout(
    width=4000,
    height=4000,
    showlegend=False,
    scene=dict(
        xaxis=dict(axis),
        yaxis=dict(axis),
        zaxis=dict(axis),
    ),
    updatemenus=[dict(
        type="buttons",
        buttons=[dict(label="Play",
            method="animate",
            args=[None]
        )]
    )]
)

def create_data_for_frame(k, df):
    """Build the Plotly traces for animation frame ``k``.

    Parameters
    ----------
    k : int
        Time-bucket index; only rows of ``df`` whose ``grouping`` equals ``k``
        are drawn.
    df : pandas.DataFrame
        Transactions with the columns ``grouping``, ``to-from``, ``from-to``
        and ``inverse_exponential_vol_alpha``.

    Returns
    -------
    list
        The node marker trace first, followed by one line trace per distinct
        opacity value, each containing all edge curves drawn at that opacity.
        Nodes that take part in no transaction of the bucket are dimmed.

    Notes
    -----
    Reads the module-level ``splines_dict``, ``Xn``/``Yn``/``Zn``, ``d`` and
    ``node_fixed_colors``.
    """
    time_slice = df[df['grouping'] == k]

    # Every address appearing on either side of a transaction in this bucket.
    edge_keys = set(time_slice['to-from']).union(time_slice['from-to'])
    active_nodes = {address for edge_key in edge_keys for address in edge_key.split('-')}

    # One opacity per undirected pair. If the mirrored key already carries a
    # positive opacity it is updated; otherwise the row's own key is set.
    # A plain dict with .get() is used so that merely *checking* the mirrored
    # key does not create a 0.0 entry for it (which would otherwise add every
    # active edge a second time to a fully transparent trace).
    opacities = {}
    rows = zip(
        time_slice['to-from'],
        time_slice['from-to'],
        time_slice['inverse_exponential_vol_alpha'],
    )
    for own_key, mirrored_key, alpha in rows:
        if opacities.get(mirrored_key, 0) > 0:
            opacities[mirrored_key] = alpha
        else:
            opacities[own_key] = alpha

    # Group the pre-computed curves by opacity; a curve may be stored under
    # either orientation of its key. Pairs without a curve are skipped.
    traces_by_opacity = defaultdict(list)
    for node_tup, opacity in opacities.items():
        parts = node_tup.split("-")
        inv_node_tup = parts[1] + "-" + parts[0]
        curve = splines_dict.get(node_tup)
        if curve is None:
            curve = splines_dict.get(inv_node_tup)
        if curve is not None:
            traces_by_opacity[opacity].append(curve)

    # One line trace per opacity level; None separates consecutive curves so
    # they are not joined to each other.
    edge_traces = []
    for opacity, curves in traces_by_opacity.items():
        new_Xe = []
        new_Ye = []
        new_Ze = []
        for curve in curves:
            new_Xe += [*curve[:, 0], None]
            new_Ye += [*curve[:, 1], None]
            new_Ze += [*curve[:, 2], None]
        edge_traces.append(go.Scatter3d(x=new_Xe, y=new_Ye, z=new_Ze, mode='lines', opacity=opacity, line=dict(dash='solid', color='rgb(200,200,200)', width=3), hoverinfo='none'))

    # Active nodes keep their fixed colour; all others are dimmed.
    node_colors = [
        create_node_colors(color, 1) if node in active_nodes else 'rgba(50,50,50,0.1)'
        for node, color in node_fixed_colors
    ]
    node_trace = go.Scatter3d(
        x=Xn, y=Yn, z=Zn, mode='markers', name='actors',
        marker=dict(symbol='circle', size=d, color=node_colors,
                    line=dict(color='rgb(50,50,50)', width=0)),
        hoverinfo='text',
    )
    return [node_trace] + edge_traces



# Number of animation frames: 1440 divided into steps of 8.
NUM_FRAMES = 1440 // 8
def rotate_z(x, y, z, theta):
    """Rotate the point (x, y, z) by ``theta`` radians about the z axis.

    The (x, y) pair is treated as a complex number and multiplied by
    ``exp(i*theta)``; ``z`` is returned unchanged.
    """
    rotated = np.exp(1j*theta) * (x + 1j*y)
    return np.real(rotated), np.imag(rotated), z

# ---- Per-frame traces and camera positions ---------------------------------
frames = []
x_eye, y_eye, z_eye = 1.25, 1.25, 0.8
data_for_all_frames = []
x_y_z_for_all_frames = []
max_traces_per_frame = -1
for k in tqdm(range(NUM_FRAMES), desc="frames"):
    # Orbit the camera about the z axis; -6.2 rad is just short of one full
    # turn over the whole animation.
    xe, ye, ze = rotate_z(x_eye, y_eye, z_eye, -6.2*(k/NUM_FRAMES))
    traces = create_data_for_frame(k, df)
    if len(traces) > max_traces_per_frame:
        max_traces_per_frame = len(traces)
    data_for_all_frames.append(traces)
    x_y_z_for_all_frames.append({ "x": xe, "y": ye, "z": ze })

# Empty line trace used to fill unused trace slots.
dummy_line_trace = max_traces_per_frame * [go.Scatter3d(x=[], y=[], z=[], mode='lines', line=dict(dash='solid', color='rgb(255,255,255)', width=2),hoverinfo='none')]

# Frame traces are applied to the figure's traces by position, so every frame
# must have the same number of traces with the node trace in the same slot.
# Layout of each frame: edge traces, then empty placeholders, then the node
# trace last — exactly `max_traces_per_frame` entries in total.
for idx, traces in enumerate(data_for_all_frames):
    length = len(traces)
    padding = dummy_line_trace[:max_traces_per_frame - length]
    data_for_all_frames[idx] = traces[1:] + padding + [traces[0]]

frames = [
    go.Frame(data=data, layout=dict(scene=dict(camera=dict(eye=x_y_z_for_all_frames[idx]))))
    for idx, data in tqdm(enumerate(data_for_all_frames), total=len(data_for_all_frames))
]

# NOTE(review): `data` is not used by anything below in this cell.
data= [trace2, dummy_line_trace]

# ---- Figure -----------------------------------------------------------------
axis=dict(showbackground=False, showline=False, zeroline=False, showgrid=False, showticklabels=False, title='')
layout = go.Layout(
    width=4000,
    height=4000,
    showlegend=False,
    scene=dict(
        xaxis=dict(axis),
        yaxis=dict(axis),
        zaxis=dict(axis),
    ),
    updatemenus=[dict(
        type="buttons",
        buttons=[dict(label="Play",
            method="animate",
            args=[None]
        )]
    )]
)
# The first frame's traces double as the figure's initial traces.
fig=go.Figure(data=data_for_all_frames[0], frames=frames, layout=layout)
img_width = 5000
img_height = 5000
scale_factor = 0.3

# Add invisible scatter trace.
# This trace is added to help the autoresize logic work.
# marker_opacity is 0 so the two corner markers really are invisible.
fig.add_trace(
    go.Scatter(
        x=[0, img_width * scale_factor],
        y=[0, img_height * scale_factor],
        mode="markers",
        marker_opacity=0
    )
)
fig.update_layout(transition = {'duration': 10})

# Configure axes
fig.update_xaxes(
    visible=False,
    range=[0, img_width * scale_factor]
)

fig.update_yaxes(
    visible=False,
    range=[0, img_height * scale_factor],
    # the scaleanchor attribute ensures that the aspect ratio stays constant
    scaleanchor="x"
)

# Background picture, stretched over the whole 2-D plotting area below the traces.
from PIL import Image
imago = Image.open('./hubble-1632627.png')

fig.add_layout_image(
    dict(
        x=0,
        sizex=img_width * scale_factor,
        y=img_height * scale_factor,
        sizey=img_height * scale_factor,
        xref="x",
        yref="y",
        opacity=1.0,
        layer="below",
        sizing="stretch",
        source=imago)
)

# Configure other layout (this overrides the 4000x4000 size set above)
fig.update_layout(
    width=img_width * scale_factor,
    height=img_height * scale_factor,
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)

# Disable the autosize on double click because it adds unwanted margins around the image
# More detail: https://plotly.com/python/configuration-options/
fig.show(config={'doubleClick': 'reset'})


iplot(fig, filename='Les-Miserables')
fig.write_html('hello.html')

KeyboardInterrupt: 