In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from datetime import datetime, timezone
from dateutil.relativedelta import relativedelta
from collections import Counter, defaultdict

from tqdm import tqdm

In [2]:
# Labeling: build per-address label dictionaries (type flags and nametag).
# All three input files are space-separated; column names are supplied via `names=`.

add_label = pd.read_csv("~/Project/Ethereum/dataset/addresslabel.txt", names = ["addID", "type", "nametag"] , sep = " ")
add_label = add_label.set_index("addID")
# {addID: {"type": ..., "nametag": ...}}; fields that are NaN for an address are omitted.
label_dic = {k: v.dropna().to_dict() for k,v in add_label.T.items()} # drop nan

# Genesis list: only the first two columns of the file are read.
genesis = pd.read_csv("~/Project/Ethereum/dataset/genesis_labels.txt", usecols = [0,1], names = ["addID", "type"] , sep = " ")
genesis = genesis.set_index("addID")

# Miner list: keep one row per distinct miner address and tag it as "miner".
miner = pd.read_csv("~/Project/Ethereum/dataset/blockminer.txt", names = ["blockID", "addID","uncle"], header=None, sep=' ')
miner = miner.drop_duplicates(["addID"])
miner["type"] = "miner"
miner = miner[["addID", "type"]].set_index("addID")

# Multi-type nodes: one indicator column per label type, aligned on addID.
# dtype=float is pinned explicitly because pandas >= 2.0 returns boolean dummies by
# default; with floats the 0 -> NaN replacement below and the resulting attribute
# values (1.0) are the same on every pandas version.
labeldf = pd.concat(
    [
        add_label,
        pd.get_dummies(add_label.type, dtype=float).rename(columns={'miner': 'miner_label'}),
        pd.get_dummies(miner.type, dtype=float).rename(columns={'miner': 'miner_block'}),
        pd.get_dummies(genesis.type, dtype=float),
    ],
    axis=1,
)

# An address counts as a miner if it is labelled as one OR appears as a block miner.
labeldf['miner'] = np.where(((labeldf.miner_label == 1) | (labeldf.miner_block == 1)), 1, np.nan)

# Keep only the flags of interest; turn 0 into NaN so that dropna() below removes
# every flag that does not apply to the address.
column_name = ['miner', 'genesis', 'Exchange', 'Gambling', 'ICO', 'Token Contract', 'nametag']
labeldf = labeldf[column_name].replace(0, np.nan)

# {addID: {flag: 1.0, ..., "nametag": ...}} containing only the non-NaN entries.
node_label_dic = {k: v.dropna().to_dict() for k,v in labeldf.T.items()} # drop nan

In [3]:
# Sanity check: number of keys in node_label_dic, plus the label dict stored for addID 756630.
len(node_label_dic), node_label_dic[756630]

(12488, {'Exchange': 1.0, 'nametag': 'Tidex 1'})

In [4]:
# transaction=pd.read_csv("transaction_data.csv")
transaction = pd.read_csv("../preprocessed/transaction_data_except_txfee.csv")

# Convert the `txtime` epoch timestamps (seconds) to timezone-aware UTC datetimes.
# A single vectorised call replaces the per-row datetime.fromtimestamp lambda.
transaction["txdatetime"] = pd.to_datetime(transaction["txtime"], unit="s", utc=True)

# Drop self-transactions, i.e. rows whose `in` and `out` addresses are identical.
transaction = transaction[transaction['in'] != transaction['out']]

In [5]:
# NOTE(review): monthly_network is never appended to in this cell — confirm whether
# later cells rely on it before removing it.
monthly_network = []
# Month-start timestamps (UTC) from 2015-08-01 through 2016-12-01 inclusive.
date_idx = pd.date_range(start='08/01/2015', end='12/01/2016', freq='MS', tz='UTC')

for time_from in tqdm(date_idx):
    # Half-open window [time_from, time_until) covering one calendar month.
    time_until = time_from + relativedelta(months=1)
    in_window = (transaction.txdatetime >= time_from) & (transaction.txdatetime < time_until)
    tx_bytime = transaction[in_window]

    # Collapse all transactions between the same (in, out) pair into one row with the summed value.
    real_tx = tx_bytime.groupby(["in", "out"], as_index=False).agg({"value": "sum"})
    # Drop pairs whose total value is zero. The explicit copy makes the column
    # assignment below write to an independent frame instead of a filtered slice
    # (which would raise SettingWithCopyWarning).
    real_tx = real_tx[real_tx["value"] != 0].copy()
    # NOTE(review): log_value is not listed in edge_attr below, so it is not written
    # to the exported graph — confirm whether it was meant to be exported.
    real_tx["log_value"] = np.log10(real_tx["value"])

    # Directed graph with one edge per pair, pointing from `out` to `in`, carrying `value`.
    tx_G_bytime = nx.from_pandas_edgelist(
        real_tx,
        source="out",
        target="in",
        edge_attr=["value"],
        create_using=nx.DiGraph(),
    )

    # Attach the label attributes; networkx skips dictionary entries whose key is not a node of this graph.
    nx.set_node_attributes(tx_G_bytime, node_label_dic)

    # Save the network, one GEXF file per month named by YYYYMM.
    month = time_from.strftime('%Y%m')
    nx.write_gexf(tx_G_bytime, f'/data01/storage_damini/Ethereum_network/monthly_network_{month}.gexf')

100%|██████████| 17/17 [01:08<00:00,  4.03s/it]


In [8]:
# Reload the 2015-08 monthly graph from disk.
# NOTE: read_gexf is called without node_type, so node ids come back as strings
# (e.g. '11379'), not the integer addIDs used as keys in node_label_dic.
G = nx.read_gexf('/data01/storage_damini/Ethereum_network/monthly_network_201508.gexf')

In [10]:
# Mapping {node id: attribute value} for every node of G that carries an 'Exchange' attribute.
nx.get_node_attributes(G, 'Exchange')

{'11379': 1.0,
 '11668': 1.0,
 '11266': 1.0,
 '11618': 1.0,
 '14116': 1.0,
 '11513': 1.0}