In [1]:
import requests
import pandas as pd
import time
from datetime import datetime
import copy

In [2]:
# Get information about the adress of interest through blockchain API

In [3]:
def create_receiver_list(transaction_ego):
    receiver_addresses = list()
    issues_receiver = list()
    amount = list()
    for j in range(len(transaction_ego)):
        receiver_addresses.append([])
        amount.append([])
        for i in range(len(transaction_ego[j]['out'])):
            try:
                receiver_addresses[-1].append(transaction_ego[j]['out'][i]['addr'])
                amount[-1].append(transaction_ego[j]['out'][i]['value'])
            except:
                issues.append([i,j])
    
    return(receiver_addresses)



def amount_transferred_per_transaction(transaction_ego):
    
    amount = list()
    for j in range(len(transaction_ego)):
        amount.append([])
        for i in range(len(transaction_ego[j]['out'])):
            try:
                amount[-1].append(transaction_ego[j]['out'][i]['value'])
            except:
                 pass
    
    return(amount)




def create_sender_list(transaction_ego):
    
    sender_addresses = list()
    issues_sender = list()
    for j in range(len(transaction_ego)):
        sender_addresses.append([])
        for i in range(len(transaction_ego[j]['inputs'])):
            try:
                sender_addresses[-1].append(transaction_ego[j]['inputs'][i]['prev_out']['addr'])
            except:
                issues.append([i,j])
    return(sender_addresses)

def create_timestamp_list_transaction(transaction_ego):
    return([transaction_ego[i]['time'] for i in range(len(transaction_ego))])

def create_edge_list(sender_addresses, receiver_addresses, amount, timestamps_transactions, id_sender, id_receiver):
    
    data = pd.DataFrame([], )

    for nested in range(len(sender_addresses)):
        for sender in range(len(sender_addresses[nested])):
            for receiver in range(len(receiver_addresses[nested])):
                data = data.append(pd.DataFrame({'sender': sender_addresses[nested][sender], 'id_sender':id_sender[nested], 'receiver': receiver_addresses[nested][receiver], 'id_receiver': id_receiver[nested], 'amount': amount[nested][receiver], 'timestamp': timestamps_transactions[nested]}, index=[0]), ignore_index=True)
    return(data)

In [4]:
def get_identifier(hashes, identity):
    '''
    Given a list of senders or receivers and a identity table created with get_identity_table, return the addresses
    mapped with their ID
    '''
    renamed = list()
    for address_list in hashes:
        renamed.append([])
        match = False
        for address in address_list:
            for index_identity in range(len(identity)):
                if address in identity.iloc[index_identity, 0]:
                    renamed[-1].append(identity.iloc[index_identity, 1])
                    match = True
                    break
                if match == True:
                    break

            if match==False:
                renamed[-1].append(address)
    
    return(renamed)

In [5]:
def get_identity_table(senders):
    '''
    Map addresses that appear together as input in at least one transaction to a common ID
    '''
    identity = pd.DataFrame(columns=('address', 'id'))
    id_number = 1

    for nested_list in senders:
        match = False
        for address in nested_list:
            for i in range(len(identity)):
                if address in identity.iloc[i, 0]:
                    identity.iloc[i, 0] = list(set(identity.iloc[i, 0] + nested_list))
                    match = True
            if match==False:
                identity= identity.append({'address':nested_list, 'id':id_number}, ignore_index=True)
                id_number += 1
    
    identity = merge_clusters(identity)
    
    return(identity)               

In [6]:
def merge_clusters(identity):
    '''
    Merge two rows of the identity table if at least one item is in common
    '''
    i = 0
    j = 0
    while i < len(identity):
        j = 0
        while j < len(identity):
            if (i != j) & len(set(identity.iloc[i,0]).intersection(set(identity.iloc[j,0])))>0:
                identity.iloc[j,0] = identity.iloc[j,0] + identity.iloc[i,0]
                identity.drop(identity.index[i], inplace=True)
                
            j += 1
        i += 1
        
    return(identity)

In [7]:
def get_info_from_address(address):
    r =requests.get('https://blockchain.info/rawaddr/' + address).json()
    r = r['txs']
    return(r)

def get_edge_list(info_from_address):
    
    senders = create_sender_list(info_from_address)
    receivers = create_receiver_list(info_from_address)
    
    edges_ego = pd.DataFrame({'from':senders, 'to':receivers, 'amount':amount_transferred_per_transaction(info_from_address), 'timestamp':create_timestamp_list_transaction(info_from_address)})
    
    return(edges_ego)

def map_addresses_with_id(ego_edge_list, identity):
    
    from_list = [nested for nested in get_identifier(ego_edge_list['from'], identity)]
    to_list = [nested for nested in get_identifier(ego_edge_list['to'], identity)]
    
    ego_edge_list['from_id'] = from_list
    ego_edge_list['to_id'] = to_list
    
    return(ego_edge_list)

In [8]:
def flatten_nested_list(nested_list):
    return([item for nested in  nested_list for item in nested])

In [9]:
def get_other_addresses(edges_ego_mapped, identity_table, ego_address):
    
    other_addresses = {item for nested in edges_ego_mapped['to'] for item in nested if type(item)==str}
    other_id = {item for nested in edges_ego_mapped['to_id'] for item in nested  if type(item)==int}
    other_id_mapped = identity_table.loc[identity_table['id'].isin(list(other_id)), 'from'].tolist()
    other_id_mapped_flatten = {item for nested in other_id_mapped for item in nested}
    other_addresses = other_addresses.union(other_id_mapped_flatten)
    if ego_address in other_addresses:
        other_addresses.remove(ego_address)
        
    return(other_addresses)

def other_addresses_in_time_window(edges_ego_time_window, identity_table, ego_address):
    
    other_addresses = get_other_addresses(edges_ego_time_window, identity_table, ego_address)
    set_senders_time_window = set(flatten_nested_list(edges_ego_time_window['from']))
    set_receivers_time_window = set(flatten_nested_list(edges_ego_time_window['to']))                              
    others_time_window = other_addresses.intersection(set_senders_time_window.union(set_receivers_time_window))
    
    return(list(others_time_window))

In [10]:
def merge_id_clusters(identity):
    
    i = 0
    j = 0
    length_i = length_j =len(identity)
    
    while i < length_i:
        indeces_match = list()
        j = 0
        match = False
        while (j < length_j) &  (i < length_j):
            if (i != j) & len(set(identity.iloc[i,0]).intersection(set(identity.iloc[j,0])))>0:
                #identity.iloc[j,0].extend(identity.iloc[i,0])
                indeces_match.append(j)
                match = True
            j += 1
        if match:
            for index in indeces_match:
                identity.iloc[i,0].extend(identity.iloc[index,0])
                    
            identity.drop(identity.index[indeces_match], inplace = True)
            length_j = length_i = len(identity)

           
        i += 1

    return(identity)

In [11]:
#create_edge_list2(flatten(edges_ego_mapped_time_window['from_id'].tolist()),  edges_ego_mapped_time_window['to'].tolist(), edges_ego_mapped_time_window['amount'].tolist(), edges_ego_mapped_time_window['timestamp'].tolist(), flatten(edges_ego_mapped_time_window['to_id'].tolist()))

def get_receivers_total_amount_transaction_i(raw_edges, edge_index):
    trans = raw_edges.loc[raw_edges.timestamp==raw_edges.loc[edge_index, 'timestamp']]
    full_table = create_edge_list2(flatten(trans['from_id'].tolist()),  trans['to'].tolist(), trans['amount'].tolist(), trans['timestamp'].tolist(), flatten(trans['to_id'].tolist()))
                          
    #if len(full_table.loc[raw_edges.timestamp==raw_edges.loc[edge_index, 'timestamp'], ['to_id', 'amount']])>1:
    grouped_amount_received = full_table.loc[:,['to_id', 'amount']].groupby(['to_id']).sum().reset_index()
    return(grouped_amount_received)
#     else:
#         return(raw_edges.loc[raw_edges.timestamp==raw_edges.loc[edge_index, 'timestamp'], ['to_id', 'amount']])
#get_receivers_total_amount_transaction_i(edges_ego_mapped_time_window,1)


In [12]:
def flatten(nested_list):
    nested_list = [set(a) for a in nested_list]
    return([element for nested in nested_list for element in nested])

def create_edge_list2(sender_addresses, receiver_addresses, amount, timestamps_transactions, id_receiver):
    
    data = pd.DataFrame([], )

    for nested in range(len(sender_addresses)):
        for receiver in range(len(receiver_addresses[nested])):
            data = data.append(pd.DataFrame({'sender': sender_addresses[nested],  'receiver': receiver_addresses[nested][receiver], 'to_id': id_receiver[nested], 'amount': amount[nested][receiver], 'timestamp': timestamps_transactions[nested]}, index=[0]), ignore_index=True)
    return(data)

In [15]:
def get_edge_list_complete(edges_mapped):
    
    edge_list_complete = pd.DataFrame(columns = ['from', 'to', 'amount','timestamp'])
    for edge_index in range(len(edges_mapped)):
        #receivers = edges_mapped.loc[edges_mapped.timestamp==edges_mapped.loc[edge_index, 'timestamp'], ['to_id', 'amount']].groupby(['to_id']).sum().reset_index()
        receivers = get_receivers_total_amount_transaction_i(edges_mapped, edge_index)
        for receiver_index in range(len(receivers)):

            sender = edges_mapped.loc[edge_index, 'from_id'][0]
            timestamp = edges_mapped.loc[edge_index, 'timestamp']
            receiver = receivers.iloc[receiver_index, 0]
            amount = receivers.iloc[receiver_index, 1]

            edge_list_complete = edge_list_complete.append({'from':sender, 'to':receiver,
                                                           'amount':amount, 'timestamp':timestamp}, ignore_index=True)
    
    return(edge_list_complete)

In [53]:
def blockchain_network(address_list):
    raw_transactions = pd.DataFrame(columns=['from','to','amount','timestamp'])
    # Get table of transactions in raw form
    for address in address_list:
        request_info = get_info_from_address(address)
        raw_transactions = raw_transactions.append(get_edge_list(request_info))
        time.sleep(120)
        print('Completed address {}'.format(address))
    # erset index raws
    raw_transactions.reset_index(inplace=True)
    #assign id to each input list
    raw_transactions['id'] = range(len(raw_transactions))
    # Create a table with only id and list of senders
    identity = copy.deepcopy(raw_transactions.loc[:, ['from','id']])
    # Merge id that share at least one address
    identity = merge_id_clusters(identity)
    # map from and to column with id 
    edges_ego_mapped = map_addresses_with_id(raw_transactions, identity)
    #get data of the period under analysis
    edges_ego_mapped['date'] = [datetime.fromtimestamp(timestamp) for timestamp in edges_ego_mapped['timestamp']]
    
    #edges_ego_mapped_time_window = edges_ego_mapped[(edges_ego_mapped.date >= DATE_FROM) & (edges_ego_mapped.date <= DATE_TO)]
    
    # Create edge list cleaned
    edges_complete_ego = get_edge_list_complete(edges_ego_mapped)
    #get account of the others in the transactions
    #other_addresses_time_window = other_addresses_in_time_window(edges_ego_mapped_time_window, identity, EGO_ADDRESS)
    
    return({'edges':edges_complete_ego, 'identity':identity})

# Analysis of  Terrorism Fund  Raising
## account 17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD

In [30]:
DATE_FROM = pd.to_datetime('2021-01-01')
DATE_TO = pd.to_datetime('2021-05-01')

### Insert the addresses to scrape in the address_list

In [29]:
address_list = ['17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD', '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp', '186YZVryvtxuXESLo1jzYU1xoRgyd5WARN', '1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s']

In [59]:
# Scrape the info and convert it into an edge table.

In [54]:
net = blockchain_network(address_list)

Completed address 17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD
Completed address 1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp
Completed address 186YZVryvtxuXESLo1jzYU1xoRgyd5WARN
Completed address 1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s


In [58]:
edges = net['edges']
edges.columns = ['source', 'target', 'amount', 'timestamp']
edges.to_csv('edges_transactions', index=False)

In [None]:
# OLD STAFF (TO DELETE)

In [119]:
# Define constants
EGO_ADDRESS = '17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD'
DATE_FROM = pd.to_datetime('2021-01-01')
DATE_TO = pd.to_datetime('2021-05-01')

In [73]:
# request inofrmation about this account
transaction_ego = get_info_from_address(EGO_ADDRESS)

In [235]:
data = blockchain_network(transaction_ego, EGO_ADDRESS, DATE_FROM, DATE_TO)
edges = data['edges']
identity = data['identity']
others = data['others']

In [236]:
data_others = dict()

for other in others:
    transactions_other = get_info_from_address(other)
    data_others[other] = blockchain_network(transactions_other, other, DATE_FROM, DATE_TO)

In [238]:
data_others.keys()

dict_keys(['186YZVryvtxuXESLo1jzYU1xoRgyd5WARN', '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp'])

In [241]:
data_others['186YZVryvtxuXESLo1jzYU1xoRgyd5WARN']['edges']

Unnamed: 0,from,to,amount,timestamp
0,0,35chPAg9KWBo8pmcAab15CjWGjvat7A91J,81809,1617618164
1,1,0,216944,1617004693


In [242]:
data_others['1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp']['edges']

Unnamed: 0,from,to,amount,timestamp
0,81,3AqaajWfJtiDJtFLM8DCxNt62HE8iRLptA,1119596581,1612438752
1,1,81,5227212,1612436104
2,81,3NTFsy6EhPgMofXS9c5U79TZrJZbdZcTyD,484197203,1611503807
3,3,81,16806564,1611501360
4,81,3KvDs3fj53FGyyzcpZ9t3rBZ5AQFY5AzMB,543144026,1611489405
5,5,17MK6zezZyxoEjwamGKqtbpoZau8K8dprH,7133684,1611481841
6,5,81,13957844,1611480569
7,81,3Khnx4ke7kYMVfnCXeTbXgwwF9Su9vWgMP,427605643,1611395813
8,8,81,1880354,1611318737
9,81,3JHMqFYDsy1mk1madCfpBcyLsDzPfiatYs,1205076934,1611259023


In [246]:
for key in data_others.keys():
    data_others[key]['edges'].to_csv('./edge_folder/edges_heuristic_input/'+ key + '_edges.csv')
    data_others[key]['identity'].to_csv('./edge_folder/edges_heuristic_input/' + key +'_identity.csv')
    #data_others[key]['others'].to_csv('./edge_folder/edges_heuristic_input/' + key + '_others.csv')

In [245]:
data_others[key]['edges']

Unnamed: 0,from,to,amount,timestamp
0,0,35chPAg9KWBo8pmcAab15CjWGjvat7A91J,81809,1617618164
1,1,0,216944,1617004693


In [78]:
# Get table of transactions in raw form
raw_transactions = get_edge_list(transaction_ego)
#assign id to each input list
raw_transactions['id'] = range(len(raw_transactions))
# Create a table with only id and list of senders
identity = copy.deepcopy(raw_transactions.loc[:, ['from','id']])
# Merge id that share at least one address
identity = merge_id_clusters(identity)
# map from and to column with id 
edges_ego_mapped = map_addresses_with_id(raw_transactions, identity)

In [80]:
# Take only transactions of period for analysis
DATE_FROM = pd.to_datetime('2021-01-01')
DATE_TO = pd.to_datetime('2021-04-01')
edges_ego_mapped['date'] = [datetime.fromtimestamp(timestamp) for timestamp in edges_ego_mapped['timestamp']]
edges_ego_mapped_time_window = edges_ego_mapped[(edges_ego_mapped.date >= DATE_FROM) & (edges_ego_mapped.date <= DATE_TO)]

In [85]:
edges_complete_ego = get_edge_list_complete(edges_ego_mapped_time_window)

In [86]:
edges_complete_ego.to_csv('edges_complete_ego.csv')
identity.to_csv('identity_ego.csv')

In [90]:
other_addresses_time_window = other_addresses_in_time_window(edges_ego_mapped_time_window, identity, EGO_ADDRESS)

In [91]:
other_addresses_time_window

['186YZVryvtxuXESLo1jzYU1xoRgyd5WARN', '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp']

In [12]:
r_other1 =requests.get('https://blockchain.info/rawaddr/' + '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp')

In [13]:
edges = pd.read_csv('/home/massi/my_project_dir/edges_1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp.csv').iloc[:, 1:]

In [19]:
DATE = '2021-01-01'
transactions_data = r_other1.json()['txs']
transactions_row = get_ego_edge_list(transactions_data)
transactions_row['id_sender'] = range(len(transactions_row))

In [17]:
edges_dict = edges.to_dict('index')
for key in edges_dict.keys():
    edges_dict[key]['from'] = [item.replace("'",'') for item in edges_dict[key]['from'].strip('[').strip(']').split(', ')]
    edges_dict[key]['to'] = [item.replace("'",'') for item in edges_dict[key]['to'].strip('[').strip(']').split(', ')]


In [23]:
identity = pd.DataFrame(edges_dict).transpose()
identity['id'] = list(identity.index)
identity = identity.iloc[:,[0,-1]]

100

In [31]:
identity = merge_id_clusters(identity)

In [47]:
edges_other_mapped = map_addresses_with_id(transactions_row, identity)

In [58]:
edges_other_mapped['amount'] = [string.strip('[').strip(']').split(', ') for string in edges_other_mapped['amount']]

In [66]:
edge_list_complete.to_csv('./edge_list_renamed_cEp')
identity.to_csv('./identity_cEp')

In [117]:
edges_mapped = map_addresses_with_id(edges, identity)
edges_mapped['date'] = [datetime.fromtimestamp(timestamp) for timestamp in edges_mapped['timestamp']]
edges_mapped_time_window = edges_mapped[edges_mapped.date >= pd.to_datetime(DATE)]

Unnamed: 0,from,to,amount,timestamp,from_id,to_id,date
0,"[1BThnRwVPmAnfhk3jv4sbu2asqzwAYnD1a, 1P83QW4sK...","[1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s, 3AqaajWfJ...","[1118471008, 1125573]",1612438752,"{64, 67, 69, 72, 41, 75, 78, 49, 53, 59}","{3AqaajWfJtiDJtFLM8DCxNt62HE8iRLptA, 1NDyJtNTj...",2021-02-04 12:39:12
1,[14hMndJ5eQpeWCS5gGC8jWESMwC3hYp7w6],"[15DKCQxWSucdcUpB6Nb54nEsbsVjVyWvdx, 1Lm9BCDUK...","[781, 5226431]",1612436105,{2},"{64, 67, 69, 72, 41, 75, 15DKCQxWSucdcUpB6Nb54...",2021-02-04 11:55:05
2,"[18Jdg4mkD2Q99yRR3bUwjAQKfjAFQQSeGF, 1FiamM4mj...","[3NTFsy6EhPgMofXS9c5U79TZrJZbdZcTyD, 1NDyJtNTj...","[157143, 484040060]",1611503807,"{64, 67, 69, 72, 41, 75, 78, 49, 53, 59}","{3NTFsy6EhPgMofXS9c5U79TZrJZbdZcTyD, 1NDyJtNTj...",2021-01-24 16:56:47
3,"[1Ps2wUfQMqyyqYSQ5ZRGdq4XXwUycNP8ZL, 1CLdHUNzv...","[14pyqXYhupm1tBQt45FfD2Yz5kpq5P5KfE, 1Lm9BCDUK...","[1648848, 15157716]",1611501360,{3},"{64, 67, 14pyqXYhupm1tBQt45FfD2Yz5kpq5P5KfE, 6...",2021-01-24 16:16:00
4,"[13BTP1bMkbEVEos9bquDGxGjstjzCk6zM5, 1Lm9BCDUK...","[1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s, 3KvDs3fj5...","[542817721, 326305]",1611489405,"{64, 67, 69, 72, 41, 75, 78, 49, 53, 59}","{1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s, 3KvDs3fj5...",2021-01-24 12:56:45
5,[1P6d5jNuDfpbYw5otcsYkCTE6HF4kUfCDA],"[17MK6zezZyxoEjwamGKqtbpoZau8K8dprH, 1Lm9BCDUK...","[2543626, 4590058]",1611481841,{4},"{64, 67, 69, 72, 41, 17MK6zezZyxoEjwamGKqtbpoZ...",2021-01-24 10:50:41
6,[1P6d5jNuDfpbYw5otcsYkCTE6HF4kUfCDA],"[1C42APDuK7mKENLRhN3nD935tGqpsqsB7G, 1Lm9BCDUK...","[19283, 13938561]",1611480569,{4},"{64, 67, 69, 72, 41, 59, 75, 78, 49, 53, 1C42A...",2021-01-24 10:29:29
7,"[1NuYKc3wNzzxvdELSewj9qe6wPLjrhYDwQ, 1Ab4DqBsA...","[3Khnx4ke7kYMVfnCXeTbXgwwF9Su9vWgMP, 1NDyJtNTj...","[942732, 426662911]",1611395813,"{64, 67, 69, 72, 41, 75, 78, 49, 53, 59}","{1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s, 3Khnx4ke7...",2021-01-23 10:56:53
8,"[1sBFdf44aRKBUczFzG1mTH9wKREpLJSvG, 1BYJPeaqkS...","[1J741yhh4KEUGQ11b8QLDosjqz1BrfUmE, 1Lm9BCDUKo...","[1803, 1878551]",1611318737,{6},"{64, 67, 69, 1J741yhh4KEUGQ11b8QLDosjqz1BrfUmE...",2021-01-22 13:32:17
9,"[1GQV4TiHJPabr5mo3BPjU2e7Zto4Pfz51V, 1CziT4X7H...","[1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s, 3JHMqFYDs...","[1203722888, 1354046]",1611259023,"{64, 67, 69, 72, 41, 75, 78, 49, 53, 59}","{1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s, 3JHMqFYDs...",2021-01-21 20:57:03


In [10]:
ego = '17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD'

In [11]:
r =requests.get('https://blockchain.info/rawaddr/17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD')

In [18]:
# represent the inormation in json format
r_js = r.json()

# information about the transactions of this account
transaction_ego = r_js['txs']

In [16]:
senders = create_sender_list(transaction_ego)
senders = [list(set(nested)) for nested in senders]

receivers = create_receiver_list(transaction_ego)
receivers = [list(set(nested)) for nested in receivers]

In [205]:
identity = get_identity_table(senders)
identity = merge_clusters(identity)

In [232]:
from_list = [set(nested) for nested in get_identifier(senders, identity)]
to_list = [set(nested) for nested in get_identifier(receivers, identity)]

### Create an edge list of the transactions of ego with ID instead on names where possible

In [237]:
edges_ego = pd.DataFrame({'from':from_list, 'to':to_list, 'amount':amount_transferred_per_transaction(transaction_ego)})

In [36]:
sender_addresses = create_sender_list(transaction_ego)
receiver_addresses = create_receiver_list(transaction_ego)
timestamps_transactions = create_timestamp_list_transaction(transaction_ego)
amount = amount_transferred_per_transaction(transaction_ego)

In [37]:
data = create_edge_list(sender_addresses, receiver_addresses, amount, timestamps_transactions)

In [38]:
data['datetime'] = [pd.to_datetime(time.strftime('%A, %Y-%m-%d %H:%M:%S', time.localtime(timestamp))) for timestamp in data['timestamp']]

In [42]:
data_ego_2021 = data[data.datetime>=pd.to_datetime('2021-01-01')]

In [43]:
OUT_PUT_PATH = 'edge_folder/edges_2021/ego_edges_2021.csv'
data_ego_2021.to_csv(OUT_PUT_PATH)

In [13]:
len(sender_addresses), len(receiver_addresses), len(timestamps_transactions)

(70, 70, 70)

In [81]:
OUT_PUT_PATH = 'edge_folder/ego_edges1.csv'
data.to_csv(OUT_PUT_PATH)

In [27]:
others = list(set(pd.concat([data.loc[data.datetime>=pd.to_datetime('2021-01-01'), 'receiver'], data.loc[data.datetime>=pd.to_datetime('2021-01-01'), 'sender']]).tolist()))
others.remove(ego)

In [31]:
len(others)

4

In [32]:
def get_edgelist_wallet(address):
    
    r =requests.get('https://blockchain.info/rawaddr/' + address)
    # represent the inormation in json format
    r_js = r.json()

    # information about the transactions of this account
    transaction_ego = r_js['txs']

    sender_addresses = create_sender_list(transaction_ego)
    receiver_addresses = create_receiver_list(transaction_ego)
    timestamps_transactions = create_timestamp_list_transaction(transaction_ego)
    amount = amount_transferred_per_transaction(transaction_ego)

    data = create_edge_list(sender_addresses, receiver_addresses, amount, timestamps_transactions)
    data['datetime'] = [pd.to_datetime(time.strftime('%A, %Y-%m-%d %H:%M:%S', time.localtime(timestamp))) for timestamp in data['timestamp']]
    data = data[data.datetime>=pd.to_datetime('2021-01-01')]
    return(data)

In [35]:
for i in range(len(others)):
    data = get_edgelist_wallet(others[i])
    OUT_PUT_PATH = 'edge_folder/edges_2021/edges_' + others[i] + '.csv'
    data.to_csv(OUT_PUT_PATH)

In [None]:
import os
import glob

os.chdir("./edge_folder/edges_2021")
file_extension = '.csv'
all_filenames = [i for i in glob.glob(f"*{file_extension}")]

In [56]:
file_extension = '.csv'
all_filenames = [i for i in glob.glob(f"*{file_extension}")]

combined_csv_data = pd.concat([pd.read_csv(f) for f in all_filenames])
others = list(set(pd.concat([combined_csv_data['receiver'], combined_csv_data['sender']]).tolist()))

OUT_PUT_PATH = './ego_network_2021.csv'
combined_csv_data.to_csv(OUT_PUT_PATH)

In [76]:
others = list(set(pd.concat([combined_csv_data['receiver'], combined_csv_data['sender']]).tolist()))
others = pd.DataFrame({'id':others})
others['label'] = ['target' if target==ego else 'other' for target in others['id'] ]
others[others.id==ego]

Unnamed: 0,id,label
1748,17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD,target


In [77]:

OUT_PUT_PATH = './nodes_ego_network_2021.csv'
others.to_csv(OUT_PUT_PATH)