In [138]:
import requests
import pandas as pd
import time
from datetime import datetime
from dateutil.parser import parse
import copy

In [66]:
def intersect(s1, s2):
    """Tell whether two sets have at least one element in common.

    Args:
        s1: A set of hashable items.
        s2: A set (or other iterable) compared against ``s1``.

    Returns:
        bool: True when the collections overlap, False otherwise.
    """
    return not s1.isdisjoint(s2)

def map_addresses_with_id(ego_edge_list, identity):
    """Add ``from_id`` and ``to_id`` columns with the identity of every address.

    The previous implementation handed the whole ``from`` / ``to`` column to
    ``get_id_from_address``, which expects a single address; that raised a
    ``ValueError`` (ambiguous truth value) as soon as any identity row held an
    address. Addresses are now resolved one by one.

    Args:
        ego_edge_list: DataFrame with ``from`` and ``to`` columns. Each cell is
            either a list of addresses or a single address string.
        identity: Identity table passed through to ``get_id_from_address``.

    Returns:
        The same ``ego_edge_list`` object (modified in place, as before) with
        the two new columns. List cells map to lists of ids; string cells map
        to a single id.
    """
    def resolve(cell):
        # A bare string is one address; iterating it would split it into characters.
        if isinstance(cell, str):
            return get_id_from_address(cell, identity)
        return [get_id_from_address(address, identity) for address in cell]

    ego_edge_list['from_id'] = [resolve(cell) for cell in ego_edge_list['from']]
    ego_edge_list['to_id'] = [resolve(cell) for cell in ego_edge_list['to']]

    return ego_edge_list

def merger(identity):
    """Merge rows whose ``from`` address lists overlap, directly or transitively.

    Two rows belong to the same cluster when they share an address, or when
    they are linked through a chain of rows that do. Each cluster is collapsed
    onto its earliest row, whose ``from`` list is extended with the lists of
    the other rows in their original order (duplicates are kept).

    Fixes over the previous positional scan:
      * ``copy.deepcopy`` on a DataFrame does not copy the list objects stored
        in its cells, so extending them also modified the caller's data. The
        lists are now copied explicitly.
      * Rows were skipped after a drop and already-grown clusters were never
        re-checked, so chains such as ``[a] [b] [c] [a, b, c]`` were only
        partly merged and one address could end up in two identities.
      * The quadratic re-scan is replaced by a union-find pass.

    Args:
        identity: DataFrame with a ``from`` column of address lists (any other
            columns, e.g. ``id``, are carried along unchanged).

    Returns:
        A new DataFrame holding one row per cluster, keeping the index label
        and the other column values of the cluster's earliest row.
    """
    # Private copies of every address list so the input is never mutated.
    address_lists = [list(addresses) for addresses in identity['from']]

    # Union-find over row positions; the root is always the smallest position
    # of its cluster so the earliest row is the one that survives.
    parent = list(range(len(address_lists)))

    def find(position):
        while parent[position] != position:
            parent[position] = parent[parent[position]]  # path halving
            position = parent[position]
        return position

    first_row_of = {}
    for position, addresses in enumerate(address_lists):
        for address in addresses:
            root_known = find(first_row_of.setdefault(address, position))
            root_here = find(position)
            if root_known != root_here:
                parent[max(root_known, root_here)] = min(root_known, root_here)

    kept_positions = []
    for position, addresses in enumerate(address_lists):
        root = find(position)
        if root == position:
            kept_positions.append(position)
        else:
            # root < position, so the surviving list grows in row order.
            address_lists[root].extend(addresses)

    merged = identity.copy()
    merged['from'] = address_lists
    return merged.iloc[kept_positions]

In [2]:
def get_info_from_address(address, timeout=30):
    """Download the transactions of one address from blockchain.info.

    Args:
        address: Address string appended to the ``rawaddr`` endpoint.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The ``txs`` entry of the JSON response.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status (previously surfaced as an unrelated JSON
            decoding error).
        KeyError: If the response carries no ``txs`` field.
    """
    response = requests.get('https://blockchain.info/rawaddr/' + address, timeout=timeout)
    # Fail with the real HTTP status instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()['txs']

def get_transactions_from_addresses(address_list, delay_seconds=60):
    """Download and stack the raw transaction tables of several addresses.

    Changes over the previous version:
      * Frames are collected in a list and concatenated once;
        ``DataFrame.append`` was removed in pandas 2.0 and was quadratic.
      * ``except Exception`` replaces the bare ``except`` so Ctrl-C still
        interrupts the download, and the failure reason is printed.
      * The pause is configurable, is applied between consecutive requests
        (also after a failed one), and is skipped before the first request.

    Args:
        address_list: Iterable of address strings.
        delay_seconds: Pause between two consecutive requests, in seconds.

    Returns:
        DataFrame with columns ``index``, ``from``, ``to``, ``amount`` and
        ``timestamp``; ``index`` holds each row's position inside the
        per-address table it came from. Addresses that fail are reported and
        skipped.
    """
    columns = ['from', 'to', 'amount', 'timestamp']
    frames = []

    for position, address in enumerate(address_list):
        if position:
            time.sleep(delay_seconds)
        try:
            edge_table = get_edge_list(get_info_from_address(address))
            # Empty tables add nothing and only confuse dtype inference in concat.
            if len(edge_table):
                frames.append(edge_table)
            print('Completed address {}'.format(address))
        except Exception as error:
            print(' Address {} raised exception: {!r}'.format(address, error))

    if frames:
        raw_transactions = pd.concat(frames)
    else:
        raw_transactions = pd.DataFrame(columns=columns)

    # Keep the former per-address row number as the leading 'index' column.
    return raw_transactions.reset_index()


In [6]:
def get_edge_list(info_from_address):
    """Build the per-transaction table for one address.

    Args:
        info_from_address: Sequence of transaction dicts, as returned by
            ``get_info_from_address``.

    Returns:
        DataFrame with one row per transaction and the columns ``from``,
        ``to``, ``amount`` and ``timestamp``, each filled by its dedicated
        helper.
    """
    table_columns = {
        'from': create_sender_list(info_from_address),
        'to': create_receiver_list(info_from_address),
        'amount': amount_transferred_per_transaction(info_from_address),
        'timestamp': create_timestamp_list_transaction(info_from_address),
    }
    return pd.DataFrame(table_columns)

def _addressed_outputs(transaction):
    """Return the (address, value) pairs of the outputs carrying both fields."""
    return [
        (output['addr'], output['value'])
        for output in transaction['out']
        if 'addr' in output and 'value' in output
    ]


def create_receiver_list(transaction_ego):
    """List the receiving addresses of every transaction.

    Outputs lacking an ``addr`` (or ``value``) field are skipped. The previous
    code tried to record them in an undefined ``issues`` list, which raised
    ``NameError`` and made the whole address fail.

    Args:
        transaction_ego: Sequence of transaction dicts, each with an ``out``
            list of output dicts.

    Returns:
        list[list]: One list of receiver addresses per transaction, aligned
        position by position with ``amount_transferred_per_transaction``.
    """
    return [
        [address for address, _ in _addressed_outputs(transaction)]
        for transaction in transaction_ego
    ]


def amount_transferred_per_transaction(transaction_ego):
    """List the output values of every transaction.

    Only outputs that also carry an address are reported, so that entry ``i``
    of a transaction's amounts always belongs to entry ``i`` of the receivers
    from ``create_receiver_list``. Previously address-less outputs were kept
    here but dropped there, shifting amounts onto the wrong receiver.

    Args:
        transaction_ego: Sequence of transaction dicts, each with an ``out``
            list of output dicts.

    Returns:
        list[list]: One list of output values per transaction.
    """
    return [
        [value for _, value in _addressed_outputs(transaction)]
        for transaction in transaction_ego
    ]


def create_sender_list(transaction_ego):
    """List the sending addresses of every transaction.

    Inputs without a ``prev_out`` address are skipped. The previous code tried
    to record them in an undefined ``issues`` list, which raised ``NameError``
    and made the whole address fail.

    Args:
        transaction_ego: Sequence of transaction dicts, each with an
            ``inputs`` list of input dicts.

    Returns:
        list[list]: One list of sender addresses per transaction; the list is
        empty when no input of the transaction exposes an address.
    """
    sender_addresses = []
    for transaction in transaction_ego:
        senders = []
        for tx_input in transaction['inputs']:
            try:
                senders.append(tx_input['prev_out']['addr'])
            except (KeyError, TypeError):
                # No 'prev_out', no 'addr', or 'prev_out' is not a mapping.
                continue
        sender_addresses.append(senders)
    return sender_addresses

def create_timestamp_list_transaction(transaction_ego):
    """Collect the ``time`` field of every transaction, in order.

    Args:
        transaction_ego: Sequence of transaction dicts with a ``time`` key.

    Returns:
        list: The ``time`` values, one per transaction.
    """
    timestamps = []
    for transaction in transaction_ego:
        timestamps.append(transaction['time'])
    return timestamps



In [119]:
# Mapping functions

# def get_id_from_address(address, identity_table):
#     return(identity_table[[address in nested for nested in identity_table['from']]]['id'].tolist()[0])
def mapping(address_list, identity):
    """Resolve every address of a list through ``get_id_from_address``.

    Args:
        address_list: Iterable of addresses.
        identity: Identity table handed to ``get_id_from_address``.

    Returns:
        list: One resolved value per input address, in the same order.
    """
    resolved = []
    for address in address_list:
        resolved.append(get_id_from_address(address, identity))
    return resolved

def mapping_from(address_list, identity):
    """Resolve the sender side of a transaction to a one-element id list.

    Only the first address is looked up; the remaining senders are ignored.

    Args:
        address_list: Sender addresses of one transaction. May be empty when
            no input of the transaction exposes an address.
        identity: Identity table handed to ``get_id_from_address``.

    Returns:
        list: ``[id_of_first_address]``, or ``[]`` for an empty input
        (previously an ``IndexError``).
    """
    if len(address_list) == 0:
        return []
    return [get_id_from_address(address_list[0], identity)]

def get_id_from_address(address, identity_table):
    """Look up the identity id that owns an address.

    Args:
        address: The address to resolve.
        identity_table: DataFrame with a ``from`` column (collection of
            addresses per row) and an ``id`` column.

    Returns:
        The ``id`` of the row whose ``from`` collection contains ``address``;
        ``address`` itself when no row contains it. When several rows contain
        it, the candidate ids are printed and the first one is returned
        (previously this branch fell through and returned ``None``, which
        silently produced ``None`` nodes downstream).
    """
    id_found = [
        row_id
        for known_addresses, row_id in zip(identity_table['from'], identity_table['id'])
        if address in known_addresses
    ]

    if not id_found:
        return address

    if len(id_found) > 1:
        # Ambiguity means the identity table still has overlapping rows.
        print(id_found)

    return id_found[0]

def is_in_list(address_list, address):
    """Membership test with the container as first argument.

    The argument order lets the function be used with ``Series.apply`` while
    the address is passed as a keyword.

    Args:
        address_list: Container searched for ``address``.
        address: Value to look for.

    Returns:
        bool: True when ``address`` is contained in ``address_list``.
    """
    is_member = address in address_list
    return is_member

In [58]:
def edge_creator(raw_transactions):
    """Expand the transaction table into one edge per (sender, receiver) pair.

    Columns are read by position from the right, as before: last column =
    receiver ids, second to last = sender ids, fifth from last = amounts,
    fourth from last = timestamp. Amounts are matched to receivers by
    position.

    Changes over the previous version:
      * Rows are gathered in a list and the frame is built once;
        ``DataFrame.append`` was removed in pandas 2.0 and was quadratic.
      * A transaction with an empty sender list yields edges whose sender is
        ``None`` instead of raising ``IndexError``.

    Args:
        raw_transactions: DataFrame laid out as described above.

    Returns:
        DataFrame with columns ``sender``, ``receiver``, ``amount`` and
        ``timestamp``; only the first sender of each transaction is used.
    """
    columns = ['sender', 'receiver', 'amount', 'timestamp']
    records = []

    for row in raw_transactions.itertuples(index=False, name=None):
        senders, receivers = row[-2], row[-1]
        amounts, timestamp = row[-5], row[-4]
        sender = senders[0] if len(senders) else None
        for position, receiver in enumerate(receivers):
            records.append((sender, receiver, amounts[position], timestamp))

    return pd.DataFrame(records, columns=columns)

# Analysis of Terrorism Fund Raising

In [123]:
# Seed addresses of the analysis: each one is looked up on blockchain.info by
# get_transactions_from_addresses, and later resolved against the identity table.
address_list = ['17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD', 
                '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp', 
                '186YZVryvtxuXESLo1jzYU1xoRgyd5WARN', 
                '1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s', 
                '32cNfustcJXjz7afCGPuCuRpgZjcYJQsLE', 
                '3CUXTV35SteDufJyuSTSSWhHdrbnpbTXz9', 
                '3Fqne1QeMEGHVJwoz3m95fnDZvqfXL2Z4u', 
                '34xp4vRoCGJym3xR7yCVPFHoCNxv4Twseo', 
                '15K9Zj1AU2hjT3ebZMtWqDsMv3fFxTNwpf', 
                '34GzR7ytFGSviY6CttWH3uDV6QEy2n1JcD', 
                '3434fpnej1Y9cKb5pYwGXoeTnptmW3bXdq', 
                '123kGrVjKmvQAnjuNDuxMCnscFjNxoxnym', 
                '12Cojd2nCLaqtmvKXaAC5FFZ8HpqUa5i5C', 
                '12DWpXck5B3oQsp18NG22v57eBdMcHz326',
                '14btpv9LnvJBRPwLqakvdHLUjfLsZrN7sK',
                '19P5khHYGdsyGPX4xH8RMQxNaWbRCiqzZ3', 
                '1s8nW9cXRJtdHd1hQ1ZFFvgoUNkjKJcf8', 
                '197BDiQuWki4egPACYj1f2UtdZ2d4QiFP5', 
                '1DZ9iLykDrZgjMURNkbmokAwv17VsW998u', 
                '16YRiGSGLxKdLQPxiWeTgt8jrpZwRcxjQ3', 
                '1LDQhgUTEQJqzGTRPUY5EAthJvktixMnLE',
                '1LdnTDv4Zs524aU2fifLi3vVHWhPL6Cgp1',
                '1PLQFgpdEJQGQ9rMjEWP3gdKv1bJGu9fse',
                '13kcud8fKBfeoRjym9wC7tfLu4skL5s27t',
                '1F3twjJjFh6DeQWFNUxVCdLvZk1xNsee7T',
                '1L6BbVSFpHCTibmHAGmtx4qVntRx2n58NX',
                '1PVQiWq9ds3CsHF2h2wF6qoccxVenprSoQ',
                '1AppUXgJnhGrBpbV1vG4XwnXUvenTYh3M4']

In [124]:
# Download one row per transaction for every seed address (slow: the helper
# pauses between requests).
raw_transactions = get_transactions_from_addresses(address_list)
# Give every transaction row a sequential id; it doubles as the initial
# identity id of that row's sender list.
raw_transactions['id'] = range(len(raw_transactions))
# Create a table with only id and list of senders
identity = copy.deepcopy(raw_transactions.loc[:, ['from','id']])
# Collapse rows whose sender lists share an address into a single identity.
identity = merger(identity)
# Sender side: only the first sender address is resolved (see mapping_from);
# the set/list round-trip de-duplicates the resulting one-element list.
raw_transactions['from_id'] = raw_transactions['from'].apply(mapping_from, identity = identity).apply(set).apply(list)
# Receiver side: every receiver address is resolved individually.
raw_transactions['to_id'] = raw_transactions['to'].apply(mapping, identity = identity)
# NOTE: edge_creator reads columns by negative position, so 'id', 'from_id'
# and 'to_id' must be appended in exactly this order.
edges = edge_creator(raw_transactions)

Completed address 17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD
Completed address 1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp
Completed address 186YZVryvtxuXESLo1jzYU1xoRgyd5WARN
Completed address 1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s
Completed address 32cNfustcJXjz7afCGPuCuRpgZjcYJQsLE
Completed address 3CUXTV35SteDufJyuSTSSWhHdrbnpbTXz9
Completed address 3Fqne1QeMEGHVJwoz3m95fnDZvqfXL2Z4u
Completed address 34xp4vRoCGJym3xR7yCVPFHoCNxv4Twseo
Completed address 15K9Zj1AU2hjT3ebZMtWqDsMv3fFxTNwpf
Completed address 34GzR7ytFGSviY6CttWH3uDV6QEy2n1JcD
Completed address 3434fpnej1Y9cKb5pYwGXoeTnptmW3bXdq
Completed address 123kGrVjKmvQAnjuNDuxMCnscFjNxoxnym
Completed address 12Cojd2nCLaqtmvKXaAC5FFZ8HpqUa5i5C
Completed address 12DWpXck5B3oQsp18NG22v57eBdMcHz326
Completed address 14btpv9LnvJBRPwLqakvdHLUjfLsZrN7sK
Completed address 19P5khHYGdsyGPX4xH8RMQxNaWbRCiqzZ3
 Address 1s8nW9cXRJtdHd1hQ1ZFFvgoUNkjKJcf8 raised exception
Completed address 197BDiQuWki4egPACYj1f2UtdZ2d4QiFP5
Completed address 1DZ9iLykDrZgjMURNkbmo

In [126]:
# Persist the complete edge list. NOTE(review): the destination is a
# hard-coded absolute path on the author's machine.
edges.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/edges.csv', index=False)

In [147]:
# Add a datetime column derived from the numeric timestamps (mutates `edges`).
# NOTE(review): datetime.fromtimestamp converts to the machine's local time
# zone, so the values (and the cut-off below) depend on where this runs.
edges['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in edges['timestamp']]

# Keep only edges strictly after 2021-01-01 00:00 and rename sender/receiver
# to source/target before writing them out.
edges_2021 = edges[edges.Date>parse('2021-01-01')]
edges_2021.columns = ['source', 'target', 'amount', 'timestamp', 'Date']
edges_2021.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/edges_2021.csv', index=False)

In [144]:
# Persist the merged identity table (the output of merger) next to the edges.
identity.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/identity.csv', index=False)

In [149]:
# Show what each seed address resolves to in the identity table; an address
# printed unchanged means no identity row contains it.
for address in address_list:
    print(get_id_from_address(address, identity))

1
70
170
29
272
272
3Fqne1QeMEGHVJwoz3m95fnDZvqfXL2Z4u
299
392
400
402
269
269
269
272
70
70
70
70
70
70
70
70
70
70
70
70
70
