In [3]:
import requests
import pandas as pd
import time
from datetime import datetime
from dateutil.parser import parse
import copy

In [278]:
def intersect(s1, s2):
    return(len(s1.intersection(s2))>0)

def map_addresses_with_id(ego_edge_list, identity):
    
    from_list = [nested for nested in get_id_from_address(ego_edge_list['from'], identity)]
    to_list = [nested for nested in get_id_from_address(ego_edge_list['to'], identity)]
    
    ego_edge_list['from_id'] = from_list
    ego_edge_list['to_id'] = to_list
    
    return(ego_edge_list)

def merger(identity):
    
    copy_identity = copy.deepcopy(identity)    
    end = len(identity['from'])
    idx = 0
    
    while idx < len(copy_identity['from']):

        from1 = copy_identity.iloc[idx,0]
        from1_idx = copy_identity.iloc[idx,:].index
        idxes_mtch = copy_identity.index[copy_identity['from'].apply(set).apply(intersect, s2=set(from1))].tolist()

        if len(idxes_mtch)>1:
            for to_remove in idxes_mtch[1:]:
                #copy_identity.loc[idxes_mtch[0], 'from'].extend(copy_identity.loc[to_remove, 'from'])
                copy_identity.at[idxes_mtch[0], 'from'] = copy_identity.at[idxes_mtch[0], 'from'] + copy_identity.at[to_remove, 'from']
            copy_identity.drop(idxes_mtch[1:], inplace = True)
            
            end = len(copy_identity['from'])
        idx += 1
        
    return(copy_identity)

In [279]:
def get_info_from_address(address):
    r =requests.get('https://blockchain.info/rawaddr/' + address).json()
    r = r['txs']
    return(r)

def get_transactions_from_addresses(address_list):
    
    raw_transactions = pd.DataFrame(columns=['from','to','amount','timestamp'])
    # Get table of transactions in raw form
    for address in address_list:
        try:
            request_info = get_info_from_address(address)
            raw_transactions = raw_transactions.append(get_edge_list(request_info))
            time.sleep(20)
            print('Completed address {}'.format(address))
        except:
            print(' Address {} raised exception'.format(address))
    
    raw_transactions.reset_index(inplace=True)
    
    return(raw_transactions)


In [280]:
def get_edge_list(info_from_address):
    
    senders = create_sender_list(info_from_address)
    receivers = create_receiver_list(info_from_address)
    
    edges_ego = pd.DataFrame({'from':senders, 'to':receivers, 'amount':amount_transferred_per_transaction(info_from_address), 'timestamp':create_timestamp_list_transaction(info_from_address)})
    
    return(edges_ego)

def create_receiver_list(transaction_ego):
    receiver_addresses = list()
    issues_receiver = list()
    amount = list()
    for j in range(len(transaction_ego)):
        receiver_addresses.append([])
        amount.append([])
        for i in range(len(transaction_ego[j]['out'])):
            try:
                receiver_addresses[-1].append(transaction_ego[j]['out'][i]['addr'])
                amount[-1].append(transaction_ego[j]['out'][i]['value'])
            except:
                issues.append([i,j])
    
    return(receiver_addresses)


def amount_transferred_per_transaction(transaction_ego):
    
    amount = list()
    for j in range(len(transaction_ego)):
        amount.append([])
        for i in range(len(transaction_ego[j]['out'])):
            try:
                amount[-1].append(transaction_ego[j]['out'][i]['value'])
            except:
                 pass
    
    return(amount)


def create_sender_list(transaction_ego):
    
    sender_addresses = list()
    issues_sender = list()
    for j in range(len(transaction_ego)):
        sender_addresses.append([])
        for i in range(len(transaction_ego[j]['inputs'])):
            try:
                sender_addresses[-1].append(transaction_ego[j]['inputs'][i]['prev_out']['addr'])
            except:
                issues.append([i,j])
    return(sender_addresses)

def create_timestamp_list_transaction(transaction_ego):
    return([transaction_ego[i]['time'] for i in range(len(transaction_ego))])



In [281]:
# Mapping functions

# def get_id_from_address(address, identity_table):
#     return(identity_table[[address in nested for nested in identity_table['from']]]['id'].tolist()[0])
def mapping(address_list, identity):
    return([get_id_from_address(address, identity) for address in address_list])

def mapping_from(address_list, identity):
    return([get_id_from_address(address_list[0], identity)])

def get_id_from_address(address, identity_table):
    id_found = identity_table[identity_table['from'].apply(is_in_list, address = address)]['id'].tolist()
    
    if len(id_found)==1:
        return(id_found[0])
    
    elif len(id_found)>1:
        print(id_found)
    else:
        return(address)

def is_in_list(address_list, address):
    return(address in address_list)

In [282]:
def edge_creator(raw_transactions):
    
    edges = pd.DataFrame(columns = ['sender', 'receiver', 'amount', 'timestamp'])

    for index_raw in range(len(raw_transactions)):
        for index_receiver in range(len(raw_transactions.iloc[index_raw, -1])):
            sender = raw_transactions.iloc[index_raw, -2][0]
            receiver = raw_transactions.iloc[index_raw, -1][index_receiver]
            amount = raw_transactions.iloc[index_raw, -5][index_receiver]
            timestamp = raw_transactions.iloc[index_raw, -4]
            edges = edges.append({'sender':sender, 'receiver': receiver, 'amount': amount, 'timestamp': timestamp}, ignore_index=True)
    
    return(edges)

In [283]:
def flatten_nested_list(nested_list):
    return([item for nested in  nested_list for item in nested])

# Analysis of Terrorism Fund Raising

In [9]:
# address_list = ['17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD', 
#                 '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp', 
#                 '186YZVryvtxuXESLo1jzYU1xoRgyd5WARN', 
#                 '1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s', 
#                 '32cNfustcJXjz7afCGPuCuRpgZjcYJQsLE', 
#                 '3CUXTV35SteDufJyuSTSSWhHdrbnpbTXz9', 
#                 '3Fqne1QeMEGHVJwoz3m95fnDZvqfXL2Z4u', 
#                 '34xp4vRoCGJym3xR7yCVPFHoCNxv4Twseo', # receiver of 1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s (it made a lot of transactions and a lot of money traffic)
#                 '15K9Zj1AU2hjT3ebZMtWqDsMv3fFxTNwpf', 
#                 '34GzR7ytFGSviY6CttWH3uDV6QEy2n1JcD', 
#                 '3434fpnej1Y9cKb5pYwGXoeTnptmW3bXdq', 
#                 '123kGrVjKmvQAnjuNDuxMCnscFjNxoxnym', 
#                 '12Cojd2nCLaqtmvKXaAC5FFZ8HpqUa5i5C', 
#                 '12DWpXck5B3oQsp18NG22v57eBdMcHz326',
#                 '14btpv9LnvJBRPwLqakvdHLUjfLsZrN7sK',
#                 '19P5khHYGdsyGPX4xH8RMQxNaWbRCiqzZ3', 
#                 '1s8nW9cXRJtdHd1hQ1ZFFvgoUNkjKJcf8', 
#                 '197BDiQuWki4egPACYj1f2UtdZ2d4QiFP5', 
#                 '1DZ9iLykDrZgjMURNkbmokAwv17VsW998u', 
#                 '16YRiGSGLxKdLQPxiWeTgt8jrpZwRcxjQ3', 
#                 '1LDQhgUTEQJqzGTRPUY5EAthJvktixMnLE',
#                 '1LdnTDv4Zs524aU2fifLi3vVHWhPL6Cgp1',
#                 '1PLQFgpdEJQGQ9rMjEWP3gdKv1bJGu9fse',
#                 '13kcud8fKBfeoRjym9wC7tfLu4skL5s27t',
#                 '1F3twjJjFh6DeQWFNUxVCdLvZk1xNsee7T',
#                 '1L6BbVSFpHCTibmHAGmtx4qVntRx2n58NX',
#                 '1PVQiWq9ds3CsHF2h2wF6qoccxVenprSoQ',
#                 '1AppUXgJnhGrBpbV1vG4XwnXUvenTYh3M4', 
                
#                 'bc1qwfgdjyy95aay2686fn74h6a4nu9eev6np7q4fn204dkj3274frlqrskvx0',
#                '1AupiZBi1bMVfcMgfFx4qcPtVWVgdvE29c',  # receiver of 1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s
#                  '37tRFZw7n94Jddq6TfVs3MbCXmDX6eMfeY' 
#                 '13s7sRxYi9iiCqgbefhk1MQiGzj9LNWUsy',
#                 '14xQDjyBVXHFRpi9q2HDt1wTCigkDjRt8j', 
#                 '1CT58z7sbUyAEZPVkeqnMpnSBBCEEoSbfQ',
#                  '1GX84MiXv4SjpPgTYjenmDxn3HL3nLJkot', 
#                 '1BixJDFzEBmh7uSzbebmeugro4i6G54ga3',
#                 '1P8AYmUjH3kqoaW1qNXYBYKUA5RQ97ryEd',
#                 '16rhvDAd64fMhpFcH5N7sh7QdyLLGy9M44', 
#                 '12jwNYodFi926PeMGpaqhdq7ZFLn2VAeon',
#                 '16svEnyvUqFdXphdPC7ydPcAJAqYHXAhaa',
#                  '1FYk7mGjbfW1jVz2dWNUYReENEJ6wqta9a', 
#                 '1A3VDjFVEToupAAQmyE2wSq2rNz3iuEqTX',
#                 '13LNRzkFC8FMyZsosWtLcdcTiMLoVvLufk',
#                 '13QWZ1sv5wwPfZxAa8a6MPeEPuVA7MK8rF',
#                 '1AWGdz1PguoHPJ2zKHMnRthC7LoCbVf3PB', 
#                 '191BDVckuiY6Sf8QBWynCB81ib5JVwKhDx',
#                 '1HUEp4RGdVYYuJWB4jkJ9B1UTLW7wqNC6X'
#                ]

In [10]:
address_list = ['1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp']

In [205]:
raw_transactions = get_transactions_from_addresses(address_list)

Completed address 1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp


In [347]:
# Covert timestamps into date objects
raw_transactions['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in raw_transactions['timestamp']]

#### Number of different accounts before heuristics:

In [348]:
number_of_accounts = len(set(flatten_nested_list(raw_transactions.loc[raw_transactions.Date>parse('2021-01-01'), 'from']) + flatten_nested_list(raw_transactions.loc[raw_transactions.Date>parse('2021-01-01'), 'to'])))
print('The total number of different accounts before heuristics is {}'.format(number_of_accounts))

The total number of different accounts before heuristics is 2500


In [210]:
# Remove Date column as not necessary
raw_transactions.drop(columns=['Date'], inplace = True)

### Apply Input Heuristic

In [211]:
raw_transactions['id'] = range(len(raw_transactions))
identity = copy.deepcopy(raw_transactions.loc[:, ['from','id']])
identity = merger(identity)

#### Map accounts' addresses with unique identifier

In [None]:
raw_transactions['from_id'] = raw_transactions['from'].apply(mapping_from, identity = identity).apply(set).apply(list)
raw_transactions['to_id'] = raw_transactions['to'].apply(mapping, identity = identity)
edges = edge_creator(raw_transactions)

In [141]:
#edges.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/edges.csv', index=False)

#### Take only transactions happened in 2021

In [213]:
edges['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in edges['timestamp']]

edges_2021 = edges[edges.Date>parse('2021-01-01')]
edges_2021.columns = ['source', 'target', 'amount', 'timestamp', 'Date']
#edges_2021.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/edges_2021.csv', index=False)

#### Store accounts in the output that are not mapped yet

In [92]:
tb_scraped = list(set(edges_2021.loc[[type(value) != int for value in edges_2021['target']], 'target']))

In [93]:
list(set(edges_2021.loc[[type(value) == int for value in edges_2021['source']], 'source']))

[0,
 1,
 3,
 5,
 8,
 10,
 12,
 15,
 16,
 17,
 19,
 21,
 22,
 23,
 25,
 27,
 29,
 31,
 33,
 35,
 37,
 40,
 41,
 43]

In [144]:
#identity.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/identity.csv', index=False)

# Second Heuristic

In [15]:
import json

In [16]:
output_dict = dict()

In [17]:
for address in tb_scraped:
    
    time.sleep(20)

    try:    
        output_address_data = requests.get('https://blockchain.info/rawaddr/{}'.format(address)).json()
        output_dict[address] = output_address_data

        with open('1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp_connections.json', 'w') as fp:
            json.dump(output_dict, fp)

        print("Succesfully scraped {}".format(address))
    except:
        print("Error at address {}".format(address))

Succesfully scraped 1NmZ7m1QcCamK7iSdZH1zgCDVyjnx8CXRj
Succesfully scraped 3D6GyDZ6dhZNwSi7VJbZJCRvs5Qh2oyLEr
Succesfully scraped 1L2gjnutsMWtc1JWop4UHVG6opeNV2xcDZ
Succesfully scraped 1PQzZ2J722CSmQtTfH1bz4R5dra9bUmhvT
Succesfully scraped 37VLX47ohg8QMFCxx9HK4mahwCfUwGZhDu
Succesfully scraped 3N9gxm87Aodw76bqiFQqHZqQSLCLHpKs2d
Succesfully scraped 3P3GkjmUvUVfdW8LdzLMSE4QeCWUnHe1Nn
Succesfully scraped 3MQ98rXmQUT5wpzhWdkaMTqHitJyY7B2E8
Succesfully scraped 1Dc95vxwvjLtpPDmXs8VUwcx6jwCau7u9t
Succesfully scraped 3Fqg2J4Ca9qgkrzEr3qd6wJTM6WZyQespo
Succesfully scraped 19Vhh4DvLq6MDUpDefbTd7uLVZLSkbmw43
Succesfully scraped 1CDFSTSAUR5cnvt5w7YHhs1T4BqpFRPiU2
Succesfully scraped 1GVwJyM7xPKEn4BLc9LNnLtkR5Ri2HBzQB
Succesfully scraped 1H3kdzidJuH2XdMmzjXZgPEUNWBn9XJcSR
Succesfully scraped 179oCr2hjoojzcnrDXVEFZKrwFPULhfeV6
Succesfully scraped 3F2vdQgLzZTG2MDAPSaeN1QznbmsK9jB8m
Succesfully scraped 3KRY62z3QKCyB2yAhM53P7CyFw1rFeuEWZ
Succesfully scraped 1PpFN3ESN3VpuJuuaGmLDNCBAtwDFD6uRx
Succesfull

In [31]:
for nested in identity['from']:
    
    address = nested[0]
    
    time.sleep(20)

    try:    
        output_address_data = requests.get('https://blockchain.info/rawaddr/{}'.format(address)).json()
        output_dict[address] = output_address_data

        with open('1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp_connections.json', 'w') as fp:
            json.dump(output_dict, fp)

        print("Succesfully scraped {}".format(address))
    except:
        print("Error at address {}".format(address))

Succesfully scraped 1BThnRwVPmAnfhk3jv4sbu2asqzwAYnD1a
Succesfully scraped 14hMndJ5eQpeWCS5gGC8jWESMwC3hYp7w6
Succesfully scraped 1Ps2wUfQMqyyqYSQ5ZRGdq4XXwUycNP8ZL
Succesfully scraped 1P6d5jNuDfpbYw5otcsYkCTE6HF4kUfCDA
Succesfully scraped 1sBFdf44aRKBUczFzG1mTH9wKREpLJSvG
Succesfully scraped 14koyL9vuAMb1PyRPJscurCrsRXswKwQtC
Succesfully scraped 1PJCSh3qYBYitWhne9dSxDTZuoC1mzLhyD
Succesfully scraped 1PM4jtnodppMhNsXjCYCVU6AZeXSoikBMX
Succesfully scraped 16G4jWNuCJ2uHMxDiMWFwv8K9RRHo9c4NJ
Succesfully scraped 17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD
Succesfully scraped 1Mu3JcqzczUinAFsUhcbvUTmQM7tcTLosB
Succesfully scraped 1DKPZvSF3zGqGA9Qu51gM2vLbwbX4WfQvJ
Succesfully scraped 1GmY73q24B61YfMfmqssEWj8oSe2zRPsK8
Succesfully scraped 3LuVbzNdhSZaaoJiq3Yu3LtcYWcsPjqF9t
Succesfully scraped 1B9VCEjtJWRkBJxY3JSDwwqQwj3hppm419
Succesfully scraped 1MohUS3AHUjpf7YBrHAGWYNPj7en6s4ayr
Succesfully scraped 1HRaXE6BLTbTck3QJHwNSDRXLpcMNoRhzB
Succesfully scraped 1HExKDjstytfmZzLjqd2ivkArBuJYQxPjQ
Succesfully

## Detecting potential change address

In [214]:
potential_change_addresses = []  # This list contains all addresses in transactions that satisfy condition 1

In [215]:
transactions = edges_2021

for index, row in transactions.iterrows():  # Loop over all transactions to check if this is target's first appearance
    
    output_address = row['target']
    
    try:
    
        output_address_transactions = output_dict[output_address]['txs']

        # Get first transaction of output address (last transaction of txs list)
        first_transaction_output_address = output_address_transactions[-1]

        # Check if first transaction of output address is the present transaction
        date_first_transaction = first_transaction_output_address['time']
        date_current_transaction = row['timestamp']

        if date_first_transaction == date_current_transaction:
            if output_address not in potential_change_addresses:
                potential_change_addresses.append(output_address)
                print("Change address at index: {}".format(index))
        else:
            print("TS time {}".format(datetime.fromtimestamp(date_first_transaction)))
            #print("O time {}".format(row['datetime']))
    except:
        pass

TS time 2021-04-28 12:03:10
Change address at index: 1
TS time 2021-02-04 11:55:05
TS time 2021-04-28 12:03:10
TS time 2021-01-24 16:19:15
TS time 2021-04-28 12:03:10
TS time 2021-01-24 13:00:05
TS time 2021-01-24 10:53:54
TS time 2021-01-24 10:32:42
TS time 2021-04-28 12:03:10
TS time 2021-01-22 13:35:36
TS time 2021-04-28 12:03:10
TS time 2021-01-21 21:06:17
TS time 2021-01-21 15:17:12
TS time 2021-04-28 12:03:10
Change address at index: 25
TS time 2021-04-28 12:03:10
TS time 2021-04-28 12:03:10
Change address at index: 28
TS time 2021-04-28 12:03:10
TS time 2021-01-18 18:57:24
Change address at index: 35
TS time 2021-04-28 12:03:10
TS time 2021-01-17 18:13:33
TS time 2021-01-17 18:13:33
Change address at index: 40
TS time 2021-01-17 18:13:33
Change address at index: 42
TS time 2020-07-28 22:07:10
TS time 2020-08-29 12:38:12
TS time 2021-01-17 18:13:33
TS time 2021-01-17 15:17:10
TS time 2021-01-17 14:57:04
TS time 2021-01-17 15:17:10
Change address at index: 51
TS time 2021-01-17 11

### 2.3 No address that is both input and output in same transaction

In [216]:
transactions = transactions.drop_duplicates()

In [217]:
for name, group in grouped_transactions:
    
    sources = group['source']
    targets = group['target']
    
    for source in sources:
        if source in group['target']:
            potential_change_addresses = list(set(potential_change_addresses) - set(targets))

### 2.4 All other output addresses in same transaction are not first appearing

In [218]:
grouped_transactions = transactions.groupby(['amount', 'timestamp'])

In [219]:
for name, group in grouped_transactions:
    output_addresses = group['target']
    
    counter = 0
    for output in output_addresses:
        if output in potential_change_addresses:
            counter +=1
    if counter >= 2:
        potential_change_addresses = list(set(potential_change_addresses) - set(output_addresses))
            

In [220]:
def change_address_to_id_from_input_heuristic(potential_changed_address, raw_transactions):
    id_found = raw_transactions[[potential_changed_address in nested for nested in raw_transactions['to']]]['from_id'].tolist()
    return(id_found)

In [152]:
# for address in potential_change_addresses:
#     print(change_address_to_id_from_input_heuristic(address, raw_transactions)[0][0])

In [221]:
for address in potential_change_addresses:
    id_match = change_address_to_id_from_input_heuristic(address, raw_transactions)[0][0]
    identity.at[identity.index[identity.id == id_match].tolist()[0], 'from'] += [address]


In [222]:
raw_transactions['from_id'] = raw_transactions['from'].apply(mapping_from, identity = identity).apply(set).apply(list)
raw_transactions['to_id'] = raw_transactions['to'].apply(mapping, identity = identity)
edges = edge_creator(raw_transactions)

In [262]:
nodes = list(set(edges['sender'].tolist() + edges['receiver'].tolist()))

In [349]:
print('The total number of accounts after applying the heuristics is {}'.format(len(nodes)))

The total number of accounts after applying the heuristics is 477


## Create ego network

#### Get transactions of the accounts that interacted with the ego

In [248]:
raw_transactions_all = [output_dict[address]['txs'] for address in output_dict]

#### Convert the transaction dataframe into an edge table

In [275]:
raw_transactions_edges = pd.DataFrame(columns=['from','to','amount','timestamp'])
# Get table of transactions in raw form
for address in raw_transactions_all:
    try:
        raw_transactions_edges = raw_transactions_edges.append(get_edge_list(address))
    except:
        pass
    
raw_transactions_edges.reset_index(inplace=True)

#### Select only transactions happened in 2021

In [276]:
raw_transactions_edges['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in raw_transactions_edges['timestamp']]
raw_transactions_edges = raw_transactions_edges.loc[raw_transactions_edges.Date>parse('2021-01-01'), :]
raw_transactions_edges.drop(columns=['Date'], inplace = True)

#### Map with unique identifiers

In [284]:
raw_transactions_edges['id'] = range(len(raw_transactions_edges))
raw_transactions_edges['from_id'] = raw_transactions_edges['from'].apply(mapping_from, identity = identity).apply(set).apply(list)
raw_transactions_edges['to_id'] = raw_transactions_edges['to'].apply(mapping, identity = identity)

#### Select only edges that include accounts that appear in the edge list of the ego

In [285]:
def is_in_list2(list_in, list_preset):
    return(any([element in list_preset for element in list_in]))

ego_network_transactions = raw_transactions_edges[raw_transactions_edges.from_id.apply(is_in_list2, list_preset = nodes) & raw_transactions_edges.to_id.apply(is_in_list2, list_preset = nodes)]

In [286]:
edges_ego_network = edge_creator(ego_network_transactions)

In [290]:
edges_ego_network = edges_ego_network.drop_duplicates()

In [314]:
edges_ego_network.columns = ['source', 'target', 'amount', 'timestamp']
edges.columns = ['source', 'target', 'amount', 'timestamp']

#### Merge edges of others with edges of the ego

In [317]:
edges_ego_network = pd.concat([edges_ego_network, edges])

In [318]:
edges_ego_network.to_csv('/home/massi/my_project_dir/blockChain/data/ego_network_data.csv', index=False)

In [302]:
len(set(edges_ego_network['source'].tolist() + edges_ego_network['target'].tolist()))

3198

#### Get ego network of degree 1

In [308]:
edges_network_only_nodes = edges_ego_network[edges_ego_network.source.isin(nodes) & edges_ego_network.target.isin(nodes)]

In [319]:
edges_network_only_nodes = pd.concat([edges_network_only_nodes, edges])

In [320]:
edges_network_only_nodes.to_csv('/home/massi/my_project_dir/blockChain/data/ego_network_data_only_nodes.csv', index=False)