In [1]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
from dateutil.parser import parse
import copy

In [2]:
def intersect(s1, s2):
    return(len(s1.intersection(s2))>0)

def map_addresses_with_id(ego_edge_list, identity):
    
    from_list = [nested for nested in get_id_from_address(ego_edge_list['from'], identity)]
    to_list = [nested for nested in get_id_from_address(ego_edge_list['to'], identity)]
    
    ego_edge_list['from_id'] = from_list
    ego_edge_list['to_id'] = to_list
    
    return(ego_edge_list)

def merger(identity):
    
    copy_identity = copy.deepcopy(identity)    
    end = len(identity['from'])
    idx = 0
    
    while idx < len(copy_identity['from']):

        from1 = copy_identity.iloc[idx,0]
        from1_idx = copy_identity.iloc[idx,:].index
        idxes_mtch = copy_identity.index[copy_identity['from'].apply(set).apply(intersect, s2=set(from1))].tolist()

        if len(idxes_mtch)>1:
            for to_remove in idxes_mtch[1:]:
                #copy_identity.loc[idxes_mtch[0], 'from'].extend(copy_identity.loc[to_remove, 'from'])
                copy_identity.at[idxes_mtch[0], 'from'] = copy_identity.at[idxes_mtch[0], 'from'] + copy_identity.at[to_remove, 'from']
            copy_identity.drop(idxes_mtch[1:], inplace = True)
            
            end = len(copy_identity['from'])
        idx += 1
        
    return(copy_identity)

In [3]:
def get_info_from_address(address):
    r =requests.get('https://blockchain.info/rawaddr/' + address).json()
    r = r['txs']
    return(r)

def get_transactions_from_addresses(address_list):
    
    raw_transactions = pd.DataFrame(columns=['from','to','amount','timestamp'])
    # Get table of transactions in raw form
    for address in address_list:
        try:
            request_info = get_info_from_address(address)
            raw_transactions = raw_transactions.append(get_edge_list(request_info))
            time.sleep(20)
            print('Completed address {}'.format(address))
        except:
            print(' Address {} raised exception'.format(address))
    
    raw_transactions.reset_index(inplace=True)
    
    return(raw_transactions)


In [4]:
def get_edge_list(info_from_address):
    
    senders = create_sender_list(info_from_address)
    receivers = create_receiver_list(info_from_address)
    
    edges_ego = pd.DataFrame({'from':senders, 'to':receivers, 'amount':amount_transferred_per_transaction(info_from_address), 'timestamp':create_timestamp_list_transaction(info_from_address)})
    
    return(edges_ego)

def create_receiver_list(transaction_ego):
    receiver_addresses = list()
    issues_receiver = list()
    amount = list()
    for j in range(len(transaction_ego)):
        receiver_addresses.append([])
        amount.append([])
        for i in range(len(transaction_ego[j]['out'])):
            try:
                receiver_addresses[-1].append(transaction_ego[j]['out'][i]['addr'])
                amount[-1].append(transaction_ego[j]['out'][i]['value'])
            except:
                issues.append([i,j])
    
    return(receiver_addresses)


def amount_transferred_per_transaction(transaction_ego):
    
    amount = list()
    for j in range(len(transaction_ego)):
        amount.append([])
        for i in range(len(transaction_ego[j]['out'])):
            try:
                amount[-1].append(transaction_ego[j]['out'][i]['value'])
            except:
                 pass
    
    return(amount)


def create_sender_list(transaction_ego):
    
    sender_addresses = list()
    issues_sender = list()
    for j in range(len(transaction_ego)):
        sender_addresses.append([])
        for i in range(len(transaction_ego[j]['inputs'])):
            try:
                sender_addresses[-1].append(transaction_ego[j]['inputs'][i]['prev_out']['addr'])
            except:
                issues.append([i,j])
    return(sender_addresses)

def create_timestamp_list_transaction(transaction_ego):
    return([transaction_ego[i]['time'] for i in range(len(transaction_ego))])



In [5]:
# Mapping functions

# def get_id_from_address(address, identity_table):
#     return(identity_table[[address in nested for nested in identity_table['from']]]['id'].tolist()[0])
def mapping(address_list, identity):
    return([get_id_from_address(address, identity) for address in address_list])

def mapping_from(address_list, identity):
    return([get_id_from_address(address_list[0], identity)])

def get_id_from_address(address, identity_table):
    id_found = identity_table[identity_table['from'].apply(is_in_list, address = address)]['id'].tolist()
    
    if len(id_found)==1:
        return(id_found[0])
    
    elif len(id_found)>1:
        print(id_found)
    else:
        return(address)

def is_in_list(address_list, address):
    return(address in address_list)

In [6]:
def edge_creator(raw_transactions):
    
    edges = pd.DataFrame(columns = ['sender', 'receiver', 'amount', 'timestamp'])

    for index_raw in range(len(raw_transactions)):
        for index_receiver in range(len(raw_transactions.iloc[index_raw, -1])):
            sender = raw_transactions.iloc[index_raw, -2][0]
            receiver = raw_transactions.iloc[index_raw, -1][index_receiver]
            amount = raw_transactions.iloc[index_raw, -5][index_receiver]
            timestamp = raw_transactions.iloc[index_raw, -4]
            edges = edges.append({'sender':sender, 'receiver': receiver, 'amount': amount, 'timestamp': timestamp}, ignore_index=True)
    
    return(edges)

In [7]:
def flatten_nested_list(nested_list):
    return([item for nested in  nested_list for item in nested])

# Analysis of Terrorism Fund Raising

In [8]:
# address_list = ['17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD', 
#                 '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp', 
#                 '186YZVryvtxuXESLo1jzYU1xoRgyd5WARN', 
#                 '1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s', 
#                 '32cNfustcJXjz7afCGPuCuRpgZjcYJQsLE', 
#                 '3CUXTV35SteDufJyuSTSSWhHdrbnpbTXz9', 
#                 '3Fqne1QeMEGHVJwoz3m95fnDZvqfXL2Z4u', 
#                 '34xp4vRoCGJym3xR7yCVPFHoCNxv4Twseo', # receiver of 1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s (it made a lot of transactions and a lot of money traffic)
#                 '15K9Zj1AU2hjT3ebZMtWqDsMv3fFxTNwpf', 
#                 '34GzR7ytFGSviY6CttWH3uDV6QEy2n1JcD', 
#                 '3434fpnej1Y9cKb5pYwGXoeTnptmW3bXdq', 
#                 '123kGrVjKmvQAnjuNDuxMCnscFjNxoxnym', 
#                 '12Cojd2nCLaqtmvKXaAC5FFZ8HpqUa5i5C', 
#                 '12DWpXck5B3oQsp18NG22v57eBdMcHz326',
#                 '14btpv9LnvJBRPwLqakvdHLUjfLsZrN7sK',
#                 '19P5khHYGdsyGPX4xH8RMQxNaWbRCiqzZ3', 
#                 '1s8nW9cXRJtdHd1hQ1ZFFvgoUNkjKJcf8', 
#                 '197BDiQuWki4egPACYj1f2UtdZ2d4QiFP5', 
#                 '1DZ9iLykDrZgjMURNkbmokAwv17VsW998u', 
#                 '16YRiGSGLxKdLQPxiWeTgt8jrpZwRcxjQ3', 
#                 '1LDQhgUTEQJqzGTRPUY5EAthJvktixMnLE',
#                 '1LdnTDv4Zs524aU2fifLi3vVHWhPL6Cgp1',
#                 '1PLQFgpdEJQGQ9rMjEWP3gdKv1bJGu9fse',
#                 '13kcud8fKBfeoRjym9wC7tfLu4skL5s27t',
#                 '1F3twjJjFh6DeQWFNUxVCdLvZk1xNsee7T',
#                 '1L6BbVSFpHCTibmHAGmtx4qVntRx2n58NX',
#                 '1PVQiWq9ds3CsHF2h2wF6qoccxVenprSoQ',
#                 '1AppUXgJnhGrBpbV1vG4XwnXUvenTYh3M4', 
                
#                 'bc1qwfgdjyy95aay2686fn74h6a4nu9eev6np7q4fn204dkj3274frlqrskvx0',
#                '1AupiZBi1bMVfcMgfFx4qcPtVWVgdvE29c',  # receiver of 1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s
#                  '37tRFZw7n94Jddq6TfVs3MbCXmDX6eMfeY' 
#                 '13s7sRxYi9iiCqgbefhk1MQiGzj9LNWUsy',
#                 '14xQDjyBVXHFRpi9q2HDt1wTCigkDjRt8j', 
#                 '1CT58z7sbUyAEZPVkeqnMpnSBBCEEoSbfQ',
#                  '1GX84MiXv4SjpPgTYjenmDxn3HL3nLJkot', 
#                 '1BixJDFzEBmh7uSzbebmeugro4i6G54ga3',
#                 '1P8AYmUjH3kqoaW1qNXYBYKUA5RQ97ryEd',
#                 '16rhvDAd64fMhpFcH5N7sh7QdyLLGy9M44', 
#                 '12jwNYodFi926PeMGpaqhdq7ZFLn2VAeon',
#                 '16svEnyvUqFdXphdPC7ydPcAJAqYHXAhaa',
#                  '1FYk7mGjbfW1jVz2dWNUYReENEJ6wqta9a', 
#                 '1A3VDjFVEToupAAQmyE2wSq2rNz3iuEqTX',
#                 '13LNRzkFC8FMyZsosWtLcdcTiMLoVvLufk',
#                 '13QWZ1sv5wwPfZxAa8a6MPeEPuVA7MK8rF',
#                 '1AWGdz1PguoHPJ2zKHMnRthC7LoCbVf3PB', 
#                 '191BDVckuiY6Sf8QBWynCB81ib5JVwKhDx',
#                 '1HUEp4RGdVYYuJWB4jkJ9B1UTLW7wqNC6X'
#                ]

In [9]:
address_list = ['1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp']

In [10]:
raw_transactions = get_transactions_from_addresses(address_list)

Completed address 1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp


In [11]:
# Covert timestamps into date objects
raw_transactions['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in raw_transactions['timestamp']]

#### Number of different accounts before heuristics:

In [12]:
senders_2021 = len(set(flatten_nested_list(raw_transactions.loc[raw_transactions.Date>parse('2021-01-01'), 'from'])))
print('The total number of addresses only in 2021 to be clustered with first heuristic: {}'.format(senders_2021))

The total number of addresses only in 2021 to be clustered with first heuristic: 2354


In [13]:
number_of_accounts = len(set(flatten_nested_list(raw_transactions.loc[raw_transactions.Date>parse('2021-01-01'), 'from']) + flatten_nested_list(raw_transactions.loc[raw_transactions.Date>parse('2021-01-01'), 'to'])))
print('The total number of different accounts before heuristics is {}'.format(number_of_accounts))

The total number of different accounts before heuristics is 2500


In [14]:
# Remove Date column as not necessary
raw_transactions.drop(columns=['Date'], inplace = True)

### Apply Input Heuristic

In [15]:
raw_transactions['id'] = range(len(raw_transactions))
identity = copy.deepcopy(raw_transactions.loc[:, ['from','id']])
identity = merger(identity)

#### Map accounts' addresses with unique identifier

In [16]:
raw_transactions['from_id'] = raw_transactions['from'].apply(mapping_from, identity = identity).apply(set).apply(list)
raw_transactions['to_id'] = raw_transactions['to'].apply(mapping, identity = identity)
edges = edge_creator(raw_transactions)

In [17]:
#edges.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/edges.csv', index=False)

#### Take only transactions happened in 2021

In [18]:
edges['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in edges['timestamp']]

edges_2021 = edges[edges.Date>parse('2021-01-01')]
edges_2021.columns = ['source', 'target', 'amount', 'timestamp', 'Date']
edges_2021.drop(columns=['Date'], inplace = True)
#edges_2021.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/edges_2021.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [19]:
number_people_2021 = len({el for el in set(pd.concat([edges_2021['source'], edges_2021['target']])) if type(el)==int})
print('{} raw addresses where linked to {} different persons.'.format(senders_2021, number_people_2021))

2354 raw addresses where linked to 25 different persons.


In [20]:
nodes = len(set(pd.concat([edges_2021['source'], edges_2021['target']])))
nodes

170

#### Store accounts in the output that are not mapped yet

In [21]:
tb_scraped = list(set(edges_2021.loc[[type(value) != int for value in edges_2021['target']], 'target']))

In [22]:
#identity.to_csv('/home/massi/my_project_dir/blockChain/data/edges_final/identity.csv', index=False)

# Second Heuristic

In [23]:
import json

In [24]:
#output_dict = dict()

In [25]:
# for address in tb_scraped:
    
#     time.sleep(20)

#     try:    
#         output_address_data = requests.get('https://blockchain.info/rawaddr/{}'.format(address)).json()
#         output_dict[address] = output_address_data

#         with open('1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp_connections.json', 'w') as fp:
#             json.dump(output_dict, fp)

#         print("Succesfully scraped {}".format(address))
#     except:
#         print("Error at address {}".format(address))

In [26]:
# for nested in identity['from']:
    
#     address = nested[0]
    
#     time.sleep(20)

#     try:    
#         output_address_data = requests.get('https://blockchain.info/rawaddr/{}'.format(address)).json()
#         output_dict[address] = output_address_data

#         with open('1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp_connections.json', 'w') as fp:
#             json.dump(output_dict, fp)

#         print("Succesfully scraped {}".format(address))
#     except:
#         print("Error at address {}".format(address))

In [27]:
with open('1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp_connections.json') as output_test:
    output_dict = json.load(output_test)

## Detecting potential change address

In [28]:
potential_change_addresses = []  # This list contains all addresses in transactions that satisfy condition 1

In [29]:
transactions = edges_2021

for index, row in transactions.iterrows():  # Loop over all transactions to check if this is target's first appearance
    
    output_address = row['target']
    
    try:
    
        output_address_transactions = output_dict[output_address]['txs']

        # Get first transaction of output address (last transaction of txs list)
        first_transaction_output_address = output_address_transactions[-1]

        # Check if first transaction of output address is the present transaction
        date_first_transaction = first_transaction_output_address['time']
        date_current_transaction = row['timestamp']

        if date_first_transaction == date_current_transaction:
            if output_address not in potential_change_addresses:
                potential_change_addresses.append(output_address)
                print("Change address at index: {}".format(index))
        else:
            print("TS time {}".format(datetime.fromtimestamp(date_first_transaction)))
            #print("O time {}".format(row['datetime']))
    except:
        pass

TS time 2021-04-28 12:03:10
Change address at index: 1
Change address at index: 2
TS time 2021-04-28 12:03:10
TS time 2021-01-24 16:19:15
TS time 2021-04-28 12:03:10
TS time 2021-01-24 13:00:05
TS time 2021-01-24 10:53:54
TS time 2021-01-24 10:32:42
TS time 2021-04-28 12:03:10
TS time 2021-01-22 13:35:36
TS time 2021-04-28 12:03:10
TS time 2021-01-21 21:06:17
Change address at index: 20
TS time 2021-04-28 12:03:10
TS time 2021-01-19 20:56:58
TS time 2021-04-28 12:03:10
TS time 2021-04-28 12:03:10
TS time 2021-01-19 18:57:42
TS time 2021-04-28 12:03:10
Change address at index: 33
TS time 2021-01-18 02:57:15
TS time 2021-04-28 12:03:10
Change address at index: 38
Change address at index: 39
TS time 2021-01-17 18:13:24
Change address at index: 41
TS time 2021-01-17 18:13:24
TS time 2020-07-28 22:07:10
TS time 2020-08-29 12:38:12
Change address at index: 45
Change address at index: 48
TS time 2021-01-17 14:57:04
Change address at index: 50
TS time 2021-01-17 15:17:01
TS time 2021-01-17 11:

### 2.3 No address that is both input and output in same transaction

In [30]:
grouped_transactions = transactions.groupby(['amount', 'timestamp'])

In [31]:
transactions = transactions.drop_duplicates()

In [32]:
for name, group in grouped_transactions:
    
    sources = group['source']
    targets = group['target']
    
    for source in sources:
        if source in group['target']:
            potential_change_addresses = list(set(potential_change_addresses) - set(targets))

### 2.4 All other output addresses in same transaction are not first appearing

In [33]:
for name, group in grouped_transactions:
    output_addresses = group['target']
    
    counter = 0
    for output in output_addresses:
        if output in potential_change_addresses:
            counter +=1
    if counter >= 2:
        potential_change_addresses = list(set(potential_change_addresses) - set(output_addresses))
            

In [34]:
def change_address_to_id_from_input_heuristic(potential_changed_address, raw_transactions):
    id_found = raw_transactions[[potential_changed_address in nested for nested in raw_transactions['to']]]['from_id'].tolist()
    return(id_found)

In [35]:
# for address in potential_change_addresses:
#     print(change_address_to_id_from_input_heuristic(address, raw_transactions)[0][0])

In [36]:
print('Number of addresses linked to the same persons with second heuristic: {}'.format(len(potential_change_addresses)))

Number of addresses linked to the same persons with second heuristic: 45


In [37]:
unique_id_owner_change_address = len(set([change_address_to_id_from_input_heuristic(address, raw_transactions)[0][0] for address in potential_change_addresses]))
print('{} addresses were linked to {} different persons'.format(len(potential_change_addresses), unique_id_owner_change_address))

45 addresses were linked to 8 different persons


In [38]:
for address in potential_change_addresses:
    id_match = change_address_to_id_from_input_heuristic(address, raw_transactions)[0][0]
    identity.at[identity.index[identity.id == id_match].tolist()[0], 'from'] += [address]

#### Map with new id the transaction history of ego

In [39]:
raw_transactions['from_id'] = raw_transactions['from'].apply(mapping_from, identity = identity).apply(set).apply(list)
raw_transactions['to_id'] = raw_transactions['to'].apply(mapping, identity = identity)
edges = edge_creator(raw_transactions)
edges['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in edges['timestamp']]
edges_2021 = edges[edges.Date>parse('2021-01-01')]
edges_2021.columns = ['source', 'target', 'amount', 'timestamp', 'Date']
edges_2021.drop(columns=['Date'], inplace = True)

In [40]:
nodes = list(set(edges_2021['source'].tolist() + edges_2021['target'].tolist()))

In [41]:
print('The total number of accounts after applying the heuristics is {}'.format(len(nodes)))

The total number of accounts after applying the heuristics is 125


## Create ego network

#### Get transactions of the accounts that interacted with the ego

In [42]:
raw_transactions_all = [output_dict[address]['txs'] for address in output_dict]

#### Convert the transaction dataframe into an edge table

In [43]:
raw_transactions_edges = pd.DataFrame(columns=['from','to','amount','timestamp'])
# Get table of transactions in raw form
for address in raw_transactions_all:
    try:
        raw_transactions_edges = raw_transactions_edges.append(get_edge_list(address))
    except:
        pass
    
raw_transactions_edges.reset_index(inplace=True)

#### Select only transactions happened in 2021

In [44]:
raw_transactions_edges['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in raw_transactions_edges['timestamp']]
raw_transactions_edges = raw_transactions_edges.loc[raw_transactions_edges.Date>parse('2021-01-01'), :]
raw_transactions_edges.drop(columns=['Date'], inplace = True)

#### Map with unique identifiers

In [45]:
raw_transactions_edges['id'] = range(len(raw_transactions_edges))
raw_transactions_edges['from_id'] = raw_transactions_edges['from'].apply(mapping_from, identity = identity).apply(set).apply(list)
raw_transactions_edges['to_id'] = raw_transactions_edges['to'].apply(mapping, identity = identity)

#### Select only edges that include accounts that appear in the edge list of the ego

In [46]:
def is_in_list2(list_in, list_preset):
    return(any([element in list_preset for element in list_in]))

ego_network_transactions = raw_transactions_edges[raw_transactions_edges.from_id.apply(is_in_list2, list_preset = nodes) & raw_transactions_edges.to_id.apply(is_in_list2, list_preset = nodes)]

In [47]:
edges_ego_network = edge_creator(ego_network_transactions)

In [48]:
edges_ego_network = edges_ego_network.drop_duplicates()

In [49]:
edges.columns = ['source', 'target', 'amount', 'timestamp', 'Date']
edges_ego_network.columns = ['source', 'target', 'amount', 'timestamp']
edges_2021.columns = ['source', 'target', 'amount', 'timestamp']

#### Merge edges of others with edges of the ego

In [50]:
edges_ego_network = pd.concat([edges_ego_network, edges_2021])

#### Remove sefl-loop

In [51]:
edges_ego_network = edges_ego_network[edges_ego_network.source!=edges_ego_network.target]

#### Store as csv

In [152]:
edges_ego_network.to_csv('/home/massi/my_project_dir/blockChain/data/ego_network_data_2021.csv', index=False)

In [52]:
len(set(edges_ego_network['source'].tolist() + edges_ego_network['target'].tolist()))

2620

#### Get ego network of degree 1

In [53]:
edges_network_only_nodes = edges_ego_network[edges_ego_network.source.isin(nodes) & edges_ego_network.target.isin(nodes)]

In [54]:
edges_network_only_nodes= edges_network_only_nodes.drop_duplicates()

In [53]:
edges_network_only_nodes.to_csv('/home/massi/my_project_dir/blockChain/data/ego_network_data_only_nodes.csv', index=False)

In [319]:
#edges_network_only_nodes = pd.concat([edges_network_only_nodes, edges])

In [55]:
edges_network_only_nodes['Date'] = [datetime.fromtimestamp(timestamp) for timestamp in edges_network_only_nodes['timestamp']]

In [56]:
edges_network_only_nodes.head()

Unnamed: 0,source,target,amount,timestamp,Date
0,40,1NmZ7m1QcCamK7iSdZH1zgCDVyjnx8CXRj,202070,1609683090,2021-01-03 15:11:30
1,40,0,3613740,1609683090,2021-01-03 15:11:30
2,21,0,1338987,1610903604,2021-01-17 18:13:24
5,21,3NxYb9saP2FdiKASkNPZtMGcTS8dZo4gb3,1911984,1610903604,2021-01-17 18:13:24
7,21,3D6GyDZ6dhZNwSi7VJbZJCRvs5Qh2oyLEr,6911959,1610903604,2021-01-17 18:13:24


In [57]:
def get_temporal_motif1(edges_network_only_nodes):
    
    triads = list()
    for index in range(len(edges_network_only_nodes)):
        a = edges_network_only_nodes.iloc[index, 0]
        b = edges_network_only_nodes.iloc[index, 1]
        timestamp_ab = edges_network_only_nodes.iloc[index, -1]
        # rows where the sender is either the sender or the receiver of the current row.
        outgoing_b_clos_in_time = edges_network_only_nodes[(edges_network_only_nodes.source == b) & (edges_network_only_nodes.Date >= timestamp_ab) & (edges_network_only_nodes.Date <= timestamp_ab + timedelta(minutes=30))]
        if len(outgoing_b_clos_in_time)>0:
            triads.append(outgoing_b_clos_in_time.append(edges_network_only_nodes.iloc[index]).sort_values(by='Date'))
    return(triads)

In [58]:
def get_temporal_motif2(edges_network_only_nodes):
    
    triads = list()
    for index in range(len(edges_network_only_nodes)):
        edges_network_only_nodes_copy = copy.deepcopy(edges_network_only_nodes.iloc[index:])
        a = edges_network_only_nodes.iloc[index, 0]
        b = edges_network_only_nodes.iloc[index, 1]
        # rows where the sender is either the sender or the receiver of the current row.
        outgoing_a_b = edges_network_only_nodes[(edges_network_only_nodes.source == b) | (edges_network_only_nodes.source == a)]
        receivers_a = edges_network_only_nodes.loc[edges_network_only_nodes.source == a, 'target']
        # Store edges where the sender is b and the receivers are receivers of a
        edges_b_to_receivers_a = edges_network_only_nodes[(edges_network_only_nodes.source == b) & (edges_network_only_nodes.target.isin(receivers_a))]
        if len(edges_b_to_receivers_a)>0:
            for receivers_a_and_b in list(set(edges_b_to_receivers_a.target)):
                #next_edges_b = edges_network_only_nodes[([b == element for element in edges_network_only_nodes.source]) & (edges_network_only_nodes.target.isin( list(set(receivers_a) - {b})))]
                next_edges_a = edges_network_only_nodes[(edges_network_only_nodes.source == a) & (edges_network_only_nodes.target == receivers_a_and_b)] #(edges_network_only_nodes.target.isin(next_edges_b.target))]
                #next_edges_b = next_edges_b.iloc[0, :]
                next_edges_b = edges_b_to_receivers_a[edges_b_to_receivers_a.target == receivers_a_and_b]
                next_edges = pd.concat([next_edges_a, next_edges_b])
                #next_edges = next_edges_a.append(next_edges_b)
                triad = next_edges.append(edges_network_only_nodes.iloc[index, :]).sort_values(by='Date')
                triad = triad[triad.Date.isin([triad.loc[(triad['source'] == source) & (triad['target'] == target), 'Date'].min() for source,target in zip(triad['source'], triad['target'])])]
                if triad.iloc[0, 0] == triad.iloc[1, 0]:
            #triad = next_edges.append(edges_network_only_nodes.iloc[index,:]).sort_values(by='Date')
                    triads.append(triad)
    
    return(triads)

In [59]:
def remove_self_loop(edges):
    return(edges[edges.source!=edges.target])

In [60]:
forw_paths1 = get_temporal_motif1(edges_network_only_nodes)

In [61]:
forw_paths2 = get_temporal_motif2(edges_network_only_nodes)

In [62]:
len(forw_paths1), len(forw_paths2)

(7, 3)

In [63]:
forw_paths2

[     source target    amount   timestamp                Date
 187      41      0  12468200  1609677923 2021-01-03 13:45:23
 7861     41     12   5941200  1611127498 2021-01-20 08:24:58
 7860     12      0   5927448  1611127637 2021-01-20 08:27:17,
      source target    amount   timestamp                Date
 187      41      0  12468200  1609677923 2021-01-03 13:45:23
 7865     41     16   4836000  1611064651 2021-01-19 14:57:31
 7864     16      0   4819765  1611065220 2021-01-19 15:07:00,
      source target    amount   timestamp                Date
 187      41      0  12468200  1609677923 2021-01-03 13:45:23
 7869     41     19   5553000  1610977934 2021-01-18 14:52:14
 7868     19      0   5547079  1610978026 2021-01-18 14:53:46]

In [64]:
for path in forw_paths1:
    display(path[:2])

Unnamed: 0,source,target,amount,timestamp,Date
7852,41,5,7148600,1611481772,2021-01-24 10:49:32
10,5,17MK6zezZyxoEjwamGKqtbpoZau8K8dprH,2543626,1611481841,2021-01-24 10:50:41


Unnamed: 0,source,target,amount,timestamp,Date
7860,12,0,5927448,1611127637,2021-01-20 08:27:17
89,0,1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s,1897619061,1611129418,2021-01-20 08:56:58


Unnamed: 0,source,target,amount,timestamp,Date
7861,41,12,5941200,1611127498,2021-01-20 08:24:58
7860,12,0,5927448,1611127637,2021-01-20 08:27:17


Unnamed: 0,source,target,amount,timestamp,Date
7865,41,16,4836000,1611064651,2021-01-19 14:57:31
7864,16,0,4819765,1611065220,2021-01-19 15:07:00


Unnamed: 0,source,target,amount,timestamp,Date
7869,41,19,5553000,1610977934,2021-01-18 14:52:14
7868,19,0,5547079,1610978026,2021-01-18 14:53:46


Unnamed: 0,source,target,amount,timestamp,Date
8095,1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s,31,1321800,1610218952,2021-01-09 20:02:32
8090,31,0,1312760,1610220515,2021-01-09 20:28:35


Unnamed: 0,source,target,amount,timestamp,Date
24,12,0,5927448,1611127649,2021-01-20 08:27:29
89,0,1NDyJtNTjmwk5xPNhjgAMu4HDHigtobu1s,1897619061,1611129418,2021-01-20 08:56:58


In [65]:
for path in forw_paths2:
    display(path)

Unnamed: 0,source,target,amount,timestamp,Date
187,41,0,12468200,1609677923,2021-01-03 13:45:23
7861,41,12,5941200,1611127498,2021-01-20 08:24:58
7860,12,0,5927448,1611127637,2021-01-20 08:27:17


Unnamed: 0,source,target,amount,timestamp,Date
187,41,0,12468200,1609677923,2021-01-03 13:45:23
7865,41,16,4836000,1611064651,2021-01-19 14:57:31
7864,16,0,4819765,1611065220,2021-01-19 15:07:00


Unnamed: 0,source,target,amount,timestamp,Date
187,41,0,12468200,1609677923,2021-01-03 13:45:23
7869,41,19,5553000,1610977934,2021-01-18 14:52:14
7868,19,0,5547079,1610978026,2021-01-18 14:53:46
