# 7. Customers Graph

In [1]:
import pandas as pd
import numpy as np
import torch as th
import dgl

In [2]:
#!pip install dgl

# Import Data

In [3]:
# import new data after feature engineering
wires = pd.read_csv('wires.csv').iloc[:, 1:]
emts = pd.read_csv('emts.csv').iloc[:, 1:]
cash = pd.read_csv('cash.csv').iloc[:, 1:]
cust_info = pd.read_csv('cust_info.csv').iloc[:, 1:]
detail_cust = pd.read_csv('detailed_cust_info.csv').iloc[:, 1:]
ext_info = pd.read_csv('external_info.csv').iloc[:, 1:]

For the heterogeneous graph:

**Nodes**
- `customers` --> customers that are recorded in the kyc dataframe
- `external users` --> customers that are not recorded in the kyc dataframe
    
**Edges**
- `wire` transactions --> between customers and external customers
- `emt` transactions --> between customers and external customers
- `cash1` transactions --> customer self-loop
    
**Node Features**
- customers: detail_cust dataframe
- external customers: ext_info dataframe

**Edge Features**
- corresponding transaction data

#### Canonical Edge Types (COO form)

Note the edges are abbreviated as: **Source-transaction_type-Destination**
- e.g. ('customer', 'cwc', 'customer') --> customer-wire_trxn-customer --> 'cwc'

The types of connections we will make include (COO format):
- ('customer', 'cwc', 'customer')
- ('customer', 'cwe', 'external')
- ('external', 'ewc', 'customer')
- ('customer', 'cec', 'customer')
- ('customer', 'cee', 'external')
- ('external', 'eec', 'customer')
- ('customer', 'cc_self', 'customer')
- ('external', 'ee_self', 'external')

# Get Node Features for Customers

In [4]:
detail_cust

Unnamed: 0,Global_id,Age,Tenure,label,Gender_female,Gender_male,Occupation_num,Num_wires_sent,Avg_wire_amt_sent,Num_wires_received,...,wse_part,wsc_part,ese_part,esc_part,esw_part,csc_part,csw_part,cse_part,Num_unique_communities_participated,Total_communities_participated
0,102123,45.0,13.0,0,1,0,8,2,1985.75,0,...,1.0,0.0,99.0,0.0,20.0,0.0,0.0,0.0,1.0,17.0
1,83128,52.0,8.0,0,0,1,146,0,0.00,0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,80244,43.0,11.0,0,1,0,115,2,14131.50,4,...,27.0,0.0,140.0,0.0,27.0,29.0,4.0,14.0,2.0,26.0
3,93124,39.0,21.0,1,0,1,195,2,5214.50,3,...,34.0,8.0,62.0,16.0,29.0,172.0,23.0,45.0,8.0,29.0
4,44548,26.0,8.0,0,1,0,212,0,0.00,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126178,80822,37.0,12.0,1,1,0,186,0,0.00,0,...,0.0,0.0,16.0,19.0,1.0,222.0,0.0,42.0,8.0,21.0
126179,18375,46.0,3.0,0,0,1,211,0,0.00,0,...,4.0,0.0,96.0,6.0,3.0,0.0,0.0,18.0,16.0,16.0
126180,10812,31.0,10.0,0,0,1,143,0,0.00,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
126181,23226,21.0,3.0,0,1,0,235,0,0.00,1,...,9.0,0.0,81.0,0.0,6.0,0.0,0.0,2.0,9.0,16.0


# Get Node Features for External Users

In [5]:
ext_info

Unnamed: 0,Global_id,Num_wires_sent,Avg_wire_amt_sent,Num_wires_received,Avg_wire_amt_received,Num_emts_sent,Avg_emt_amt_sent,Num_emts_received,Avg_emt_amt_received,Country_AU,...,wse_part,wsc_part,ese_part,esc_part,esw_part,csc_part,csw_part,cse_part,Num_unique_communities_participated,Total_communities_participated
0,126184,1,1546.0,0,0.0,0,0.000000,0,0.00,1,...,10.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
1,126199,1,1046.0,0,0.0,9,419.888889,10,797.35,0,...,8.0,0.0,164.0,4.0,12.0,0.0,0.0,2.0,18,20
2,126200,1,1444.0,0,0.0,0,0.000000,0,0.00,0,...,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
3,126201,1,3710.0,1,4011.0,8,1643.000000,5,840.40,0,...,16.0,1.0,76.0,3.0,12.0,0.0,0.0,18.0,3,15
4,126208,1,38450.0,0,0.0,0,0.000000,0,0.00,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69870,196051,0,0.0,0,0.0,1,403.000000,2,494.50,0,...,8.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0,3,3
69871,196052,0,0.0,0,0.0,1,1128.000000,2,1065.25,0,...,0.0,0.0,17.0,8.0,3.0,0.0,0.0,14.0,3,3
69872,196054,0,0.0,0,0.0,2,377.500000,2,166.50,0,...,1.0,0.0,23.0,4.0,1.0,0.0,0.0,4.0,4,4
69873,196056,0,0.0,0,0.0,3,9.333333,0,0.00,0,...,0.0,0.0,10.0,7.0,3.0,0.0,0.0,0.0,3,3


# Get COO Tensors for Edges

#### Create maps to map the global ids to the corresponding index in the node features dataframe

In [6]:
# create a function to generate a map for a corresponding node features table
def gen_map(df, transactions):
    """
    """
    
    # If mapping transactions, get list of transaction ids
    if transactions:
        gids = list(df['trxn_id'])
    else:
        # get global_ids for indexes of customers
        gids = list(df['Global_id'])
    
    # create a list for the indexes to map to
    indexes = [ x for x in range(len(df)) ]

    # create a dictionary for the mapped indexes
    i_map = {gid:index for gid, index in zip(gids, indexes)}
    
    return i_map

In [7]:
# Create map for customers
cust_map = gen_map(cust_info, False)

In [8]:
# Create map for external users
ext_map = gen_map(ext_info, False)

#### Create functions to extract COO format

In [9]:
# create a function to get edges with the corresponding index values in the node features dataframe
def get_edges(df, ext_source, ext_dest):
    """
    """
    edges_df = pd.DataFrame()
    
    edges_df['trxn_id'] = df['trxn_id']
    
    # Map the global ids to the corresponding index in the customer node features dataframe
    edges_df['Source'] = df['sender_global_id'].map(cust_map)
    edges_df['Destination'] = df['rec_global_id'].map(cust_map)
    
    # If source/dest is external map to indexes in external users node features dataframe
    if ext_source:
        edges_df['Source'] = df['sender_global_id'].map(ext_map)
    elif ext_dest:
        edges_df['Destination'] = df['rec_global_id'].map(ext_map)
    
    return edges_df

In [10]:
# create a function to store the edges dataframe as a tuple with tensors
def get_tensors(edges, self_loop):
    """
    """
    
    if not self_loop:
        filtered = edges.drop(columns='trxn_id')
    else:
        filtered = edges
        
    # transpose the dataframe to get the format for the tensors
    values = filtered.values.transpose()
    
    # convert the values to tensors
    u = th.tensor(values[0].copy())
    v = th.tensor(values[1].copy())
    
    # return a tuple of node_tensors
    return (u, v)

## Get edges for wire transactions

In [11]:
# Get info for wire transactions
w_trxns = wires[['trxn_id', 'sender_global_id', 'rec_global_id', 'sender_ext', 'receiver_ext']]
w_trxns

Unnamed: 0,trxn_id,sender_global_id,rec_global_id,sender_ext,receiver_ext
0,LWCS42954834,46393,118403,0,0
1,NTTG55749308,101584,183844,0,1
2,IXVD84599097,184551,104548,1,0
3,SLBV29462341,71803,162608,0,1
4,ERLU26785367,117672,9197,0,0
...,...,...,...,...,...
48287,LRRP66624765,142262,107141,1,0
48288,KVQK50168638,112004,155051,0,1
48289,IUIP17370739,69903,167767,0,1
48290,ZHVK78574815,29953,96357,0,0


### Extract COO for ('customer', 'wire trxn', 'customer') 

In [12]:
# find transactions where both customers are not external users
cwc = w_trxns[(w_trxns['sender_ext'] == 0) & (w_trxns['receiver_ext'] == 0)][['trxn_id', 'sender_global_id', 'rec_global_id']]

# Extract the edges
cwc_edges = get_edges(cwc, False, False)
cwc_edges

Unnamed: 0,trxn_id,Source,Destination
0,LWCS42954834,107582,86145
4,ERLU26785367,72153,42578
19,QIFX52531812,24489,7441
21,FKHH38954970,110266,86810
22,GRDY40328822,39569,2878
...,...,...,...
48276,DQNW14946332,62723,55852
48279,JLBM22713710,68275,83821
48283,VJLA69865830,31693,84475
48286,QUSP53163099,110556,77679


In [13]:
# get the node-tensors for these edges
cwc_ten = get_tensors(cwc_edges, False)
cwc_ten

(tensor([107582,  72153,  24489,  ...,  31693, 110556,  14482]),
 tensor([86145, 42578,  7441,  ..., 84475, 77679, 40897]))

### Extract COO for ('customer', 'wire trxn', 'external') 

In [14]:
# find transactions where receiver is an external user
cwe = w_trxns[(w_trxns['sender_ext'] == 0) & (w_trxns['receiver_ext'] == 1)][['trxn_id', 'sender_global_id', 'rec_global_id']]

# Extract the edges
cwe_edges = get_edges(cwe, False, True)
cwe_edges

Unnamed: 0,trxn_id,Source,Destination
1,NTTG55749308,111902,60119
3,SLBV29462341,66766,7305
5,STZX92186222,63462,9719
12,NHIA18852481,81176,2171
13,MXLC90467212,106433,12237
...,...,...,...
48278,KQNN44244441,76344,17647
48282,UQGU97554459,22752,60624
48284,GDKU22797093,27712,11159
48288,KVQK50168638,50583,5746


In [15]:
# get the node-tensors for these edges
cwe_ten = get_tensors(cwe_edges, False)
cwe_ten

(tensor([111902,  66766,  63462,  ...,  27712,  50583,  31300]),
 tensor([60119,  7305,  9719,  ..., 11159,  5746, 47224]))

### Extract COO for ('external', 'wire trxn', 'customer') 

In [16]:
# find transactions where sender is an external user
ewc = w_trxns[(w_trxns['sender_ext'] == 1) & (w_trxns['receiver_ext'] == 0)][['trxn_id', 'sender_global_id', 'rec_global_id']]

# Extract the edges
ewc_edges = get_edges(ewc, True, False)
ewc_edges

Unnamed: 0,trxn_id,Source,Destination
2,IXVD84599097,11664,55360
6,HBOX39904199,3829,13150
7,WGKT37513493,13915,94454
8,FYQD25855828,12309,25335
9,HRYI70025968,7357,11738
...,...,...,...
48280,XIME58694157,3393,30639
48281,CPNR42645167,2866,104693
48285,BAIZ16468662,12139,55716
48287,LRRP66624765,3156,39178


In [17]:
# get the node-tensors for these edges
ewc_ten = get_tensors(ewc_edges, False)
ewc_ten

(tensor([11664,  3829, 13915,  ..., 12139,  3156,  7534]),
 tensor([55360, 13150, 94454,  ..., 55716, 39178, 44906]))

## Get edges for emt transactions

In [18]:
# Get info for wire transactions
e_trxns = emts[['trxn_id', 'sender_global_id', 'rec_global_id', 'sender_ext', 'receiver_ext']]
e_trxns

Unnamed: 0,trxn_id,sender_global_id,rec_global_id,sender_ext,receiver_ext
0,RAUG63886259,166925,69450,1,0
1,WPXP45854083,68825,155054,0,1
2,TRNT55099512,24521,6745,0,0
3,YSNV62579819,77547,23015,0,0
4,MZYI28216959,156121,78536,1,0
...,...,...,...,...,...
318895,XAOG83079223,62944,91980,0,0
318896,USHN74907347,156186,91130,1,0
318897,VXES44436032,3063,82331,0,0
318898,LTUK21435620,155038,112810,1,0


### Extract COO for ('customer', 'emt trxn', 'customer') 

In [19]:
# find transactions where both customers are not external users
cec = e_trxns[(e_trxns['sender_ext'] == 0) & (e_trxns['receiver_ext'] == 0)][['trxn_id', 'sender_global_id', 'rec_global_id']]

# Extract the edges
cec_edges = get_edges(cec, False, False)
cec_edges

Unnamed: 0,trxn_id,Source,Destination
2,TRNT55099512,23983,22815
3,YSNV62579819,35353,72029
5,VDZX91214146,64210,88057
7,VAEX16776661,32650,18824
10,DZID77526302,31905,125927
...,...,...,...
318891,OIYX14211075,98881,113993
318893,ODUI84274871,57211,114883
318895,XAOG83079223,24651,115180
318897,VXES44436032,46399,52114


In [20]:
# get the node-tensors for these edges
cec_ten = get_tensors(cec_edges, False)
cec_ten

(tensor([23983, 35353, 64210,  ..., 24651, 46399,  1056]),
 tensor([ 22815,  72029,  88057,  ..., 115180,  52114,  36524]))

### Extract COO for ('customer', 'emt trxn', 'external') 

In [21]:
# find transactions where receiver is an external user
cee = e_trxns[(e_trxns['sender_ext'] == 0) & (e_trxns['receiver_ext'] == 1)][['trxn_id', 'sender_global_id', 'rec_global_id']]

# Extract the edges
cee_edges = get_edges(cee, False, True)
cee_edges

Unnamed: 0,trxn_id,Source,Destination
1,WPXP45854083,96043,37089
8,QBGC19715656,48360,19013
15,JRRS77736677,89356,21074
16,CQOM12893631,96420,25173
18,CBGO99432725,49146,7915
...,...,...,...
318882,JTRI57256529,30088,60164
318883,KFHZ16613401,93232,17139
318884,NFFQ26281049,119440,52714
318888,PTPV58305040,20636,45299


In [22]:
# get the node-tensors for these edges
cee_ten = get_tensors(cee_edges, False)
cee_ten

(tensor([ 96043,  48360,  89356,  ..., 119440,  20636,  58123]),
 tensor([37089, 19013, 21074,  ..., 52714, 45299, 12696]))

### Extract COO for ('external', 'emt trxn', 'customer') 

In [23]:
# find transactions where sender is an external user
eec = e_trxns[(e_trxns['sender_ext'] == 1) & (e_trxns['receiver_ext'] == 0)][['trxn_id', 'sender_global_id', 'rec_global_id']]

# Extract the edges
eec_edges = get_edges(eec, True, False)
eec_edges

Unnamed: 0,trxn_id,Source,Destination
0,RAUG63886259,46535,75465
4,MZYI28216959,37918,55368
6,DKKB81857409,6304,27989
9,VNFJ29402104,21011,73106
11,YDCX94728182,16824,54593
...,...,...,...
318889,YVQU50726638,14522,78866
318892,SRJG52523730,25078,22724
318894,JOWI91900123,2008,81999
318896,USHN74907347,5995,33408


In [24]:
# get the node-tensors for these edges
eec_ten = get_tensors(eec_edges, False)
eec_ten

(tensor([46535, 37918,  6304,  ...,  2008,  5995,  5743]),
 tensor([75465, 55368, 27989,  ..., 81999, 33408,  4702]))

# Create self loops for cash transactions

### Create self loops for customer nodes

In [25]:
# Create self loops for customer nodes
gids = [ x for x in range(len(cust_info['Global_id']))]
cust_self = pd.DataFrame({'Global_id': gids, 'Gids': gids})
cust_self

Unnamed: 0,Global_id,Gids
0,0,0
1,1,1
2,2,2
3,3,3
4,4,4
...,...,...
126178,126178,126178
126179,126179,126179
126180,126180,126180
126181,126181,126181


In [26]:
# get the tensors
cself_ten = get_tensors(cust_self, True)
cself_ten

(tensor([     0,      1,      2,  ..., 126180, 126181, 126182]),
 tensor([     0,      1,      2,  ..., 126180, 126181, 126182]))

### Create self loops for external nodes

In [27]:
# Create self loops for customer nodes
gids = [ x for x in range(len(ext_info['Global_id']))]
ext_self = pd.DataFrame({'Global_id': gids, 'Gids': gids})
ext_self

Unnamed: 0,Global_id,Gids
0,0,0
1,1,1
2,2,2
3,3,3
4,4,4
...,...,...
69870,69870,69870
69871,69871,69871
69872,69872,69872
69873,69873,69873


In [28]:
# get the tensors
extself_ten = get_tensors(ext_self, True)
extself_ten

(tensor([    0,     1,     2,  ..., 69872, 69873, 69874]),
 tensor([    0,     1,     2,  ..., 69872, 69873, 69874]))

# Construct Graph

#### Create graph data using edge COOs

In [29]:
# Combine the edge COOs into the graph data
graph_data = {
    ('customer', 'cwc', 'customer'): cwc_ten,
    ('customer', 'cwe', 'external'): cwe_ten,
    ('external', 'ewc', 'customer'): ewc_ten,
    ('customer', 'cec', 'customer'): cec_ten,
    ('customer', 'cee', 'external'): cee_ten,
    ('external', 'eec', 'customer'): eec_ten,
    ('customer', 'cc_self', 'customer'): cself_ten,
    ('external', 'ee_self', 'external'): extself_ten,   
}

In [30]:
g = dgl.heterograph(graph_data)

In [31]:
g.ntypes

['customer', 'external']

In [32]:
g.etypes

['cc_self', 'cec', 'cee', 'cwc', 'cwe', 'ee_self', 'eec', 'ewc']

In [33]:
g.canonical_etypes

[('customer', 'cc_self', 'customer'),
 ('customer', 'cec', 'customer'),
 ('customer', 'cee', 'external'),
 ('customer', 'cwc', 'customer'),
 ('customer', 'cwe', 'external'),
 ('external', 'ee_self', 'external'),
 ('external', 'eec', 'customer'),
 ('external', 'ewc', 'customer')]

In [34]:
g.num_nodes()

196058

In [35]:
g.num_edges()

563250

# Add node/edge features

In [36]:
# create a function to extract feature data into tensors
def get_fts(features_df):
    """
    """
    # convert the dataframe to numpy 
    x = features_df.to_numpy()
    
    # Create a tensor with feature data 
    tens = th.tensor(x.copy()).float()
    
    return tens

### Add customer node features

In [37]:
# Drop the id column in cust_info
cust_fts = detail_cust.drop(columns='Global_id')
cust_fts

Unnamed: 0,Age,Tenure,label,Gender_female,Gender_male,Occupation_num,Num_wires_sent,Avg_wire_amt_sent,Num_wires_received,Avg_wire_amt_received,...,wse_part,wsc_part,ese_part,esc_part,esw_part,csc_part,csw_part,cse_part,Num_unique_communities_participated,Total_communities_participated
0,45.0,13.0,0,1,0,8,2,1985.75,0,0.000000,...,1.0,0.0,99.0,0.0,20.0,0.0,0.0,0.0,1.0,17.0
1,52.0,8.0,0,0,1,146,0,0.00,0,0.000000,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,43.0,11.0,0,1,0,115,2,14131.50,4,5657.375000,...,27.0,0.0,140.0,0.0,27.0,29.0,4.0,14.0,2.0,26.0
3,39.0,21.0,1,0,1,195,2,5214.50,3,7965.166667,...,34.0,8.0,62.0,16.0,29.0,172.0,23.0,45.0,8.0,29.0
4,26.0,8.0,0,1,0,212,0,0.00,0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126178,37.0,12.0,1,1,0,186,0,0.00,0,0.000000,...,0.0,0.0,16.0,19.0,1.0,222.0,0.0,42.0,8.0,21.0
126179,46.0,3.0,0,0,1,211,0,0.00,0,0.000000,...,4.0,0.0,96.0,6.0,3.0,0.0,0.0,18.0,16.0,16.0
126180,31.0,10.0,0,0,1,143,0,0.00,0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
126181,21.0,3.0,0,1,0,235,0,0.00,1,1466.000000,...,9.0,0.0,81.0,0.0,6.0,0.0,0.0,2.0,9.0,16.0


In [38]:
# create a tensor with customer feature data
x_custs = get_fts(cust_fts)
x_custs

tensor([[45., 13.,  0.,  ...,  0.,  1., 17.],
        [52.,  8.,  0.,  ...,  0.,  1.,  1.],
        [43., 11.,  0.,  ..., 14.,  2., 26.],
        ...,
        [31., 10.,  0.,  ...,  0.,  1.,  1.],
        [21.,  3.,  0.,  ...,  2.,  9., 16.],
        [37.,  1.,  0.,  ...,  0.,  1.,  1.]])

In [39]:
# Add features for customer nodes
g.nodes['customer'].data['features'] = x_custs

### Add external user node features

In [40]:
# Drop the id column in ext_info
ext_fts = ext_info.drop(columns='Global_id')
ext_fts

Unnamed: 0,Num_wires_sent,Avg_wire_amt_sent,Num_wires_received,Avg_wire_amt_received,Num_emts_sent,Avg_emt_amt_sent,Num_emts_received,Avg_emt_amt_received,Country_AU,Country_CA,...,wse_part,wsc_part,ese_part,esc_part,esw_part,csc_part,csw_part,cse_part,Num_unique_communities_participated,Total_communities_participated
0,1,1546.0,0,0.0,0,0.000000,0,0.00,1,0,...,10.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
1,1,1046.0,0,0.0,9,419.888889,10,797.35,0,1,...,8.0,0.0,164.0,4.0,12.0,0.0,0.0,2.0,18,20
2,1,1444.0,0,0.0,0,0.000000,0,0.00,0,0,...,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
3,1,3710.0,1,4011.0,8,1643.000000,5,840.40,0,1,...,16.0,1.0,76.0,3.0,12.0,0.0,0.0,18.0,3,15
4,1,38450.0,0,0.0,0,0.000000,0,0.00,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69870,0,0.0,0,0.0,1,403.000000,2,494.50,0,0,...,8.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0,3,3
69871,0,0.0,0,0.0,1,1128.000000,2,1065.25,0,0,...,0.0,0.0,17.0,8.0,3.0,0.0,0.0,14.0,3,3
69872,0,0.0,0,0.0,2,377.500000,2,166.50,0,0,...,1.0,0.0,23.0,4.0,1.0,0.0,0.0,4.0,4,4
69873,0,0.0,0,0.0,3,9.333333,0,0.00,0,0,...,0.0,0.0,10.0,7.0,3.0,0.0,0.0,0.0,3,3


In [41]:
# create a tensor with external user feature data
x_ext = get_fts(ext_fts)
x_ext

tensor([[1.0000e+00, 1.5460e+03, 0.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
         1.0000e+00],
        [1.0000e+00, 1.0460e+03, 0.0000e+00,  ..., 2.0000e+00, 1.8000e+01,
         2.0000e+01],
        [1.0000e+00, 1.4440e+03, 0.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
         1.0000e+00],
        ...,
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 4.0000e+00, 4.0000e+00,
         4.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 3.0000e+00,
         3.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 4.0000e+00,
         4.0000e+00]])

In [42]:
# Add features for customer nodes
g.nodes['external'].data['features'] = x_ext

# Get edge features

Extract the edge features for each canonical edge type.

In [43]:
# Create a function to extract the features for a given canonical edge type
def get_efeats(edges, trxns_df):
    """
    """
    
    # Generate a map to map the corresponding transactions to the indexes in the trxns_df
    i_map = gen_map(edges, True)
    
    # Get a list of transaction ids in the edges df
    ids = list(edges['trxn_id'])
    
    # filter for the trxns that have ids in the edges df
    selected = trxns_df[trxns_df['trxn_id'].isin(ids)]
    
    # Add a column for the corresponding index in trxns_df
    selected.loc[:, 'index'] = selected.loc[:, 'trxn_id'].map(i_map)
    
    # order the dataframe by the index and drop unecessary columns
    selected = selected.sort_values(by='index')
    selected = selected.drop(columns=['trxn_id', 'index'])
    
    # Get the tensors for the data
    x = get_fts(selected)
    
    return x

### Get ('customer', 'wire trxn', 'customer') features

In [44]:
# Get wire transactions related to cwc
cwc_fts = get_efeats(cwc_edges, wires)
cwc_fts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected.loc[:, 'index'] = selected.loc[:, 'trxn_id'].map(i_map)


tensor([[1.2670e+03, 3.4000e+01, 0.0000e+00,  ..., 0.0000e+00, 8.0000e+00,
         5.3000e+01],
        [1.5460e+03, 3.6000e+01, 0.0000e+00,  ..., 0.0000e+00, 1.9000e+01,
         1.6400e+02],
        [5.6060e+03, 2.3000e+01, 0.0000e+00,  ..., 0.0000e+00, 2.1700e+02,
         9.1000e+01],
        ...,
        [1.0100e+04, 5.3000e+01, 3.5000e+01,  ..., 0.0000e+00, 1.1800e+02,
         5.1000e+01],
        [2.2100e+03, 1.8000e+01, 0.0000e+00,  ..., 0.0000e+00, 8.4000e+01,
         1.2000e+01],
        [4.0840e+03, 3.4000e+01, 5.0000e+00,  ..., 1.0000e+00, 2.2800e+02,
         1.9300e+02]])

### Get ('customer', 'wire trxn', 'external') features

In [45]:
# Get wire transactions related to cwc
cwe_fts = get_efeats(cwe_edges, wires)
cwe_fts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected.loc[:, 'index'] = selected.loc[:, 'trxn_id'].map(i_map)


tensor([[8.5910e+03, 3.8000e+01, 5.0000e+00,  ..., 0.0000e+00, 9.1000e+01,
         2.5000e+02],
        [1.5870e+03, 2.2000e+01, 3.0000e+00,  ..., 0.0000e+00, 2.1700e+02,
         2.5000e+02],
        [2.5035e+03, 4.8000e+01, 1.6000e+01,  ..., 0.0000e+00, 1.8600e+02,
         2.5000e+02],
        ...,
        [3.3050e+03, 3.3000e+01, 8.0000e+00,  ..., 0.0000e+00, 2.1600e+02,
         2.5000e+02],
        [5.0670e+03, 2.5000e+01, 0.0000e+00,  ..., 0.0000e+00, 2.1700e+02,
         2.5000e+02],
        [1.8874e+04, 4.4000e+01, 1.0000e+00,  ..., 0.0000e+00, 2.1600e+02,
         2.5000e+02]])

### Get ('external', 'wire trxn', 'customer') features

In [46]:
# Get wire transactions related to cwc
ewc_fts = get_efeats(ewc_edges, wires)
ewc_fts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected.loc[:, 'index'] = selected.loc[:, 'trxn_id'].map(i_map)


tensor([[ 1.4805e+03, -1.0000e+00, -1.0000e+00,  ...,  0.0000e+00,
          2.5000e+02,  5.1000e+01],
        [ 2.3460e+03, -1.0000e+00, -1.0000e+00,  ...,  0.0000e+00,
          2.5000e+02,  2.4200e+02],
        [ 1.6825e+03, -1.0000e+00, -1.0000e+00,  ...,  0.0000e+00,
          2.5000e+02,  1.8400e+02],
        ...,
        [ 3.8485e+03, -1.0000e+00, -1.0000e+00,  ...,  0.0000e+00,
          2.5000e+02,  1.0000e+01],
        [ 6.0595e+03, -1.0000e+00, -1.0000e+00,  ...,  0.0000e+00,
          2.5000e+02,  2.4700e+02],
        [ 1.5420e+03, -1.0000e+00, -1.0000e+00,  ...,  1.0000e+00,
          2.5000e+02,  2.1900e+02]])

### Get ('customer', 'emt trxn', 'customer') features

In [47]:
# Get wire transactions related to cwc
cec_fts = get_efeats(cec_edges, emts)
cec_fts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected.loc[:, 'index'] = selected.loc[:, 'trxn_id'].map(i_map)


tensor([[480.,  69.,  14.,  ..., 188.,  52.,   0.],
        [735.,  44.,  14.,  ...,   8.,  62.,   0.],
        [ 60.,  37.,   1.,  ...,  18., 131.,   0.],
        ...,
        [682.,  34.,   3.,  ...,  64.,  60.,   0.],
        [208.,  46.,   9.,  ...,  73., 235.,   0.],
        [ 10.,  32.,   7.,  ...,  91., 155.,   0.]])

### Get ('customer', 'emt trxn', 'external') features

In [48]:
# Get wire transactions related to cwc
cee_fts = get_efeats(cee_edges, emts)
cee_fts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected.loc[:, 'index'] = selected.loc[:, 'trxn_id'].map(i_map)


tensor([[ 46.,  34.,   8.,  ..., 110., 250.,   0.],
        [317.,  18.,   0.,  ...,   7., 250.,   0.],
        [300.,  38.,   4.,  ...,  16., 250.,   0.],
        ...,
        [445.,  39.,   0.,  ..., 111., 250.,   0.],
        [ 65.,  25.,   0.,  ...,  39., 250.,   0.],
        [161.,  35.,   7.,  ..., 222., 250.,   0.]])

### Get ('external', 'emt trxn', 'customer') features

In [49]:
# Get wire transactions related to cwc
eec_fts = get_efeats(eec_edges, emts)
eec_fts

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected.loc[:, 'index'] = selected.loc[:, 'trxn_id'].map(i_map)


tensor([[ 1.1705e+03, -1.0000e+00, -1.0000e+00,  ...,  2.5000e+02,
          1.1100e+02,  0.0000e+00],
        [ 5.4000e+02, -1.0000e+00, -1.0000e+00,  ...,  2.5000e+02,
          1.2000e+02,  0.0000e+00],
        [ 2.4100e+02, -1.0000e+00, -1.0000e+00,  ...,  2.5000e+02,
          9.1000e+01,  0.0000e+00],
        ...,
        [ 4.7900e+02, -1.0000e+00, -1.0000e+00,  ...,  2.5000e+02,
          9.1000e+01,  0.0000e+00],
        [ 1.1900e+02, -1.0000e+00, -1.0000e+00,  ...,  2.5000e+02,
          3.7000e+01,  0.0000e+00],
        [ 1.5000e+02, -1.0000e+00, -1.0000e+00,  ...,  2.5000e+02,
          1.9500e+02,  1.0000e+00]])

### Get ('customer', 'self_loop', 'customer') Cash Transaction features

In [50]:
# since the cash transactions are stored in the cust_info
# we do not need to remap the indexes as they are in the right order
c_trxns = cust_info.iloc[:, -4:]
c_trxns

Unnamed: 0,Num_deposit,Num_withdrawal,Avg_deposit,Avg_withdrawal
0,0.0,0.0,0.000000,0.00
1,0.0,0.0,0.000000,0.00
2,2.0,0.0,1917.500000,0.00
3,9.0,2.0,7568.333333,9792.50
4,0.0,0.0,0.000000,0.00
...,...,...,...,...
126178,10.0,4.0,8767.000000,8248.75
126179,0.0,0.0,0.000000,0.00
126180,0.0,0.0,0.000000,0.00
126181,0.0,0.0,0.000000,0.00


In [51]:
# Get the tensors
cself_fts = get_fts(c_trxns)
cself_fts

tensor([[   0.0000,    0.0000,    0.0000,    0.0000],
        [   0.0000,    0.0000,    0.0000,    0.0000],
        [   2.0000,    0.0000, 1917.5000,    0.0000],
        ...,
        [   0.0000,    0.0000,    0.0000,    0.0000],
        [   0.0000,    0.0000,    0.0000,    0.0000],
        [   0.0000,    0.0000,    0.0000,    0.0000]])

### Get ('external', 'self_loop', 'external') Cash Transaction features

In [52]:
# since the external users have 0 transactions, create tensors with just 0s
eself_fts = th.zeros((len(ext_info)), 4)
eself_fts 

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

## Set edge features

In [53]:
# set the edge features in the graph
g.edges[('customer', 'cwc', 'customer')].data['features'] = cwc_fts
g.edges[('customer', 'cwe', 'external')].data['features'] = cwe_fts
g.edges[('external', 'ewc', 'customer')].data['features'] = ewc_fts
g.edges[('customer', 'cec', 'customer')].data['features'] = cec_fts
g.edges[('customer', 'cee', 'external')].data['features'] = cee_fts
g.edges[('external', 'eec', 'customer')].data['features'] = eec_fts
g.edges[('customer', 'cc_self', 'customer')].data['features'] = cself_fts
g.edges[('external', 'ee_self', 'external')].data['features'] = eself_fts

In [54]:
g

Graph(num_nodes={'customer': 126183, 'external': 69875},
      num_edges={('customer', 'cc_self', 'customer'): 126183, ('customer', 'cec', 'customer'): 122064, ('customer', 'cee', 'external'): 73545, ('customer', 'cwc', 'customer'): 14668, ('customer', 'cwe', 'external'): 12780, ('external', 'ee_self', 'external'): 69875, ('external', 'eec', 'customer'): 123291, ('external', 'ewc', 'customer'): 20844},
      metagraph=[('customer', 'customer', 'cc_self'), ('customer', 'customer', 'cec'), ('customer', 'customer', 'cwc'), ('customer', 'external', 'cee'), ('customer', 'external', 'cwe'), ('external', 'external', 'ee_self'), ('external', 'customer', 'eec'), ('external', 'customer', 'ewc')])

# Save graph

In [55]:
# Save the graph to a file
dgl.save_graphs('customers_graph', [g])